Notes by Jinyu Du
Mar.6.2022
Link to the course is here
Course author: Terezija Semenski
Visualization libraries for Python: matplotlib, seaborn, bokeh, ggplot/plotnine (Python ports of R's ggplot2), altair.
%matplotlib notebook
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
# One temperature value per entry of `months` (12 values, January to December).
average_monthly_temperatures = [39.1, 40.1, 48.0, 50.4, 60.3, 73.7, 80.0, 76.9, 68.8, 57.9, 53.0, 39.2]
months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# Keep a handle on the figure so it can still be saved after plt.show().
fig = plt.figure()
plt.plot(months,average_monthly_temperatures)
plt.title("Average monthly temperatures")
plt.xlabel("months")
plt.ylabel("temperature")
plt.show() # display the figure; shows just the plot
# Save the same figure twice; savefig picks the file format from the extension.
fig.savefig('average_monthly_temperatures.png')
fig.savefig('average_monthly_temperatures.pdf')
!ls -lh average_monthly_temperatures.png
-rw-r--r-- 1 jinyudu staff 16K Mar 6 08:53 average_monthly_temperatures.png
!ls -lh average_monthly_temperatures.pdf
-rw-r--r-- 1 jinyudu staff 13K Mar 6 08:53 average_monthly_temperatures.pdf
# Three lines on the same axes: y = x, y = 2x and y = 3x.
x = np.arange(3)
plt.plot(x,x)
plt.plot(x,2*x)
plt.plot(x,3*x)
plt.grid(True)  # turn the grid lines on
plt.show()
# A single line y = 2x sampled at 5 evenly spaced points between 0 and 5.
x = np.linspace(0,5,5)
y=2*x
plt.plot(x,y)
plt.show()
The object-oriented way to create a plot: create a figure, add axes to it, and plot on the axes.
fig = plt.figure() # create an empty figure object
# The list is [left, bottom, width, height], given as fractions of the figure size.
axes = fig.add_axes([0.1,0.1,0.8,0.8])
# x and y are the arrays defined in the preceding cell.
axes.plot(x,y)
plt.show()
# Four panels in a 2x2 grid, built with the state-based pyplot interface.
fig=plt.figure()
x=np.arange(3)
y=2*x
# plt.subplot(nrows, ncols, index) makes cell `index` (counted from 1) the current axes.
plt.subplot(2,2,1)
plt.plot(x,y,'b')
plt.subplot(2,2,2)
plt.plot(x,1-y,'r')
plt.subplot(2,2,3)
plt.plot(x,2-y,'g')
plt.subplot(2,2,4)
plt.plot(x,y,'y')
plt.show()
Another, more elegant way to create subplots: `plt.subplots` returns the figure and the whole grid of axes in a single call.
# Same four panels as the plt.subplot version, but each axes is addressed by
# its [row, column] position in the returned 2x2 array.
fig, axs = plt.subplots(2, 2, figsize=(6,6))
axs[0, 0].plot(x, y, 'b')
axs[0, 1].plot(x, 1-y, 'r')
axs[1, 0].plot(x, 2-y, 'g')
axs[1, 1].plot(x, y, 'y')
plt.show()
x = np.linspace(1,10)
# Option 1: pass the label directly to plot().
first_line = plt.plot(x, x+1, label= 'y=x+1')
plt.legend(); # build the legend from the labels; the trailing ';' hides the echoed return value
# Option 2: plot() returns a list of lines, so the trailing comma unpacks the
# single line object and the label is attached afterwards.
second_line, = plt.plot(x,x+2,linestyle='solid')
second_line.set_label('y=x+2')
third_line, = plt.plot(x,x+3,linestyle='dashed')
third_line.set_label('y=x+3')
plt.legend();
first_plot,=plt.plot([1,2,3],label='first plot')
second_plot,=plt.plot([3,2,1],label='second plot')
third_plot,=plt.plot([2,2,2],label='third plot')
# Anchor the legend box at (1.02, 1.0); an x value above 1 puts it just right of the axes.
plt.legend(bbox_to_anchor=(1.02, 1.0), borderaxespad=0);
plt.figure(dpi=720) # dpi sets the resolution of the figure
# Books read per month by each student (12 values, plotted against months 1..12).
first_student_books=[2,4,7,3,1,5,1,0,2,3,6,4]
second_student_books=[0,5,3,1,6,4,1,1,3,4,3,2]
first_line=plt.plot(range(1,13),first_student_books)
second_line=plt.plot(range(1,13),second_student_books)
plt.xlabel('months')
plt.ylabel('books read')
# Labels are matched to the lines in plotting order; loc=1 is the upper-right corner.
plt.legend(['books first student','books second student'],loc=1)
plt.title('Books read by students')
plt.show()
# Three ways to specify a colour: full name, one-letter code, and hex string.
first_figure = plt.figure()
x = np.linspace(1, 10)
y = np.linspace(1, 10)
ax=first_figure.add_axes([0,0,1,1])
ax.plot(x,y, color='red');  # colour by name
second_figure = plt.figure()
ax=second_figure.add_axes([0,0,1,1])
ax.plot(x,y, color='g');  # colour by one-letter code
third_figure = plt.figure()
ax=third_figure.add_axes([0,0,1,1])
ax.plot(x,y, color='#FF00FF');  # colour by hex RGB string
# Change the line style using the long style names.
plt.plot(x,2*x,linestyle='solid')
plt.plot(x,3*x,linestyle='dashed')
plt.plot(x,4*x,linestyle='dashdot')
plt.plot(x,5*x,linestyle='dotted');
# The same four styles written with their short symbols.
plt.plot(x,2*x,linestyle='-')
plt.plot(x,3*x,linestyle='--')
plt.plot(x,4*x,linestyle='-.')
plt.plot(x,5*x,linestyle=':');
# Specify line style and colour together in one format string: '-.' (dash-dot) + 'g' (green).
plt.plot(x, 3*x ,'-.g');
Three types of nonlinear scales:
logarithmic scale. The most commonly used nonlinear scale. It suits a series of values where each value equals the previous value multiplied by a constant.
symmetrical logarithmic scale. It is used when the data contain zero or negative values, which a plain logarithmic scale cannot represent.
logit scale. It is used for values strictly between 0 and 1, such as probabilities.
# Compare x, 10^x and log(x) with both axes on a logarithmic scale.
x = np.linspace(1, 10, 1024)
plt.xscale('log')
plt.yscale('log')
# The $...$ labels are rendered as math text in the legend.
plt.plot(x, x, label ='$f(x)=x$')
plt.plot(x, 10**x, label ='$f(x)=10^x$')
plt.plot(x, np.log(x),label ='$f(x)=log(x)$')
plt.legend()
plt.show()
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
# --- Tick locators: major ticks every 10 units, minor ticks every 2 units ---
x = np.arange(0.0, 50.0, 0.1)
y = x**2
fig, ax = plt.subplots()
ax.plot(x,y)
ax.xaxis.set_major_locator(MultipleLocator(10))
ax.xaxis.set_major_formatter('{x:.0f}')  # format major tick labels with no decimals
ax.xaxis.set_minor_locator(MultipleLocator(2))
plt.show()
# --- Axis limits: fix the visible range of both axes ---
x = np.arange(0.0, 50.0, 0.1)
y = x**2
fig, ax = plt.subplots()
ax.plot(x,y)
ax.set_xlim([0, 50])
ax.set_ylim([0, 2500])
plt.show()
# --- Explicit tick positions on both axes ---
x = np.arange(0.0, 50.0, 0.1)
y = x**2
fig, ax = plt.subplots()
ax.plot(x,y)
ax.set_xticks([0,5,10,15,20,25,30,35,40,45,50])
ax.set_yticks([0,250,500,750,1000,1250,1500,1750,2000,2250,2500])
plt.show()
# Supply (y = x) and demand (y = 8 - x) curves; they cross at (4, 4).
x = np.linspace(0, 10)
y1, y2 = x, 8 - x

# Draw through the Axes object returned by plt.subplots().
fig, ax = plt.subplots()
ax.plot(x, y1, label='supply')
ax.plot(x, y2, label='demand')

# Arrow from the text at (3, 2) to the crossing point at (4, 4).
ax.annotate(
    "Equilibrium",
    xy=(4, 4),
    xytext=(3, 2),
    fontsize=12,
    fontweight='semibold',
    arrowprops={'linewidth': 2, 'arrowstyle': "->"},
)

ax.set_xlabel('quantity', fontsize=12)
ax.set_ylabel('price', fontsize=12)
ax.legend()
plt.show()
# Supply (y = x) and demand (y = 8 - x) curves, labelled with a text box
# instead of an annotate() arrow.
x = np.linspace(0, 10)
y1 = x
y2 = 8-x
# Plot the data
fig, ax = plt.subplots()
plt.plot(x,y1,label='supply')
plt.plot(x,y2,label='demand')
# Label the equilibrium with text inside a right-arrow-shaped box ("rarrow");
# fc is the box fill colour as an RGB tuple, lw the outline width.
bbox_props = dict(boxstyle="rarrow", fc=(0.8, 0.9, 0.9), lw=2)
t = ax.text(2,4, "equilibrium", ha="center", va="center", rotation=0,
size=10,bbox=bbox_props)
# Label the axes
plt.xlabel('quantity',fontsize=12)
plt.ylabel('price',fontsize=12)
plt.legend()
plt.show()
from matplotlib.patches import Circle, Polygon
from matplotlib.collections import PatchCollection
# Draw a circle and a triangle as a single PatchCollection.
fig, ax = plt.subplots()
patches = []
# Circle(centre, radius) and a closed three-vertex polygon.
circle = Circle((.42, .75), 0.12)
# `closed` is passed by keyword: newer matplotlib releases no longer accept
# it as a positional argument.
triangle = Polygon([[.1, .5], [.2, .7], [.3, .54]], closed=True)
patches += [circle, triangle]
# One random scalar per patch; the collection maps it to a colour.
colors = 100 * np.random.rand(len(patches))
p = PatchCollection(patches)
p.set_array(colors)
ax.add_collection(p)
# Show the figure
plt.show()
# Share of respondents per work option; the four values sum to 100.
preferred_workoption = [10.7, 47.6, 38.8, 2.9]
colors = ['b', 'g', 'r', 'c']
labels = ['Collocated', 'Hybrid', 'Fully remote', 'Not applicable']
# Pull the second wedge ('Hybrid') out of the pie by 0.2 of the radius.
explode = (0, 0.2, 0, 0)
# autopct prints each wedge's percentage with one decimal place.
plt.pie(preferred_workoption, colors=colors, labels=labels,
explode=explode, autopct='%1.1f%%',
counterclock=False, shadow=True)
plt.title('Preferred workoption')
plt.show()
# Same data as the pie chart, shown as a bar chart.
preferred_workoption = [10.7, 47.6, 38.8, 2.9]
colors = ['b', 'g', 'r', 'c']
labels = ['Collocated', 'Hybrid', 'Fully remote', 'Not applicable']
widths= [0.7, 0.7, 0.7, 0.7]
# Bars sit at x = 0..3; NOTE(review): `labels` is defined but not passed to plt.bar here.
plt.bar(range(0, 4), preferred_workoption, width=widths, color=colors, align='center')
plt.title('Preferred workoption')
plt.show()
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook
# Histogram of 10,000 standard-normal samples, grouped into 20 bins.
X = np.random.randn(10000)
plt.hist(X, bins = 20)
plt.show();
# 3D parametric curve: the radius depends on the height z, the angle on theta.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
theta = np.linspace(-3 * np.pi, 3 * np.pi, 200)
z = np.linspace(-3, 3, 200)
r = z**3 + 1
# Convert (r, theta) to Cartesian x/y coordinates.
x = r * np.sin(theta)
y = r * np.cos(theta)
ax.plot(x, y, z, label='Parametric Curve')
ax.legend()
plt.show()
from __future__ import print_function
numbers=np.arange(1,11)
numbers
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
np.sin(numbers)
array([ 0.84147098, 0.90929743, 0.14112001, -0.7568025 , -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849, -0.54402111])
np.log(numbers)
array([0. , 0.69314718, 1.09861229, 1.38629436, 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509])
# creating numpy array: the integers 1..100
integers = np.arange(1, 101)
print("integers :", *integers)


# creating own function
def modulo(val):
    """Return the remainder of ``val`` divided by 10."""
    return val % 10


# adding into numpy: wrap the Python function as a universal function
# with 1 input and 1 output
mod_10 = np.frompyfunc(modulo, 1, 1)
# using function over numpy array (applied element by element)
mod_integers = mod_10(integers)
print("mod_integers :", *mod_integers)
integers : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 mod_integers : 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0
numbers = np.arange(10, dtype = np.int8)
numbers
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int8)
numbers.strides
(1,)
numbers.shape = 2,5
numbers
array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int8)
numbers.strides
(5, 1)
first_array = np.zeros((100000,))
first_array
array([0., 0., 0., ..., 0., 0., 0.])
second_array = np.zeros((100000 * 100, ))[::100]
second_array
array([0., 0., 0., ..., 0., 0., 0.])
first_array.shape
(100000,)
second_array.shape
(100000,)
# consecutive elements are 8 bytes apart in memory (one float64 each)
first_array.strides
(8,)
# consecutive elements are 800 bytes apart, because the slice keeps only every 100th float64 of the underlying buffer
second_array.strides
(800,)
%timeit first_array.sum()
20.7 µs ± 59.8 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit second_array.sum()
176 µs ± 9.65 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Structured array: one record per student, with named and typed fields.
student_records = np.array(
    [
        ('Lazaro', 'Oneal', '0526993', 2009, 2.33),
        ('Dorie', 'Salinas', '0710325', 2006, 2.26),
        ('Mathilde', 'Hooper', '0496813', 2000, 2.56),
        ('Nell', 'Gomez', '0740631', 2003, 2.22),
        ('Lachelle', 'Jordan', '0490888', 2003, 2.13),
        ('Claud', 'Waller', '0922492', 2004, 3.60),
        ('Bob', 'Steele', '0264843', 2002, 2.79),
        ('Zelma', 'Welch', '0885463', 2007, 3.69),
    ],
    # Unicode strings of fixed maximum length, a 32-bit int and a 64-bit float.
    dtype=[
        ('name', 'U10'),
        ('surname', 'U10'),
        ('id', 'U7'),
        ('graduation_year', 'i4'),
        ('gpa', 'f8'),
    ],
)
student_records
array([('Lazaro', 'Oneal', '0526993', 2009, 2.33), ('Dorie', 'Salinas', '0710325', 2006, 2.26), ('Mathilde', 'Hooper', '0496813', 2000, 2.56), ('Nell', 'Gomez', '0740631', 2003, 2.22), ('Lachelle', 'Jordan', '0490888', 2003, 2.13), ('Claud', 'Waller', '0922492', 2004, 3.6 ), ('Bob', 'Steele', '0264843', 2002, 2.79), ('Zelma', 'Welch', '0885463', 2007, 3.69)], dtype=[('name', '<U10'), ('surname', '<U10'), ('id', '<U7'), ('graduation_year', '<i4'), ('gpa', '<f8')])
student_records[['id','graduation_year']]
array([('0526993', 2009), ('0710325', 2006), ('0496813', 2000), ('0740631', 2003), ('0490888', 2003), ('0922492', 2004), ('0264843', 2002), ('0885463', 2007)], dtype={'names':['id','graduation_year'], 'formats':['<U7','<i4'], 'offsets':[80,108], 'itemsize':120})
students_sorted_by_surname = np.sort(student_records, order='surname')
print('Students sorted according to the surname :\n', students_sorted_by_surname)
Students sorted according to the surname : [('Nell', 'Gomez', '0740631', 2003, 2.22) ('Mathilde', 'Hooper', '0496813', 2000, 2.56) ('Lachelle', 'Jordan', '0490888', 2003, 2.13) ('Lazaro', 'Oneal', '0526993', 2009, 2.33) ('Dorie', 'Salinas', '0710325', 2006, 2.26) ('Bob', 'Steele', '0264843', 2002, 2.79) ('Claud', 'Waller', '0922492', 2004, 3.6 ) ('Zelma', 'Welch', '0885463', 2007, 3.69)]
students_sorted_by_grad_year = np.sort(student_records, order='graduation_year')
print('Students sorted according to the graduation year :\n', students_sorted_by_grad_year)
Students sorted according to the graduation year : [('Mathilde', 'Hooper', '0496813', 2000, 2.56) ('Bob', 'Steele', '0264843', 2002, 2.79) ('Lachelle', 'Jordan', '0490888', 2003, 2.13) ('Nell', 'Gomez', '0740631', 2003, 2.22) ('Claud', 'Waller', '0922492', 2004, 3.6 ) ('Dorie', 'Salinas', '0710325', 2006, 2.26) ('Zelma', 'Welch', '0885463', 2007, 3.69) ('Lazaro', 'Oneal', '0526993', 2009, 2.33)]
np.datetime64('2022-03-01')
numpy.datetime64('2022-03-01')
np.datetime64('2022-03')
numpy.datetime64('2022-03')
print('Number of weekdays in 2022:')
print(np.busday_count('2022','2023'))
Number of weekdays in 2022: 260
print('Number of weekdays in June 2022:')
np.busday_count('2022-06', '2022-07')
Number of weekdays in June 2022:
22
np.is_busday(np.datetime64('2022-06-05'))
False
first_array = np.arange(16).reshape(4,4)
first_array
array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15]])
first_matrix = np.matrix(first_array)
first_matrix
matrix([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15]])
second_matrix = np.matrix(np.identity(4))
second_matrix
matrix([[1., 0., 0., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.], [0., 0., 0., 1.]])
matrix_a=np.random.randint(5,size=(2,3))
matrix_a
array([[2, 4, 1], [2, 3, 0]])
matrix_b=np.random.randint(5,size=(3,2))
matrix_b
array([[1, 0], [1, 1], [0, 2]])
np.matmul(matrix_a,matrix_b)
array([[6, 6], [5, 3]])
matrix_c=np.matrix("0 1 2;1 0 3;4 -3 8")
matrix_c
matrix([[ 0, 1, 2], [ 1, 0, 3], [ 4, -3, 8]])
inverse = np.linalg.inv(matrix_c)
inverse
matrix([[-4.5, 7. , -1.5], [-2. , 4. , -1. ], [ 1.5, -2. , 0.5]])
print(matrix_c*inverse)
[[1. 0. 0.] [0. 1. 0.] [0. 0. 1.]]
# Coefficient matrix of the linear system; np.matrix is used because the
# np.mat alias was removed in NumPy 2.0.
A = np.matrix("1 -2 1;0 2 -8;-4 5 9")
A
matrix([[ 1, -2, 1], [ 0, 2, -8], [-4, 5, 9]])
b = np.array([0, 16, -18])
b
array([ 0, 16, -18])
x = np.linalg.solve(A, b)
print("Solution", x)
Solution [58. 32. 6.]
Matrix Decomposition techniques:
first_matrix=np.matrix([[4,8],[10,14]])
print("Matrix:\n",first_matrix)
Matrix: [[ 4 8] [10 14]]
# eig returns the eigenvalues and a matrix whose columns are the matching eigenvectors.
eigenvalues, eigenvectors = np.linalg.eig(first_matrix)
print("Eigenvalues:", eigenvalues)
print("Eigenvectors:", eigenvectors)
Eigenvalues: [-1.24695077 19.24695077] Eigenvectors: [[-0.83619408 -0.46462222] [ 0.54843365 -0.885509 ]]
eigenvalues= np.linalg.eigvals(first_matrix)
print("Eigenvalues:", eigenvalues)
Eigenvalues: [-1.24695077 19.24695077]
# Singular value decomposition of a 3x3 matrix. np.matrix is used because the
# np.mat alias was removed in NumPy 2.0.
A = np.matrix("3 1 4;1 5 9;2 6 5")
print("A\n", A)
# Sigma is returned as a 1-D array of singular values, not a diagonal matrix.
# The third factor comes back already (conjugate-)transposed, so `V` here
# holds V^H and A equals U * diag(Sigma) * V with no further transpose.
U, Sigma, V = np.linalg.svd(A, full_matrices=False)
print("U: ",U)
print("Sigma : ",Sigma)
print("V : ", V)
A [[3 1 4] [1 5 9] [2 6 5]] U: [[-0.32463251 0.79898436 0.50619929] [-0.75307473 0.1054674 -0.64942672] [-0.57226932 -0.59203093 0.56745679]] Sigma : [13.58235799 2.84547726 2.32869289] V : [[-0.21141476 -0.55392606 -0.80527617] [ 0.46331722 -0.78224635 0.41644663] [ 0.86060499 0.28505536 -0.42202191]]
print("Product\n", U * np.diag(Sigma) * V)
Product [[3. 1. 4.] [1. 5. 9.] [2. 6. 5.]]
A
matrix([[3, 1, 4], [1, 5, 9], [2, 6, 5]])
# Solve A x = b through the QR factorisation A = Q R:  x = R^-1 (Q^T b).
b = np.array([1,2,3]).reshape(3,1)  # right-hand side as a 3x1 column
q, r = np.linalg.qr(A)
x = np.dot(np.linalg.inv(r), np.dot(q.T, b))
x
matrix([[ 0.26666667], [ 0.46666667], [-0.06666667]])
np.linalg.solve(A,b)
array([[ 0.26666667], [ 0.46666667], [-0.06666667]])
from numpy.polynomial import polynomial
# Polynomial takes coefficients in order of increasing degree: 2 - 3x + x^2.
first_polynomial = np.polynomial.Polynomial([2, -3, 1])
first_polynomial
# Build a polynomial from its roots instead: (x - 1)(x - 2).
second_polynomial = np.polynomial.Polynomial.fromroots([1, 2])
second_polynomial
first_polynomial.roots()
array([1., 2.])
second_polynomial.roots()
array([1., 2.])
np.polyval([5,4,3,2,1], 1)
15
# 1 + 2x + 3x^2 + 4x^3 + 5x^4 (coefficients in order of increasing degree).
third_polynomial = np.polynomial.Polynomial([1,2,3,4,5])
third_polynomial
# Antiderivative of the polynomial.
integral=third_polynomial.integ()
integral
# Differentiate the antiderivative again.
integral.deriv()
# First derivative of the original polynomial.
derivative=third_polynomial.deriv()
derivative
year = np.arange(1,11)
price = np.array([129000, 133000, 138000, 144000, 142000, 141000, 150000, 135000, 134000, 137000])
year
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Least-squares fit of a degree-2 polynomial a*x^2 + b*x + c to (year, price);
# polyfit returns the coefficients from the highest power down.
a, b, c = np.polyfit(year, price, 2)
print ("a:",a)
print ("b:",b)
print ("c:",c)
a: -594.696969696968 b: 7032.575757575749 c: 122516.66666666664
print("Estimated price for 2022:",a*11**2 + b*11 + c )
Estimated price for 2022: 127916.66666666674
# plt.clf()
# Plot the observed prices as a line plus markers, then add the value the
# fitted polynomial gives for year 11.
plt.figure()
plt.plot(year,price)
plt.scatter(year, price)
plt.scatter(11, a*11**2 + b*11 + c )
# NOTE(review): a, b, c come from a degree-2 polyfit, so the fitted curve is
# quadratic although the title says linear - confirm the intended wording.
plt.title('Linear regression')
plt.xlabel('year')
plt.ylabel('average house price')
Text(0, 0.5, 'average house price')