One of the reasons NumPy is so important for numerical computations in Python is that it is designed for efficiency on large arrays of data.
import numpy as np
# Build a 1-D integer array from a Python list.
quiz_1 = np.array([2, 5, 4, 6, 8, 3, 3])
quiz_1
array([2, 5, 4, 6, 8, 3, 3])
# Scalar arithmetic is applied to every element: each entry grows by 2.
quiz_1 = quiz_1 + 2
quiz_1
array([ 4, 7, 6, 8, 10, 5, 5])
# Arrays of equal length are added element by element.
quiz_2 = np.array([5, 7, 7, 4, 9, 5, 8])
quiz_1 + quiz_2
array([ 9, 14, 13, 12, 19, 10, 13])
# 2x3 array of random floats; no seed is set here, so the values change on every run.
data = np.random.randn(2, 3)
data
array([[-0.43402531, -0.17128507, -0.33244723], [-1.33995114, -0.41079432, -1.21245102]])
# Size along each axis: (rows, columns).
data.shape
(2, 3)
# Element type shared by all entries of the array.
data.dtype
dtype('float64')
# Number of dimensions (axes).
data.ndim
2
# Length-10 array filled with floating-point zeros.
np.zeros(10)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
# Length-14 array filled with floating-point ones.
np.ones(14)
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
# np.empty allocates memory without initialising it: the contents are arbitrary
# and only happen to be zeros in this run. Use np.zeros when zeros are required.
np.empty((2,3,4))
array([[[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]], [[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]]])
# Array counterpart of range(15): the integers 0 through 14.
np.arange(15)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
Review more array-creation functions on page 160 of the book.
# A list of Python floats produces a float64 array.
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr.dtype
dtype('float64')
# astype returns a new array; converting float to int drops the fractional part
# (truncation toward zero: 3.7 -> 3, -2.6 -> -2), it does not round.
arr_int = arr.astype(int)
print(arr_int)
arr_int
[ 3 -1 -2 0 12 10]
array([ 3, -1, -2, 0, 12, 10])
# Cast an array of numeric strings to floats; each element is parsed individually.
numeric_strings = np.array(['1.25', '-9.6', '42'])
numeric_strings.astype(float)
array([ 1.25, -9.6 , 42. ])
# The integers 0..9, used for the slicing examples.
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# Half-open slice: indices 3 and 4 (the stop index 5 is excluded).
arr[3:5]
array([3, 4])
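An important difference from Python’s built-in lists is that array slices are views on the original array: the data is not copied, so any modification to the slice is reflected in the source array.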
# Basic slicing returns a view that shares memory with arr, not a copy.
arr_slice = arr[5:8]
arr_slice
array([5, 6, 7])
# Writing through the view also changes arr: index 5 of arr becomes -5.
# Use arr[5:8].copy() when an independent copy is needed.
arr_slice[0] = -5
print(arr_slice)
print(arr)
[-5 6 7] [ 0 1 2 3 4 -5 6 7 8 9]
# Chained indexing: select row 1, then element 2 of that row.
# arr2d[1, 2] is the equivalent single-step form.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[1][2]
6
# Four stacked 2x2 blocks: axis 0 selects the block, axis 1 the row within
# the block, axis 2 the column.
arr3d = np.array(
    [
        [[0, 1], [2, 3]],
        [[4, 5], [6, 7]],
        [[8, 9], [10, 11]],
        [[12, 13], [14, 15]],
    ]
)
arr3d.shape
(4, 2, 2)
# Fixing the first two indices leaves only the last axis:
# all values whose indices start with (1, 0).
arr3d[1,0]
array([4, 5])
# Slice along axis 0 only: the first two 2x2 blocks.
arr3d[:2]
array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
# Slices keep their axes: first two blocks, first row of each -> shape (2, 1, 2).
arr3d[:2, :1]
array([[[0, 1]], [[4, 5]]])
# 1-D array of 15 consecutive integers.
arr = np.arange(15)
arr
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
# Arrange the 15 elements as 5 rows x 3 columns, filled row by row;
# reshape returns a new array object and leaves arr's own shape unchanged.
arr.reshape(5,3)
array([[ 0, 1, 2], [ 3, 4, 5], [ 6, 7, 8], [ 9, 10, 11], [12, 13, 14]])
# Same data arranged as 3 rows x 5 columns; the shape may also be passed as a tuple.
arr = np.arange(15).reshape((3, 5))
arr
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]])
# Transpose: rows and columns are swapped, giving shape (5, 3).
arr.T
array([[ 0, 5, 10], [ 1, 6, 11], [ 2, 7, 12], [ 3, 8, 13], [ 4, 9, 14]])
A universal function, or ufunc, is a function that performs element-wise operations on data in ndarrays.
# The integers 0..9 as input for the element-wise functions.
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# Unary ufunc: square root of each element (the result is a float array).
np.sqrt(arr)
array([0. , 1. , 1.41421356, 1.73205081, 2. , 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])
# Unary ufunc: e raised to the power of each element.
np.exp(arr)
array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03])
# Two independent random vectors of length 8; the tuple expression displays both.
x = np.random.randn(8)
y = np.random.randn(8)
x, y
(array([-1.02339892, -0.31914358, -0.62594658, 0.56634561, -0.04781019, -0.50958232, -0.19189244, 0.10514314]), array([-1.29927276, 1.04835848, 1.47446977, 0.4718186 , -0.56813979, -0.66551754, -0.09810613, -0.34811884]))
# Binary ufunc: element-wise maximum of the two arrays.
np.maximum(x, y)
array([-1.02339892, 1.04835848, 1.47446977, 0.56634561, -0.04781019, -0.50958232, -0.09810613, 0.10514314])
# Binary ufunc: element-wise sum, the same result as x + y.
np.add(x, y)
array([-2.32267168, 0.72921491, 0.84852319, 1.03816421, -0.61594998, -1.17509986, -0.28999857, -0.24297571])
# 5x4 array of random values for the aggregation examples.
arr = np.random.randn(5, 4)
arr
array([[-0.7777328 , 1.68694402, -0.26486943, -1.1175823 ], [-1.26289577, 0.58083666, -0.10501015, -0.43331565], [ 0.65139138, 0.75855367, -0.79297748, 0.08067757], [-0.03707011, 1.41360521, -1.10087436, -1.03202629], [-0.52883251, -1.03657222, 1.20883805, 0.10639226]])
# axis=1 averages across the columns of each row: one mean per row (5 values).
arr.mean(axis=1)
array([-0.11831013, -0.30509623, 0.17441129, -0.18909139, -0.0625436 ])
# axis=0 averages down the rows of each column: one mean per column (4 values).
arr.mean(axis=0)
array([-0.39102796, 0.68067347, -0.21097867, -0.47917088])
# Small 3x3 integer array for the cumulative-sum example.
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
arr
array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
# Running total down each column (axis=0); the last row holds the column sums.
arr.cumsum(axis=0)
array([[ 0, 1, 2], [ 3, 5, 7], [ 9, 12, 15]])
# 20 random values, then an element-wise comparison that yields a boolean array.
arr = np.random.randn(20)
print(arr)
arr > 1
[-0.39865273 1.84245074 -0.25709915 -1.31675921 1.29881523 -0.74195666 -0.86255858 -2.06712123 -0.04579372 -0.53765541 -0.02602825 1.17739062 -0.27865596 0.28736465 1.5241784 -0.41691826 -0.74333696 -0.15779299 1.64339274 -0.30294148]
array([False, True, False, False, True, False, False, False, False, False, False, True, False, False, True, False, False, False, True, False])
# True counts as 1 when summed, so this is the number of elements greater than 1.
(arr > 1).sum()
5
# True if at least one element is greater than 1.
(arr > 1).any()
True
# True only if every element is greater than 1.
(arr > 1).all()
False
# 5x3 array of random values to be sorted.
arr = np.random.randn(5, 3)
arr
array([[-0.46540847, -1.71177707, -0.84448996], [ 1.2378045 , -0.38446321, -0.24074731], [ 1.1510245 , 0.36555014, -0.47874062], [-0.69335483, 0.80256177, -0.05947087], [ 0.21247471, 2.69164903, 0.60130957]])
# ndarray.sort works in place and returns None; axis 1 sorts the values
# within each row in ascending order.
arr.sort(1)
arr
array([[-1.71177707, -0.84448996, -0.46540847], [-0.38446321, -0.24074731, 1.2378045 ], [-0.47874062, 0.36555014, 1.1510245 ], [-0.69335483, -0.05947087, 0.80256177], [ 0.21247471, 0.60130957, 2.69164903]])
# np.unique returns the distinct values in sorted order.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)
array(['Bob', 'Joe', 'Will'], dtype='<U4')
# Pure-Python alternative: a set also removes duplicates but has no defined order.
set(names)
{'Bob', 'Joe', 'Will'}
Arrays are saved by default in an uncompressed raw binary format with file extension .npy
# np.save appends the .npy extension when the file name lacks it,
# so the array is read back from 'some_array.npy'.
arr = np.arange(10)
np.save('some_array', arr)
np.load('some_array.npy')
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Try at home
# Small 2x2 integer matrix, used as the left operand in the product examples.
x = np.array([[1, 2], [3, 4]])
x
array([[1, 2], [3, 4]])
# Second 2x2 integer matrix, the right operand in the product examples.
y = np.array([[6, 23], [-1, 7]])
y
array([[ 6, 23], [-1, 7]])
# * multiplies element by element; it is not the matrix product.
x * y
array([[ 6, 46], [-3, 28]])
How do you calculate the dot product?
# Matrix product of x and y.
x.dot(y)
# Equivalent spellings: np.dot(x, y) or x @ y
array([[ 4, 37], [14, 97]])
from numpy.linalg import det
# Random 5x5 square matrix for the determinant example.
X = np.random.randn(5, 5)
X
array([[-0.07206205, 0.66244219, -0.27415439, 1.19915517, -0.62921893], [ 1.40669103, 1.23250392, 0.09315574, -0.21424153, 0.5826936 ], [ 1.34485978, 0.66375048, 1.94932698, 0.69955914, -0.40290843], [-0.1244241 , -0.1716577 , -0.88798675, -1.14550937, 1.02411647], [-0.55653207, 0.45554383, -0.47304156, 0.1021546 , 0.38182886]])
# Determinant of X (a single scalar); det is imported from numpy.linalg.
det(X)
-0.4097423568653652
groceries.zip contains NumPy array files of vegetable prices for different groceries in the country. Each array contains the per-kilo price for multiple types of vegetables, ordered as: Lemon, Tomato, Potato, Watermelon, Zucchini, Eggplant, Apple, Banana.
You want to buy the following amounts (kg) from one of the groceries:
Lemon | Tomato | Potato | Watermelon | Zucchini | Eggplant | Apple | Banana |
---|---|---|---|---|---|---|---|
2 | 1.5 | 3 | 6 | 1 | 1 | 2 | 3 |
The numpy.random module supplements the built-in Python random with functions for efficiently generating whole arrays of sample values from many kinds of probability distributions.
# 4x4 array of samples from the standard normal distribution
# (np.random.normal defaults: mean 0, standard deviation 1).
np.random.normal(size=(4, 4))
Why does it generate new numbers every time you run the code?
# Seeding the global generator makes the sequence reproducible:
# the same seed yields the same samples on every run.
np.random.seed(1234)
np.random.normal(size=(4, 4))