UdemyでPython for Time Series Data Analysisのクラスの学習を進めている。Pythonで機械学習を用いて時系列データを予測する方法が解説される。まずはPythonの使い方として、numpyとpandasの基礎を学習した。
numpyの学習コード1つ目
# NumPy basics, part 1: arrays from lists, built-in constructors, random
# sampling, and simple reductions.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
import numpy as np

# --- Arrays from Python lists ---
mylist = [1, 2, 3]
type(mylist)
np.array(mylist)
arr = np.array(mylist)
arr

mylist = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
mylist
mymatrix = np.array(mylist)
mymatrix.shape  # (3, 3): a nested list becomes a 2-D array
mynewmatrix = np.array(mylist)

# --- Built-in constructors ---
np.arange(0, 10, 2)   # 0, 2, 4, 6, 8 -- the stop value is exclusive
np.zeros((4, 10))
np.ones((5, 5)) + 4   # the scalar is broadcast: every element becomes 5.0
np.ones(4) * 100      # element-wise multiplication
[1, 1, 1, 1] * 100    # contrast: list * int repeats the list (400 items)
np.linspace(0, 10)    # evenly spaced samples (50 by default), ends included
np.eye(5)             # 5x5 identity matrix

# --- Random numbers ---
np.random.rand(5, 5)           # uniform samples on [0, 1)
np.random.randn(10)            # standard-normal samples
np.random.randint(1, 100, 10)  # 10 integers drawn from [1, 100)
# Seeding makes every draw below reproducible; the order of the calls matters.
np.random.seed(555)
np.random.rand(4)

# --- Reshaping and reductions ---
arr = np.arange(25)
arr
ranarr = np.random.randint(0, 50, 10)
ranarr
arr.reshape(5, 5)  # returns a 5x5 array; the result is not assigned, so arr stays 1-D
ranarr.max()
ranarr.argmax()    # index of the largest value
ranarr.argmin()    # index of the smallest value
numpyの学習コード2つ目
# NumPy basics, part 2: indexing, slicing, views, 2-D indexing, and boolean
# selection.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
import numpy as np

# --- 1-D indexing and slicing ---
arr = np.arange(0, 11)
arr[8]
arr[1:5]
arr[0:5]
arr[5:]

# --- Element-wise arithmetic returns new arrays; arr is not modified ---
arr + 100
new_arr = arr / 2
new_arr
arr**2
arr

# --- A slice is a view, not a copy ---
slice_of_arr = arr[0:6]
slice_of_arr
slice_of_arr[:] = 99  # writes through the view into the first six items of arr
slice_of_arr
arr                   # first six elements are now 99

# --- 2-D indexing ---
arr_2d = np.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
arr_2d.shape
arr_2d[1, 1]     # row 1, column 1 -> 25
arr_2d[2, 2]     # row 2, column 2 -> 45
arr_2d
arr_2d[:2, 1:]   # first two rows, columns from index 1 onward

# --- Boolean (conditional) selection ---
arr = np.arange(1, 11)
arr
bool_arr = arr > 4   # element-wise comparison yields a boolean mask
arr[bool_arr]        # keeps only the elements where the mask is True
arr[arr > 4]
arr[arr <= 6]
numpyの学習コード3つ目
# NumPy basics, part 3: element-wise arithmetic, universal functions, and
# reductions along an axis.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
import numpy as np

arr = np.arange(0, 10)
arr

# --- Arithmetic with scalars and arrays (all element-wise) ---
arr + 100
arr / 2
arr**2
(arr + 2) / 100
arr
arr + arr

# arr contains 0, so the divisions and the logarithm below would emit
# RuntimeWarnings. The results (inf / nan / -inf) are the point of the demo,
# so the warnings are silenced for exactly this scope.
with np.errstate(divide="ignore", invalid="ignore"):
    1 / arr        # 1/0 -> inf in the first position
    arr / arr      # 0/0 -> nan in the first position
    np.sqrt(arr)
    np.log(arr)    # log(0) -> -inf in the first position
np.sin(arr)

# --- Reductions ---
arr.sum()
arr.mean()
arr.max()

arr_2d = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
arr_2d
arr_2d.shape
arr_2d.sum()          # sum of every element
arr_2d.sum(axis=0)    # sum down the rows: one total per column
numpyの学習コード4つ目
# NumPy basics, part 4: exercise session covering constructors, reshaping,
# matrix slicing, and reductions.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
import numpy as np

# --- Constructors ---
np.zeros(10)
np.ones(10)
np.ones(10) * 5
np.arange(10, 51)            # integers 10..50 (stop is exclusive)
np.arange(10, 51, 2)         # even integers 10..50
np.arange(9).reshape(3, 3)   # 3x3 matrix holding 0..8
np.eye(3, 3)                 # 3x3 identity matrix
np.random.rand(1)            # one uniform sample on [0, 1)
np.random.randn(25)          # 25 standard-normal samples

# --- 0.01 .. 1.00 in steps of 0.01 ---
arr = np.arange(1, 101) / 100
arr.reshape(10, 10)          # result is not assigned; arr stays 1-D
np.linspace(0, 1, 20)        # 20 evenly spaced points, both ends included

# --- Matrix slicing ---
mat = np.arange(1, 26).reshape(5, 5)
mat
mat[2:, 1:]     # rows from index 2, columns from index 1
mat[3, 4]       # single element -> 20
mat[:3, 1:2]    # slicing the column (1:2) keeps the result 2-D, shape (3, 1)
mat[4, :]       # last row
mat[3:5, :]     # last two rows

# --- Reductions ---
mat.sum()
mat.std()
mat.sum(axis=0)  # one total per column

# --- Reproducible random draw ---
np.random.seed(101)
np.random.rand(1)
pandasの学習コード1つ目
# pandas basics, part 1: building Series objects and label-aligned arithmetic.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
import numpy as np
import pandas as pd

# --- Source data in several container types ---
labels = ['a', 'b', 'c']
mylist = [10, 20, 30]
arr = np.array(mylist)
arr
d = {'a': 10, 'b': 20, 'c': 30}  # defined here but not used further in this snippet

# --- Constructing Series ---
pd.Series(data=mylist)             # no index given: default integer index
pd.Series(arr, index=labels)       # explicit labels as the index
pd.Series(data=['d', 'a', 'e'])    # a Series can hold strings as well

# --- Label-based lookup and aligned arithmetic ---
ser1 = pd.Series([1, 2, 3, 4], index=['USA', 'Germany', 'USSR', 'Japan'])
ser1
ser1['USA']
ser2 = pd.Series([1, 4, 5, 6], index=['USA', 'Germany', 'Italy', 'Japan'])
ser2
# Addition aligns on index labels; labels present in only one operand
# ('USSR', 'Italy') produce NaN in the result.
ser1 + ser2
pandasの学習コード2つ目
# pandas basics, part 2: DataFrame construction, column/row selection,
# conditional filtering, and index manipulation.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
import pandas as pd
import numpy as np
from numpy.random import randn

# Seed first so the random matrix (and everything derived from it) is
# reproducible.
np.random.seed(101)
rand_mat = randn(5, 4)
rand_mat

df = pd.DataFrame(data=rand_mat, index='A B C D E'.split(), columns='W X Y Z'.split())
df

# --- Column selection, creation, and removal ---
df[['W', 'Y']]                  # a list of names selects several columns
df['NEW'] = df['W'] + df['Y']   # element-wise sum stored as a new column
df
df.drop('NEW', axis=1, inplace=True)  # axis=1 targets columns; modifies df itself
df

# --- Row selection ---
df.loc['A']   # by index label
df.iloc[2]    # by integer position
df

# --- Conditional filtering ---
df_bool = df > 0
df[df_bool]           # same shape as df; cells where the mask is False become NaN
df[df['W'] > 0]       # a boolean Series keeps only the matching rows
cond1 = df['W'] > 0
cond2 = df['Y'] > 1
df[(cond1) & (cond2)]  # & combines the two masks element-wise
df

# --- Index manipulation ---
df.reset_index()       # returns a new frame; the result is not assigned, df is unchanged
new_ind = 'CA NY WY OR CO'.split()
new_ind
df['States'] = new_ind
df.set_index('States', inplace=True)  # 'States' becomes the index of df itself
ser_w = df['W'] > 0
pandasの学習コード3つ目
# pandas basics, part 3: handling missing data with dropna / fillna.
# Bare expressions are kept from the notebook session: they display a value
# in a REPL/notebook and have no effect when run as a plain script.
# None of the calls below is assigned or in-place, so df is never modified.
import numpy as np
import pandas as pd

df = pd.DataFrame({'A': [1, 2, np.nan], 'B': [5, np.nan, np.nan], 'C': [1, 2, 3]})
df

# --- Dropping missing values ---
df.dropna()           # drop every row that contains a NaN
df.dropna(axis=1)     # drop every column that contains a NaN
df.dropna(thresh=2)   # keep only rows with at least 2 non-NaN values

# --- Filling missing values ---
df.fillna(value='FILL VALUE')          # replace each NaN with a constant
df.mean()                              # per-column mean; NaN entries are skipped
df.fillna(df.mean())                   # fill each column with its own mean
df['A'].fillna(value=df['A'].mean())   # fill a single column with its mean