Python for Time Series Data Analysis (Udemy) 4

投稿者: | 2022年7月23日

引き続きUdemyでPython for Time Series Data Analysisの学習を進めている。Pandasを用いたTime Series Dataの解析手法に関する内容。

Datetime classの使用例

# Demonstrates building datetime objects and NumPy datetime64 arrays.
from datetime import datetime

# The datetime64 examples at the end of this snippet need NumPy; without this
# import they fail with NameError.
import numpy as np

# NOTE(review): 20 is accepted by datetime (year 0020) but may have been
# intended as 2020 — confirm.
my_year = 20
my_month = 1
my_day = 2
my_hour = 13
my_min = 30
my_sec = 15

# Date only: the time components default to midnight.
my_date = datetime(my_year, my_month, my_day)
my_date

# Date plus explicit hour, minute and second.
my_date_time = datetime(my_year, my_month, my_day, my_hour, my_min, my_sec)
type(my_date_time)
my_date_time.hour

# Array of dates stored with hour precision.
np.array(['2020-03-15', '2020-03-16', '2020-03-17'], dtype='datetime64[h]')
# Range of dates with day precision between the two year strings.
np.arange('1968', '1976', dtype='datetime64[D]')

index methodの使用例

# Demonstrates building DatetimeIndex objects and querying a date-indexed frame.
import numpy as np  # needed for np.random.randn below
import pandas as pd

# Seven consecutive daily timestamps starting on 2018-01-01.
pd.date_range('Jan 01, 2018', periods=7, freq='D')

# The two strings use different date formats. Each one is parsed on its own so
# the example does not depend on how a given pandas version handles
# mixed-format lists passed to to_datetime.
pd.to_datetime([pd.to_datetime(s) for s in ['1/2/2018', 'Jan 03, 2017']])

# Explicit day/month/year format.
pd.to_datetime(['2/1/2018', '3/1/2018'], format='%d/%m/%Y')

# Random 3x2 data indexed by three consecutive days.
data = np.random.randn(3, 2)
cols = ['A', 'B']
print(data)
idx = pd.date_range('2020-01-01', periods=3, freq='D')
df = pd.DataFrame(data, index=idx, columns=cols)
df
df.index

# Latest / earliest index value and their positions.
df.index.max()
df.index.argmax()
df.index.min()
df.index.argmin()

resample methodの使用例

# Load the Starbucks data with 'Date' as the index; parse_dates=True asks
# pandas to parse that index as dates.
import pandas as pd
df = pd.read_csv(
    './UDEMY_TSA_FINAL/Data/starbucks.csv',
    parse_dates=True,
    index_col='Date',
)
df.head()
df.index
# Mean of every column per 'A' resampling bucket.
# NOTE(review): newer pandas releases deprecate the 'A' alias — confirm
# against the installed version.
df.resample('A').mean()
def first_day(entry):
    """Return the first element of ``entry``, or ``None`` if it is empty.

    Used as a custom aggregator via ``df.resample(...).apply(first_day)``.

    Args:
        entry: A sized, indexable collection (for example a pandas Series
            or a list).

    Returns:
        The element at position 0, or ``None`` for an empty ``entry``.
    """
    if len(entry) == 0:
        return None
    # Prefer positional access when available: on a Series with a
    # non-integer index, ``entry[0]`` is a label lookup rather than
    # "the first row". Plain sequences have no ``iloc`` and are indexed
    # directly.
    positional = getattr(entry, 'iloc', entry)
    return positional[0]
# Apply the custom first_day aggregator to each 'A' resampling bucket.
df.resample('A').apply(first_day)
df.head()
# Bar chart of the mean 'Close' value per 'A' bucket.
close_means = df['Close'].resample(rule='A').mean()
close_means.plot.bar()

shiftメソッドの使い方

# Demonstrates DataFrame.shift on the Starbucks data.
import pandas as pd

# index_col='Date' is required for parse_dates=True to have any effect (it
# parses the index); this also matches how the other snippets load the file.
# NOTE(review): the file name is spelled 'Starbucks.csv' here but
# 'starbucks.csv' in other snippets — confirm the on-disk name on
# case-sensitive file systems.
df = pd.read_csv('./UDEMY_TSA_FINAL/Data/Starbucks.csv', index_col='Date', parse_dates=True)
df.head()
df.tail()

# Shift values one row down (forward) / one row up (backward); the index
# itself is unchanged.
df.shift(1).tail()
df.shift(-1).tail()

rollingを用いた移動平均線のplot

# Plots moving averages of the 'Close' column using rolling and expanding windows.
import pandas as pd
df = pd.read_csv('./UDEMY_TSA_FINAL/Data/Starbucks.csv', index_col='Date', parse_dates=True)
df.head()

# Raw series with a 60-row rolling mean.
df['Close'].plot(figsize=(12, 5))
df.rolling(window=60).mean()['Close'].plot()

# Store a 30-row rolling mean as a new column and plot it next to the raw series.
df['Close: 30 Day Mean'] = df['Close'].rolling(window=30).mean()
# Call head(); the bare attribute `df.head` only references the method.
df.head()
df[['Close', 'Close: 30 Day Mean']].plot(figsize=(12, 5))

# Expanding mean: average of all rows seen so far.
df['Close'].expanding().mean().plot(figsize=(12, 5))

plotの整形

# Demonstrates titles, axis labels, axis limits and line styling on pandas plots.
import pandas as pd
df = pd.read_csv('./UDEMY_TSA_FINAL/Data/starbucks.csv', index_col='Date', parse_dates=True)
df.head()
df.index
df['Close'].plot()
df['Volume'].plot()

title = 'TITLE'
ylabel = 'Y LABEL'
xlabel = 'X LABEL'

# Keep the Axes returned by plot(); the autoscale/set calls below operate on
# it and previously failed because `ax` was never assigned.
ax = df['Close'].plot(figsize=(12, 6), title=title)
ax.autoscale(axis='both', tight=True)
ax.set(xlabel=xlabel, ylabel=ylabel)

df['Close'].plot(figsize=(12, 4))
# Restrict the plot to 2017, by slicing the data and by setting the x limits.
df['Close']['2017-01-01':'2017-12-31'].plot(figsize=(12, 4), xlim=['2017-01-01', '2017-12-31'])
# Fixed y range, dashed green line.
df['Close']['2017-01-01':'2017-12-31'].plot(figsize=(12, 4), ylim=[0, 70], ls='--', c='green')

Plotの整形（日付軸の目盛りと書式の設定）

# Customises date tick locations and labels on the x axis.
# NOTE(review): `df` is not loaded in this snippet; it is presumably the
# Starbucks frame from the preceding section — confirm.
from matplotlib import dates

# Keep the Axes returned by plot(); every call below configures it and
# previously failed because `ax` was never assigned.
ax = df['Close'].plot(xlim=['2017-01-01', '2017-03-01'], ylim=[50, 60], figsize=(12, 5))
ax.set(xlabel='')

# Major ticks: weekday 0 of every week, labelled weekday-month-day.
ax.xaxis.set_major_locator(dates.WeekdayLocator(byweekday=0))
ax.xaxis.set_major_formatter(dates.DateFormatter('%a-%B-%d'))

# Minor ticks: one per month; the leading newlines push the label below the
# major tick labels.
ax.xaxis.set_minor_locator(dates.MonthLocator())
ax.xaxis.set_minor_formatter(dates.DateFormatter('\n\n%b'))

Time Series Dataの可視化

# Loads the monthly milk production data, indexes it by date and draws a
# line plot and a per-month box plot.
import pandas as pd

# Bind the loaded frame to `df`; the result was previously discarded, so the
# statements below did not operate on this dataset.
df = pd.read_csv('./UDEMY_TSA_FINAL/Data/monthly_milk_production.csv', encoding='utf8')
title = "Monthly milk production: pounds per cow. Jan '62 - Dec '75"
print(len(df))
print(df.head())
df.info()
df.dtypes

# Convert the 'Date' column to datetimes and use it as the index.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')
df.head()
df.plot()

# Numeric month (1-12) taken from the index.
df['Month'] = df.index.month
df.head()
# Displays the month names of the index; the result is not stored, so `df`
# is unchanged by this line.
df['Month'].index.strftime('%B')
df.head()

# One box per value of the 'Month' column.
df.boxplot(by='Month', figsize=(12, 5))

Boxplot

Time Series Dataの整形/可視化

# Reshapes and visualises the UMTMVS series: datetime index, percent change,
# extremes, yearly means, rolling mean and recovery after the 2008 peak.
import numpy as np
import pandas as pd
df = pd.read_csv('./UDEMY_TSA_FINAL/Data/UMTMVS.csv')
df.head()

# Use the 'DATE' column as the index and convert it to datetimes.
df = df.set_index('DATE')
df.index
df.index = pd.to_datetime(df.index)
df.index
df.plot(figsize=(14, 8))

# Percent change from January 2009 to January 2019.
100 * (df.loc['2019-01-01'] - df.loc['2009-01-01']) / df.loc['2009-01-01']
# Date of the minimum value from 2005 onwards.
df.loc['2005-01-01':].idxmin()
# The six largest observations.
df.sort_values(by='UMTMVS', ascending=False).head(6)
# Difference between January 2008 and January 2009. This was written with
# `=`, which overwrote the January 2008 observation and skewed the yearly
# and rolling means computed below.
# NOTE(review): `-` is inferred from the surrounding read-only queries — confirm.
df.loc['2008-01-01'] - df.loc['2009-01-01']

# Yearly means as a bar chart.
# NOTE(review): newer pandas releases deprecate the 'Y' alias — confirm
# against the installed version.
df.resample('Y').mean().plot.bar(figsize=(12, 8))

# Year with the largest increase in yearly mean over the previous year.
yearly_data = df.resample('Y').mean()
yearly_data_shift = yearly_data.shift(1)
change = yearly_data - yearly_data_shift
change['UMTMVS'].idxmax()

# 12-row rolling mean plotted alongside the raw series.
df['Yearly Mean'] = df['UMTMVS'].rolling(window=12).mean()
df.plot(figsize=(12, 5))

# Reload the raw data, dropping the helper column added above.
df = pd.read_csv('./UDEMY_TSA_FINAL/Data/UMTMVS.csv', index_col='DATE', parse_dates=True)

# Peak within 2008 and its date.
df2008 = df.loc['2008-01-01':'2008-12-01']
df2008.idxmax()
df2008.max()

# Rows from June 2008 onwards whose value is at least the threshold.
# NOTE(review): 510081.0 is presumably the 2008 peak shown by df2008.max() — confirm.
df_post_peak = df.loc['2008-06-01':]
df_post_peak[df_post_peak >= 510081.0].dropna()
# Number of rows between June 2008 and March 2014 inclusive.
len(df.loc['2008-06-01':'2014-03-01'])

コメントを残す

メールアドレスが公開されることはありません。* が付いている欄は必須項目です