Jovian
⭐️
Sign In

Time Series First Trials in Python

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6
In [8]:
data = pd.read_csv('AirPassengers.csv')
print(data.head())
Month #Passengers 0 1949-01 112 1 1949-02 118 2 1949-03 132 3 1949-04 129 4 1949-05 121
In [9]:
dateparse = lambda dates: pd.datetime.strptime(dates, '%Y-%m')
data = pd.read_csv('AirPassengers.csv', parse_dates=['Month'], index_col='Month',date_parser=dateparse)
print(data.head())
#Passengers Month 1949-01-01 112 1949-02-01 118 1949-03-01 132 1949-04-01 129 1949-05-01 121
In [11]:
data.index
Out[11]:
DatetimeIndex(['1949-01-01', '1949-02-01', '1949-03-01', '1949-04-01',
               '1949-05-01', '1949-06-01', '1949-07-01', '1949-08-01',
               '1949-09-01', '1949-10-01',
               ...
               '1960-03-01', '1960-04-01', '1960-05-01', '1960-06-01',
               '1960-07-01', '1960-08-01', '1960-09-01', '1960-10-01',
               '1960-11-01', '1960-12-01'],
              dtype='datetime64[ns]', name='Month', length=144, freq=None)
In [14]:
ts = data[:2] 
In [15]:
print(ts)
#Passengers Month 1949-01-01 112 1949-02-01 118
In [16]:
from datetime import datetime
In [18]:
ts = data["#Passengers"] 
In [19]:
print(ts.head())
Month 1949-01-01 112 1949-02-01 118 1949-03-01 132 1949-04-01 129 1949-05-01 121 Name: #Passengers, dtype: int64
In [20]:
ts[datetime(1949,1,1)]
Out[20]:
112
In [21]:
plt.plot(ts)
Out[21]:
[<matplotlib.lines.Line2D at 0x1f52ae59550>]
Notebook Image
In [29]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    
    #Determing rolling statistics
    rolmean = timeseries.rolling(12).mean()
    rolstd = timeseries.rolling(12).std()

    #Plot rolling statistics:
    orig = plt.plot(timeseries, color='blue',label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)
In [30]:
test_stationarity(ts)
Notebook Image
Results of Dickey-Fuller Test: Test Statistic 0.815369 p-value 0.991880 #Lags Used 13.000000 Number of Observations Used 130.000000 Critical Value (1%) -3.481682 Critical Value (5%) -2.884042 Critical Value (10%) -2.578770 dtype: float64
In [1]:
import jovian
In [ ]:
jovian.commit()
[jovian] Saving notebook..
[jovian] Creating a new notebook on https://jvn.io [jovian] Please enter your API key (from https://jvn.io ):
In [ ]: