Jovian
⭐️
Sign In
Learn data science and machine learning by building real-world projects on Jovian
In [1]:
from urllib.request import urlretrieve
In [2]:
italy_covid_url = 'https://gist.githubusercontent.com/aakashns/f6a004fa20c84fec53262f9a8bfee775/raw/f309558b1cf5103424cef58e2ecb8704dcd4d74c/italy-covid-daywise.csv'

urlretrieve(italy_covid_url, 'italy-covid-daywise.csv')
Out[2]:
('italy-covid-daywise.csv', <http.client.HTTPMessage at 0x23c3ef06850>)
In [3]:
!pip install pandas
Requirement already satisfied: pandas in c:\users\chaimf\anaconda3\lib\site-packages (1.2.4) Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\chaimf\anaconda3\lib\site-packages (from pandas) (2.8.1) Requirement already satisfied: pytz>=2017.3 in c:\users\chaimf\anaconda3\lib\site-packages (from pandas) (2021.1) Requirement already satisfied: numpy>=1.16.5 in c:\users\chaimf\anaconda3\lib\site-packages (from pandas) (1.20.1) Requirement already satisfied: six>=1.5 in c:\users\chaimf\anaconda3\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
In [4]:
import pandas as pd
In [5]:
covid_df = pd.read_csv('italy-covid-daywise.csv')
In [6]:
covid_df
Out[6]:
In [7]:
type(covid_df)
Out[7]:
pandas.core.frame.DataFrame
In [8]:
covid_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 248 entries, 0 to 247 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 248 non-null object 1 new_cases 248 non-null float64 2 new_deaths 248 non-null float64 3 new_tests 135 non-null float64 dtypes: float64(3), object(1) memory usage: 7.9+ KB
In [9]:
covid_df.describe()
Out[9]:
In [11]:
covid_df.columns
Out[11]:
Index(['date', 'new_cases', 'new_deaths', 'new_tests'], dtype='object')
In [13]:
covid_df.shape
Out[13]:
(248, 4)
In [14]:
import jovian
In [15]:
jovian.commit(project='Lesson 2 loop and if')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/lesson-2-loop-and-if" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/lesson-2-loop-and-if
In [17]:
covid_data_dict = {
    'date':       ['2020-08-30', '2020-08-31', '2020-09-01', '2020-09-02', '2020-09-03'],
    'new_cases':  [1444, 1365, 996, 975, 1326],
    'new_deaths': [1, 4, 6, 8, 6],
    'new_tests': [53541, 42583, 54395, None, None]
    }
In [19]:
covid_data_dict['new_cases']
Out[19]:
[1444, 1365, 996, 975, 1326]
In [23]:
covid_df['new_cases'][246]
Out[23]:
975.0
In [21]:
type(covid_df['new_cases'])
Out[21]:
pandas.core.series.Series
In [24]:
covid_df.new_cases
Out[24]:
0         0.0
1         0.0
2         0.0
3         0.0
4         0.0
        ...  
243    1444.0
244    1365.0
245     996.0
246     975.0
247    1326.0
Name: new_cases, Length: 248, dtype: float64
In [25]:
date_cases = covid_df[['date','new_cases']]
date_cases
Out[25]:
In [29]:
covid_df.loc[105:115]
Out[29]:
In [28]:
covid_df.new_tests.first_valid_index()
Out[28]:
111
In [33]:
covid_df.sample(10)
Out[33]:
In [44]:
import jovian
In [45]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [37]:
total_cases=covid_df.new_cases.sum()
Out[37]:
271515.0
In [38]:
total_deaths=covid_df.new_deaths.sum()
In [40]:
death_rate=total_deaths/total_cases
Out[40]:
0.13073679170579894
In [66]:
number_of_test=935310+covid_df.new_tests.sum()
positive_rate=total_cases/number_of_test
In [67]:
positive_rate
Out[67]:
0.05206657403227681
In [51]:
high_new_cases=covid_df.new_cases>1000
In [48]:
print(high_cases)
0 False 1 False 2 False 3 False 4 False ... 243 True 244 True 245 False 246 False 247 True Name: new_cases, Length: 248, dtype: bool
In [52]:
covid_df[high_new_cases]
Out[52]:
In [60]:
high_deaths=covid_df.new_deaths>200
In [64]:
a=covid_df[high_new_cases & high_deaths]
a.shape
Out[64]:
(54, 4)
In [65]:
#Control the number of rows presented by python

from IPython.display import display
with pd.option_context('display.max_rows', 100):
    display(covid_df[covid_df.new_cases > 1000])
In [68]:
high_ratio_df = covid_df[covid_df.new_cases / covid_df.new_tests > positive_rate]
In [69]:
high_ratio_df
Out[69]:
In [75]:
covid_df['positive_rate']=covid_df.new_cases/covid_df.new_tests
In [76]:
covid_df
Out[76]:
In [79]:
covid_df.drop(columns=['positive_rate'], inplace=True)
In [80]:
covid_df
Out[80]:
In [81]:
import jovian
In [82]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [85]:
covid_df.sort_values('new_cases', ascending=False).head(10)
Out[85]:
In [86]:
covid_df.sort_values('new_deaths', ascending=False).head(10)
Out[86]:
In [87]:
covid_df.sort_values('new_cases').head(10)
Out[87]:
In [90]:
a=172
covid_df.new_cases.at[a]=0.5*(covid_df.new_cases[a+1]+covid_df.new_cases[a-1])
In [91]:
covid_df.new_cases.at[a]
Out[91]:
297.5
In [92]:
import jovian
In [93]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [94]:
covid_df.describe()
Out[94]:
In [99]:
adam=covid_df[covid_df.new_deaths>500]
erika=adam[covid_df.new_cases>6556]
erika
<ipython-input-99-894362e89085>:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index. erika=adam[covid_df.new_cases>6556]
Out[99]:
In [100]:
covid_df[covid_df.new_deaths>500]
Out[100]:
In [103]:
covid_df.loc[(covid_df.new_deaths>=500) & (covid_df.new_cases>=6100) ]

Out[103]:
In [115]:
covid_df.new_deaths.loc[covid_df.date.str[0:7]=='2020-04'].sum()
Out[115]:
16091.0
In [107]:
covid_df.date.startswith
Out[107]:
0    2019-12-31
1    2020-01-01
2    2020-01-02
3    2020-01-03
4    2020-01-04
5    2020-01-05
6    2020-01-06
Name: date, dtype: object
In [116]:
covid_df.date
Out[116]:
0      2019-12-31
1      2020-01-01
2      2020-01-02
3      2020-01-03
4      2020-01-04
          ...    
243    2020-08-30
244    2020-08-31
245    2020-09-01
246    2020-09-02
247    2020-09-03
Name: date, Length: 248, dtype: object
In [117]:
covid_df['date']=pd.to_datetime(covid_df.date)
In [119]:
  covid_df['date']
Out[119]:
0     2019-12-31
1     2020-01-01
2     2020-01-02
3     2020-01-03
4     2020-01-04
         ...    
243   2020-08-30
244   2020-08-31
245   2020-09-01
246   2020-09-02
247   2020-09-03
Name: date, Length: 248, dtype: datetime64[ns]
In [123]:
covid_df['year']=pd.DatetimeIndex(covid_df.date).year
covid_df['month']=pd.DatetimeIndex(covid_df.date).month
covid_df['day']=pd.DatetimeIndex(covid_df.date).day
covid_df['weekday']=pd.DatetimeIndex(covid_df.date).weekday
In [124]:
covid_df
Out[124]:
In [131]:
covid_df.loc[covid_df.month ==4]
Out[131]:
In [134]:
covid_df_may=covid_df.loc[covid_df.month ==5]
In [135]:
covid_df_may_matrix=covid_df_may[['new_cases', 'new_deaths','new_tests']]
In [139]:
covid_may_totals=covid_df_may_matrix.sum()
In [140]:
covid_df
Out[140]:
new_cases       29073.0
new_deaths       5658.0
new_tests     1078720.0
dtype: float64
In [144]:
covid_df[covid_df.month ==5][['new_cases', 'new_deaths','new_tests']].sum()
Out[144]:
new_cases       29073.0
new_deaths       5658.0
new_tests     1078720.0
dtype: float64
In [146]:
covid_df.new_deaths[covid_df.weekday ==0].mean()>=covid_df.new_deaths[covid_df.weekday ==1].mean()
Out[146]:
False
In [147]:
import jovian
In [148]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [158]:
 monthly_groups=covid_df.groupby('weekday')
In [159]:
 monthly_groups=covid_df.groupby('weekday')[['new_cases', 'new_deaths','new_tests']].mean()
In [160]:
 monthly_groups
Out[160]:
In [164]:
covid_df['total_deaths']=covid_df.new_deaths.cumsum()
In [166]:
covid_df['total_cases']=covid_df.new_cases.cumsum()
In [167]:
covid_df['total_tests']=covid_df.new_tests.cumsum()
In [168]:
covid_df
Out[168]:
In [169]:
import jovian
In [170]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [171]:
urlretrieve('https://gist.githubusercontent.com/aakashns/8684589ef4f266116cdce023377fc9c8/raw/99ce3826b2a9d1e6d0bde7e9e559fc8b6e9ac88b/locations.csv', 
            'locations.csv')
Out[171]:
('locations.csv', <http.client.HTTPMessage at 0x23c44ee0430>)
In [173]:
locations_df=pd.read_csv('locations.csv')
In [174]:
locations_df
Out[174]:
In [185]:
locations_df[locations_df.location =='Italy']
Out[185]:
In [189]:
covid_df['location']='Italy'
In [188]:
covid_df
Out[188]:
In [193]:
merged_df=covid_df.merge(locations_df, on='location')
In [191]:
merged_df
Out[191]:
In [194]:
merged_df['cases_per_million'] = merged_df.total_cases * 1e6 / merged_df.population
merged_df['deaths_per_million'] = merged_df.total_deaths * 1e6 / merged_df.population
merged_df['tests_per_million'] = merged_df.total_tests * 1e6 / merged_df.population
In [195]:
merged_df
Out[195]:
In [196]:
import jovian
In [197]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [198]:
result_df = merged_df[['date',
                       'new_cases', 
                       'total_cases', 
                       'new_deaths', 
                       'total_deaths', 
                       'new_tests', 
                       'total_tests', 
                       'cases_per_million', 
                       'deaths_per_million', 
                       'tests_per_million']]
In [199]:
result_df
Out[199]:
In [200]:
result_df.to_csv('results.csv', index=None)
In [205]:
import jovian
In [206]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [207]:
jovian.commit(outputs=['results.csv'])
[jovian] Attempting to save notebook.. [jovian] Updating notebook "adamfersh/pandas1" on https://jovian.ai/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Uploading additional outputs... [jovian] Committed successfully! https://jovian.ai/adamfersh/pandas1
In [211]:
result_df.new_deaths.plot();
Notebook Image
In [212]:
result_df.set_index('date',inplace=True)
In [213]:
result_df
Out[213]:
In [214]:
result_df.loc['2020-09-01']
Out[214]:
new_cases             9.960000e+02
total_cases           2.696595e+05
new_deaths            3.548300e+04
total_deaths          4.943383e+06
new_tests             5.439500e+04
total_tests           4.279456e+06
cases_per_million     4.459996e+03
deaths_per_million    8.176040e+04
tests_per_million     7.077947e+04
Name: 2020-09-01 00:00:00, dtype: float64
In [216]:
result_df.new_cases.plot()
result_df.new_deaths.plot();
Notebook Image
In [221]:
death_rate = result_df.total_deaths / (100*result_df.total_cases)

In [222]:
death_rate.plot(title='Death Rate');
Notebook Image
In [219]:
positive_rates = result_df.total_cases / result_df.total_tests
positive_rates.plot(title='Positive Rate');
Notebook Image
In [226]:
covid_month_df.new_cases.plot(kind='bar');
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-226-2426c8c54771> in <module> ----> 1 covid_month_df.new_cases.plot(kind='bar'); ~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in __getattr__(self, attr) 748 return object.__getattribute__(self, attr) 749 if attr in self.obj: --> 750 return self[attr] 751 752 raise AttributeError( ~\anaconda3\lib\site-packages\pandas\core\groupby\generic.py in __getitem__(self, key) 1540 stacklevel=2, 1541 ) -> 1542 return super().__getitem__(key) 1543 1544 def _gotitem(self, key, ndim: int, subset=None): ~\anaconda3\lib\site-packages\pandas\core\base.py in __getitem__(self, key) 253 def __getitem__(self, key): 254 if self._selection is not None: --> 255 raise IndexError(f"Column(s) {self._selection} already selected") 256 257 if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): IndexError: Column(s) ['new_cases', 'new_deaths', 'new_tests'] already selected
In [227]:
import jovian
In [ ]:
jovian.commit(project='pandas1')
[jovian] Attempting to save notebook..
In [ ]: