Learn practical skills, build real-world projects, and advance your career
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Apply seaborn's default theme to all following plots. The parentheses
# matter: a bare `sns.set` only references the function and changes nothing.
sns.set()
# Load every daily log of May 2019 (2019-05-01.jsonl ... 2019-05-31.jsonl).
# Each file holds one JSON record per line, hence lines=True.
# A single comprehension replaces 31 copy-pasted read_json calls, so no file
# name can be mistyped or skipped; the frame for a given day of the month is
# daily_frames[day - 1].
daily_frames = [
    pd.read_json('2019-05-{:02d}.jsonl'.format(day), lines=True)
    for day in range(1, 32)
]

# Merge the daily frames, in date order, into one DataFrame with a fresh
# 0..n-1 index; sort=True sorts the column axis if the daily frames' columns
# are not already aligned.
df = pd.concat(daily_frames, ignore_index=True, sort=True)
df
# Number of distinct projects (unique `spec` values) in the May 2019 data
df['spec'].nunique()
6180
# Tally the rows recorded for every project (`spec`), then display the ten
# projects with the largest tallies
most_popular_projects = df.groupby('spec')['spec'].count()
most_popular_projects.nlargest(n=10)
spec
ipython/ipython-in-depth/master              183362
jupyterlab/jupyterlab-demo/master             34704
DS-100/textbook/master                        21773
ines/spacy-io-binder/live                     18498
bokeh/bokeh-notebooks/master                   8916
ines/spacy-course/binder                       6108
binder-examples/r/master                       5703
binder-examples/requirements/master            5402
rationalmatter/juno-demo-notebooks/master      5153
QuantStack/xeus-cling/stable                   4512
Name: spec, dtype: int64