Jovian
⭐️
Sign In
In [1]:
!pip install jovian --upgrade --quiet
In [2]:
!pip install pandas --upgrade
Requirement already up-to-date: pandas in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (1.1.2) Requirement already satisfied, skipping upgrade: numpy>=1.15.4 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (1.19.2) Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2.8.1) Requirement already satisfied, skipping upgrade: pytz>=2017.2 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2020.1) Requirement already satisfied, skipping upgrade: six>=1.5 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
In [3]:
!pip install matplotlib seaborn --upgrade --quiet
In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (9, 5)
matplotlib.rcParams['figure.facecolor'] = '#00000000'

In [2]:
# Load the match records from matches.csv (path is relative to the working directory).
matches_raw_df = pd.read_csv('matches.csv')
In [3]:
# Display the loaded frame as this cell's output.
matches_raw_df
Out[3]:
In [4]:
# Print each column's non-null count and dtype to spot columns with missing values.
matches_raw_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 756 entries, 0 to 755 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 756 non-null int64 1 season 756 non-null int64 2 city 749 non-null object 3 date 756 non-null object 4 team1 756 non-null object 5 team2 756 non-null object 6 toss_winner 756 non-null object 7 toss_decision 756 non-null object 8 result 756 non-null object 9 dl_applied 756 non-null int64 10 winner 752 non-null object 11 win_by_runs 756 non-null int64 12 win_by_wickets 756 non-null int64 13 player_of_match 752 non-null object 14 venue 756 non-null object 15 umpire1 754 non-null object 16 umpire2 754 non-null object 17 umpire3 119 non-null object dtypes: int64(5), object(13) memory usage: 106.4+ KB
In [5]:
# Count the matches for each value of the `result` column.
matches_raw_df['result'].value_counts()
Out[5]:
normal       743
tie            9
no result      4
Name: result, dtype: int64
In [6]:
# Number of matches in each season (Series indexed by season).
matches_per_season = matches_raw_df.groupby('season')['season'].count()
In [7]:
# Bar chart: number of matches per season.
plt.figure(figsize=(12, 6))
plt.xticks(rotation=75)
plt.title('Matches Per Season')
match_per_season_plot = sns.barplot(x=matches_per_season.index, y=matches_per_season)
match_per_season_plot.set_xlabel('Seasons')
match_per_season_plot.set_ylabel('No. of Matches');
In [8]:
# Share (in %) of each toss decision within a season: per-season decision counts
# divided by that season's match count (the division aligns on `season`).
toss_decision_percentage = (
    matches_raw_df.groupby('season')['toss_decision']
    .value_counts()
    .sort_index()
    / matches_per_season
    * 100
)
In [9]:
# Display the per-season toss-decision percentages (indexed by season, toss_decision).
toss_decision_percentage
Out[9]:
season  toss_decision
2008    bat              44.827586
        field            55.172414
2009    bat              61.403509
        field            38.596491
2010    bat              65.000000
        field            35.000000
2011    bat              34.246575
        field            65.753425
2012    bat              50.000000
        field            50.000000
2013    bat              59.210526
        field            40.789474
2014    bat              31.666667
        field            68.333333
2015    bat              42.372881
        field            57.627119
2016    bat              18.333333
        field            81.666667
2017    bat              18.644068
        field            81.355932
2018    bat              16.666667
        field            83.333333
2019    bat              16.666667
        field            83.333333
dtype: float64
In [10]:
# unstack() turns the toss_decision level into columns, giving one bar group per
# season with one bar per decision.
toss_decision_percentage.unstack().plot.bar(
    figsize=(12, 6),
    title='Toss Decisions',
    xlabel='Seasons',
    ylabel='Percentage',
);
In [11]:
# Percentage of each season's matches with a 'normal' result and no run margin
# (win_by_runs == 0), which this notebook counts as wins by the side batting second.
wins_batting_second = (
    matches_raw_df[(matches_raw_df['win_by_runs'] == 0) & (matches_raw_df['result'] == 'normal')]
    .groupby('season')['winner']
    .count()
    / matches_per_season
    * 100
)
In [12]:
# Percentage of each season's matches with a 'normal' result and no wicket margin
# (win_by_wickets == 0), which this notebook counts as wins by the side batting first.
wins_batting_first = (
    matches_raw_df[(matches_raw_df['win_by_wickets'] == 0) & (matches_raw_df['result'] == 'normal')]
    .groupby('season')['winner']
    .count()
    / matches_per_season
    * 100
)
In [13]:
# Put the two percentage series side by side, one named column each.
combined_wins_df = pd.concat(
    [wins_batting_first, wins_batting_second],
    axis=1,
    keys=['batting_first', 'batting_second'],
)
combined_wins_df
Out[13]:
In [14]:
# Grouped bars: batting-first vs batting-second win percentage for every season.
combined_wins_df.plot.bar(
    figsize=(12, 6),
    title='Wins',
    xlabel='Seasons',
    ylabel='Percentage',
);
In [15]:
# Matches played per team = appearances as team2 + appearances as team1.
# Series.add(..., fill_value=0) keeps a team that occurs in only one of the two
# columns; a plain `+` aligns the indexes and would leave such a team as NaN.
# The cast restores integer counts in case alignment introduced floats.
total_matches_played = (
    matches_raw_df.team2.value_counts()
    .add(matches_raw_df.team1.value_counts(), fill_value=0)
    .astype('int64')
    .sort_values(ascending=False)
)
total_matches_played
Out[15]:
Mumbai Indians                 187
Royal Challengers Bangalore    180
Kolkata Knight Riders          178
Kings XI Punjab                176
Chennai Super Kings            164
Delhi Daredevils               161
Rajasthan Royals               147
Sunrisers Hyderabad            108
Deccan Chargers                 75
Pune Warriors                   46
Gujarat Lions                   30
Rising Pune Supergiant          16
Delhi Capitals                  16
Rising Pune Supergiants         14
Kochi Tuskers Kerala            14
dtype: int64
In [16]:
# Horizontal bar chart: total matches played by each team.
plt.figure(figsize=(12, 6))
plt.title('Total Matches Played')
total_matches_played_plot = sns.barplot(y=total_matches_played.index, x=total_matches_played)
total_matches_played_plot.set_ylabel('Teams')
total_matches_played_plot.set_xlabel('No. of Matches');
In [17]:
# Win percentage per team: matches won divided by matches played, highest first.
win_percentage = (
    (matches_raw_df['winner'].value_counts() / total_matches_played)
    .sort_values(ascending=False)
    * 100
)
win_percentage
Out[17]:
Rising Pune Supergiant         62.500000
Delhi Capitals                 62.500000
Chennai Super Kings            60.975610
Mumbai Indians                 58.288770
Sunrisers Hyderabad            53.703704
Kolkata Knight Riders          51.685393
Rajasthan Royals               51.020408
Royal Challengers Bangalore    46.666667
Kings XI Punjab                46.590909
Gujarat Lions                  43.333333
Kochi Tuskers Kerala           42.857143
Delhi Daredevils               41.614907
Deccan Chargers                38.666667
Rising Pune Supergiants        35.714286
Pune Warriors                  26.086957
dtype: float64
In [18]:
# Number of matches won by each team.
matches_raw_df['winner'].value_counts()
Out[18]:
Mumbai Indians                 109
Chennai Super Kings            100
Kolkata Knight Riders           92
Royal Challengers Bangalore     84
Kings XI Punjab                 82
Rajasthan Royals                75
Delhi Daredevils                67
Sunrisers Hyderabad             58
Deccan Chargers                 29
Gujarat Lions                   13
Pune Warriors                   12
Delhi Capitals                  10
Rising Pune Supergiant          10
Kochi Tuskers Kerala             6
Rising Pune Supergiants          5
Name: winner, dtype: int64
In [19]:
# Horizontal bar chart: win percentage of each team.
plt.figure(figsize=(12, 6))
plt.title('Win Percentage')
win_percentage_plot = sns.barplot(y=win_percentage.index, x=win_percentage)
# Label the axes of this chart; the labels used to be applied to
# total_matches_played_plot, i.e. the axes of the earlier figure.
win_percentage_plot.set(ylabel='Teams', xlabel='Percentage');
In [94]:
# Matches with a non-zero run margin, largest margin first.
highest_wins_by_runs_df = (
    matches_raw_df
    .loc[matches_raw_df['win_by_runs'] != 0]
    .sort_values(by='win_by_runs', ascending=False)
)
highest_wins_by_runs_df
Out[94]:
In [95]:
# Scatter of every win-by-runs margin per season, with the ten largest margins
# highlighted in red and annotated with the winning team.
# The frame is already sorted by margin, so its first ten rows are the top ten;
# take them once instead of re-slicing and re-listing whole columns per iteration.
top_wins_by_runs = highest_wins_by_runs_df.head(10)

plt.figure(figsize=(25, 10))
sns.scatterplot(x='season', y='win_by_runs', data=highest_wins_by_runs_df, s=150, color='black')
sns.scatterplot(x='season', y='win_by_runs', data=top_wins_by_runs, s=220, color='red')

# Labels are set after plotting so that axis labels seaborn may derive from the
# column names cannot replace them.
plt.xlabel('Seasons', size=30)
plt.ylabel('Runs', size=30)
plt.title('Highest Wins By Runs', size=30)

for match in top_wins_by_runs.itertuples(index=False):
    plt.annotate(match.winner, (match.season, match.win_by_runs), size=20)
In [96]:
# First ten matches after sorting by wicket margin, largest margin first.
largest_wins_by_wickets = (
    matches_raw_df
    .sort_values(by='win_by_wickets', ascending=False)
    .iloc[:10]
)
In [97]:
# Display the ten matches selected by wicket margin.
largest_wins_by_wickets
Out[97]:
In [ ]:
# Matches officiated per umpire = appearances as umpire1 + appearances as umpire2.
# Series.add(..., fill_value=0) keeps an umpire who occurs in only one of the two
# columns; a plain `+` aligns the indexes and turns that umpire's total into NaN,
# which would push them out of the top ten. The cast restores integer counts.
most_experienced_umpires = (
    matches_raw_df.umpire1.value_counts()
    .add(matches_raw_df.umpire2.value_counts(), fill_value=0)
    .astype('int64')
    .sort_values(ascending=False)
    .head(10)
)
In [ ]:
# Display the ten umpires with the highest combined umpire1 + umpire2 counts.
most_experienced_umpires
In [ ]:
# Horizontal bar chart: matches officiated by the ten most frequent umpires.
plt.figure(figsize=(12, 6))
plt.title('Most Matches Umpired')
most_experienced_umpires_plot = sns.barplot(y=most_experienced_umpires.index, x=most_experienced_umpires)
most_experienced_umpires_plot.set_xlabel('Matches')
most_experienced_umpires_plot.set_ylabel('Umpires');
In [ ]:
 
In [98]:
import jovian
In [99]:
# Save and upload this notebook to the Jovian project (see the log printed below).
jovian.commit()
[jovian] Attempting to save notebook.. [jovian] Updating notebook "srijansrj5901/ipl-matches-data-analysis" on https://jovian.ml/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Committed successfully! https://jovian.ml/srijansrj5901/ipl-matches-data-analysis
In [ ]:
# Commit again, passing matches.csv through `files`
# (presumably attaches the dataset to the commit; confirm against the jovian docs).
jovian.commit(files = ['matches.csv'])
[jovian] Attempting to save notebook..
In [ ]: