Jovian
⭐️
Sign In
In [1]:
!pip install jovian --upgrade --quiet
In [2]:
!pip install pandas --upgrade
Requirement already up-to-date: pandas in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (1.1.2) Requirement already satisfied, skipping upgrade: numpy>=1.15.4 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (1.19.2) Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2.8.1) Requirement already satisfied, skipping upgrade: pytz>=2017.2 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2020.1) Requirement already satisfied, skipping upgrade: six>=1.5 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
In [3]:
!pip install matplotlib seaborn --upgrade --quiet
In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (9, 5)
matplotlib.rcParams['figure.facecolor'] = '#00000000'

In [2]:
matches_raw_df = pd.read_csv('matches.csv')
In [3]:
matches_raw_df
Out[3]:
In [7]:
matches_raw_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 756 entries, 0 to 755 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 756 non-null int64 1 season 756 non-null int64 2 city 749 non-null object 3 date 756 non-null object 4 team1 756 non-null object 5 team2 756 non-null object 6 toss_winner 756 non-null object 7 toss_decision 756 non-null object 8 result 756 non-null object 9 dl_applied 756 non-null int64 10 winner 752 non-null object 11 win_by_runs 756 non-null int64 12 win_by_wickets 756 non-null int64 13 player_of_match 752 non-null object 14 venue 756 non-null object 15 umpire1 754 non-null object 16 umpire2 754 non-null object 17 umpire3 119 non-null object dtypes: int64(5), object(13) memory usage: 106.4+ KB
In [165]:
matches_raw_df.result.value_counts()
Out[165]:
normal       743
tie            9
no result      4
Name: result, dtype: int64
In [12]:
matches_per_season = matches_raw_df.groupby('season').season.count()
In [14]:
plt.figure(figsize=(12,6))
plt.xticks(rotation=75)
plt.title('Matches Per Season')
match_per_season_plot = sns.barplot(x = matches_per_season.index, y = matches_per_season)
match_per_season_plot.set(xlabel = 'Seasons', ylabel = 'No. of Matches');
In [132]:
toss_decision_percentage = matches_raw_df.groupby('season').toss_decision.value_counts().sort_index() / matches_per_season * 100
In [140]:
toss_decision_percentage
Out[140]:
season  toss_decision
2008    bat              44.827586
        field            55.172414
2009    bat              61.403509
        field            38.596491
2010    bat              65.000000
        field            35.000000
2011    bat              34.246575
        field            65.753425
2012    bat              50.000000
        field            50.000000
2013    bat              59.210526
        field            40.789474
2014    bat              31.666667
        field            68.333333
2015    bat              42.372881
        field            57.627119
2016    bat              18.333333
        field            81.666667
2017    bat              18.644068
        field            81.355932
2018    bat              16.666667
        field            83.333333
2019    bat              16.666667
        field            83.333333
dtype: float64
In [147]:
toss_decision_percentage.unstack().plot(kind = 'bar', figsize=(12,6), title = 'Toss Decisions', xlabel = 'Seasons', ylabel = 'Percentage');
In [162]:
matches_raw_df[(matches_raw_df.toss_decision == 'field') & (matches_raw_df.toss_winner == matches_raw_df.winner)].groupby('season').winner.count()
Out[162]:
season
2008    19
2009    14
2010    10
2011    27
2012    18
2013    15
2014    24
2015    14
2016    32
2017    28
2018    27
2019    31
Name: winner, dtype: int64
In [41]:
total_matches_played = (matches_raw_df.team2.value_counts() + matches_raw_df.team1.value_counts()).sort_values(ascending = False)
total_matches_played
Out[41]:
Mumbai Indians                 187
Royal Challengers Bangalore    180
Kolkata Knight Riders          178
Kings XI Punjab                176
Chennai Super Kings            164
Delhi Daredevils               161
Rajasthan Royals               147
Sunrisers Hyderabad            108
Deccan Chargers                 75
Pune Warriors                   46
Gujarat Lions                   30
Rising Pune Supergiant          16
Delhi Capitals                  16
Rising Pune Supergiants         14
Kochi Tuskers Kerala            14
dtype: int64
In [42]:
plt.figure(figsize=(12,6))
plt.title('Total Matches Played')
total_matches_played_plot = sns.barplot(y = total_matches_played.index, x = total_matches_played)
total_matches_played_plot.set(ylabel = 'Teams', xlabel = 'No. of Matches');
In [47]:
win_percentage = (matches_raw_df.winner.value_counts() / total_matches_played).sort_values(ascending = False) * 100
win_percentage
Out[47]:
Rising Pune Supergiant         62.500000
Delhi Capitals                 62.500000
Chennai Super Kings            60.975610
Mumbai Indians                 58.288770
Sunrisers Hyderabad            53.703704
Kolkata Knight Riders          51.685393
Rajasthan Royals               51.020408
Royal Challengers Bangalore    46.666667
Kings XI Punjab                46.590909
Gujarat Lions                  43.333333
Kochi Tuskers Kerala           42.857143
Delhi Daredevils               41.614907
Deccan Chargers                38.666667
Rising Pune Supergiants        35.714286
Pune Warriors                  26.086957
dtype: float64
In [56]:
plt.figure(figsize=(12,6))
plt.title('Win Percentage')
win_percentage_plot = sns.barplot(y = win_percentage.index, x = win_percentage)
total_matches_played_plot.set(ylabel = 'Teams', xlabel = 'Percentage');
In [66]:
highest_win_by_runs_df = matches_raw_df.sort_values('win_by_runs', ascending = False).head(5)
highest_win_by_runs_df
Out[66]:
In [73]:
plt.figure(figsize=(12,6))
plt.xticks(rotation=90)
plt.title('Highest Wins by Runs')
highest_win_by_runs_plot = sns.barplot(x = highest_win_by_runs_df.id, y = highest_win_by_runs_df.win_by_runs)
In [14]:
matches_raw_df.sort_values('win_by_wickets', ascending = False).head(20)
Out[14]:
In [8]:
matches_raw_df.umpire1.value_counts() + matches_raw_df.umpire2.value_counts().max()
Out[8]:
HDPK Dharmasena    130
Asad Rauf          108
S Ravi             106
AK Chaudhary       100
Aleem Dar           95
                  ... 
Sundaram Ravi       58
A Nanda Kishore     58
Nanda Kishore       58
SL Shastri          58
SJA Taufel          58
Name: umpire1, Length: 61, dtype: int64
In [25]:
matches_raw_df[(matches_raw_df.toss_decision == 'field') & (matches_raw_df.toss_winner == matches_raw_df.winner)].winner.value_counts()
Out[25]:
Kolkata Knight Riders          38
Royal Challengers Bangalore    32
Mumbai Indians                 31
Kings XI Punjab                29
Rajasthan Royals               27
Chennai Super Kings            27
Delhi Daredevils               24
Sunrisers Hyderabad            15
Gujarat Lions                  10
Deccan Chargers                 8
Delhi Capitals                  6
Rising Pune Supergiant          5
Kochi Tuskers Kerala            4
Rising Pune Supergiants         3
Name: winner, dtype: int64
In [26]:
matches_raw_df[(matches_raw_df.toss_decision == 'bat') & (matches_raw_df.toss_winner != matches_raw_df.winner)].winner.value_counts()
Out[26]:
Chennai Super Kings            22
Kolkata Knight Riders          20
Mumbai Indians                 20
Rajasthan Royals               19
Delhi Daredevils               18
Royal Challengers Bangalore    17
Kings XI Punjab                15
Sunrisers Hyderabad            13
Pune Warriors                   6
Deccan Chargers                 3
Rising Pune Supergiants         2
Gujarat Lions                   2
Delhi Capitals                  1
Name: winner, dtype: int64
In [29]:
matches_raw_df[(matches_raw_df.toss_decision == 'bat') & (matches_raw_df.toss_winner == matches_raw_df.winner)].winner.value_counts()
Out[29]:
Chennai Super Kings            30
Mumbai Indians                 25
Kolkata Knight Riders          15
Rajasthan Royals               15
Deccan Chargers                11
Delhi Daredevils               11
Royal Challengers Bangalore     9
Sunrisers Hyderabad             8
Kings XI Punjab                 6
Pune Warriors                   3
Delhi Capitals                  1
Name: winner, dtype: int64
In [30]:
matches_raw_df[(matches_raw_df.toss_decision == 'field') & (matches_raw_df.toss_winner != matches_raw_df.winner)].winner.value_counts()
Out[30]:
Mumbai Indians                 33
Kings XI Punjab                32
Royal Challengers Bangalore    26
Sunrisers Hyderabad            22
Chennai Super Kings            21
Kolkata Knight Riders          19
Delhi Daredevils               14
Rajasthan Royals               14
Deccan Chargers                 7
Rising Pune Supergiant          5
Pune Warriors                   3
Kochi Tuskers Kerala            2
Delhi Capitals                  2
Gujarat Lions                   1
Name: winner, dtype: int64
In [ ]:
 
In [149]:
import jovian
In [ ]:
jovian.commit()
[jovian] Attempting to save notebook..
In [164]:
jovian.commit(files = ['matches.csv'])
[jovian] Attempting to save notebook.. [jovian] Updating notebook "srijansrj5901/ipl-matches-data-analysis" on https://jovian.ml/ [jovian] Uploading notebook.. [jovian] Capturing environment.. [jovian] Uploading additional files... [jovian] Committed successfully! https://jovian.ml/srijansrj5901/ipl-matches-data-analysis
In [ ]: