!pip install jovian --upgrade --quiet
!pip install pandas --upgrade
Requirement already up-to-date: pandas in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (1.1.2)
Requirement already satisfied, skipping upgrade: numpy>=1.15.4 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (1.19.2)
Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2.8.1)
Requirement already satisfied, skipping upgrade: pytz>=2017.2 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2020.1)
Requirement already satisfied, skipping upgrade: six>=1.5 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
!pip install matplotlib seaborn --upgrade --quiet
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Plot appearance shared by every chart drawn after this point.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 14,
    'figure.figsize': (9, 5),
    # 8-digit hex colour; the trailing 00 is the alpha channel.
    'figure.facecolor': '#00000000',
})
# Load the match-level dataset from the current working directory.
matches_raw_df = pd.read_csv('matches.csv')
# Bare expression: a notebook renders it as a table when it ends a cell.
matches_raw_df
# Print the column names, non-null counts and dtypes of the frame.
matches_raw_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 756 non-null int64
1 season 756 non-null int64
2 city 749 non-null object
3 date 756 non-null object
4 team1 756 non-null object
5 team2 756 non-null object
6 toss_winner 756 non-null object
7 toss_decision 756 non-null object
8 result 756 non-null object
9 dl_applied 756 non-null int64
10 winner 752 non-null object
11 win_by_runs 756 non-null int64
12 win_by_wickets 756 non-null int64
13 player_of_match 752 non-null object
14 venue 756 non-null object
15 umpire1 754 non-null object
16 umpire2 754 non-null object
17 umpire3 119 non-null object
dtypes: int64(5), object(13)
memory usage: 106.4+ KB
# Number of matches in each season: count the rows within every 'season' group.
season_groups = matches_raw_df.groupby('season')
matches_per_season = season_groups['season'].count()
matches_per_season.index
Int64Index([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
2019],
dtype='int64', name='season')
# Bar chart of the number of matches played in each season.
plt.figure(figsize=(12, 6))
plt.xticks(rotation=75)
plt.title('Matches Per Season')
match_per_season_plot = sns.barplot(
    x=matches_per_season.index,
    y=matches_per_season,
)
match_per_season_plot.set_xlabel('Seasons')
match_per_season_plot.set_ylabel('No. of Matches');
# Share (in percent) of 'bat' vs 'field' toss decisions within each season.
toss_counts_by_season = (
    matches_raw_df.groupby('season')['toss_decision']
    .value_counts()
    .sort_index()
)
# The division aligns the (season, toss_decision) counts with the per-season totals.
toss_decision_percentage = toss_counts_by_season / matches_per_season * 100
toss_decision_percentage
season toss_decision
2008 bat 44.827586
field 55.172414
2009 bat 61.403509
field 38.596491
2010 bat 65.000000
field 35.000000
2011 bat 34.246575
field 65.753425
2012 bat 50.000000
field 50.000000
2013 bat 59.210526
field 40.789474
2014 bat 31.666667
field 68.333333
2015 bat 42.372881
field 57.627119
2016 bat 18.333333
field 81.666667
2017 bat 18.644068
field 81.355932
2018 bat 16.666667
field 83.333333
2019 bat 16.666667
field 83.333333
dtype: float64
# Grouped bar chart: one bar per toss decision for every season.
toss_decision_by_season = toss_decision_percentage.unstack()
toss_decision_by_season.plot.bar(
    figsize=(12, 6),
    title='Toss Decisions',
    xlabel='Seasons',
    ylabel='Percentage',
);
# Per season: matches in which the toss winner chose to field and also won the match.
chose_to_field = matches_raw_df['toss_decision'] == 'field'
toss_winner_won = matches_raw_df['toss_winner'] == matches_raw_df['winner']
matches_raw_df[chose_to_field & toss_winner_won].groupby('season')['winner'].count()
season
2008 19
2009 14
2010 10
2011 27
2012 18
2013 15
2014 24
2015 14
2016 32
2017 28
2018 27
2019 31
Name: winner, dtype: int64
# Matches played per team = appearances as team1 + appearances as team2.
# Series.add(..., fill_value=0) treats a team that is absent from one of the
# two columns as having 0 appearances there; a plain `+` would yield NaN for
# such a team.
team1_appearances = matches_raw_df.team1.value_counts()
team2_appearances = matches_raw_df.team2.value_counts()
total_matches_played = (
    team2_appearances.add(team1_appearances, fill_value=0)
    .astype(int)  # alignment with missing labels promotes to float
    .sort_values(ascending=False)
)
total_matches_played
Mumbai Indians 187
Royal Challengers Bangalore 180
Kolkata Knight Riders 178
Kings XI Punjab 176
Chennai Super Kings 164
Delhi Daredevils 161
Rajasthan Royals 147
Sunrisers Hyderabad 108
Deccan Chargers 75
Pune Warriors 46
Gujarat Lions 30
Rising Pune Supergiant 16
Delhi Capitals 16
Rising Pune Supergiants 14
Kochi Tuskers Kerala 14
dtype: int64
# Horizontal bar chart of the total matches played by each team.
plt.figure(figsize=(12, 6))
plt.title('Total Matches Played')
total_matches_played_plot = sns.barplot(
    x=total_matches_played,
    y=total_matches_played.index,
)
total_matches_played_plot.set_ylabel('Teams')
total_matches_played_plot.set_xlabel('No. of Matches');
# Win percentage per team = matches won / matches played * 100.
# Reindexing the win counts on the teams in total_matches_played gives a team
# with no wins an explicit 0, so it shows 0% instead of NaN after the division.
wins_per_team = matches_raw_df.winner.value_counts().reindex(
    total_matches_played.index, fill_value=0
)
win_percentage = (wins_per_team / total_matches_played).sort_values(ascending = False) * 100
win_percentage
Rising Pune Supergiant 62.500000
Delhi Capitals 62.500000
Chennai Super Kings 60.975610
Mumbai Indians 58.288770
Sunrisers Hyderabad 53.703704
Kolkata Knight Riders 51.685393
Rajasthan Royals 51.020408
Royal Challengers Bangalore 46.666667
Kings XI Punjab 46.590909
Gujarat Lions 43.333333
Kochi Tuskers Kerala 42.857143
Delhi Daredevils 41.614907
Deccan Chargers 38.666667
Rising Pune Supergiants 35.714286
Pune Warriors 26.086957
dtype: float64
# Horizontal bar chart of each team's win percentage.
plt.figure(figsize=(12,6))
plt.title('Win Percentage')
win_percentage_plot = sns.barplot(y = win_percentage.index, x = win_percentage)
# Label the axes of this chart: the handle must be win_percentage_plot, not
# the axes object of the separate 'Total Matches Played' figure.
win_percentage_plot.set(ylabel = 'Teams', xlabel = 'Percentage');
# The five matches with the largest winning margin in runs.
matches_by_run_margin = matches_raw_df.sort_values(by='win_by_runs', ascending=False)
highest_win_by_runs_df = matches_by_run_margin.head(5)
highest_win_by_runs_df
# Bar chart of the run margin for each of the selected matches, keyed by match id.
plt.figure(figsize=(12, 6))
plt.xticks(rotation=90)
plt.title('Highest Wins by Runs')
highest_win_by_runs_plot = sns.barplot(
    x='id',
    y='win_by_runs',
    data=highest_win_by_runs_df,
)
# The twenty rows with the largest win_by_wickets values.
matches_by_wicket_margin = matches_raw_df.sort_values(by='win_by_wickets', ascending=False)
matches_by_wicket_margin.iloc[:20]
# Matches officiated per umpire, summing the umpire1 and umpire2 columns.
# `.max()` binds tighter than `+`, so writing `a.value_counts() +
# b.value_counts().max()` adds a single scalar to every umpire1 count rather
# than combining the two columns per umpire. fill_value=0 keeps umpires who
# appear in only one of the two columns.
# NOTE(review): assumes per-umpire totals were intended; re-run the cell to
# refresh the recorded output.
umpire1_counts = matches_raw_df.umpire1.value_counts()
umpire2_counts = matches_raw_df.umpire2.value_counts()
umpire1_counts.add(umpire2_counts, fill_value=0).astype(int).sort_values(ascending=False)
HDPK Dharmasena 130
Asad Rauf 108
S Ravi 106
AK Chaudhary 100
Aleem Dar 95
...
Sundaram Ravi 58
A Nanda Kishore 58
Nanda Kishore 58
SL Shastri 58
SJA Taufel 58
Name: umpire1, Length: 61, dtype: int64
# Wins per team in matches where the toss winner chose to field and won the match.
toss_chose_field = matches_raw_df['toss_decision'] == 'field'
toss_winner_won_match = matches_raw_df['toss_winner'] == matches_raw_df['winner']
matches_raw_df.loc[toss_chose_field & toss_winner_won_match, 'winner'].value_counts()
Kolkata Knight Riders 38
Royal Challengers Bangalore 32
Mumbai Indians 31
Kings XI Punjab 29
Rajasthan Royals 27
Chennai Super Kings 27
Delhi Daredevils 24
Sunrisers Hyderabad 15
Gujarat Lions 10
Deccan Chargers 8
Delhi Capitals 6
Rising Pune Supergiant 5
Kochi Tuskers Kerala 4
Rising Pune Supergiants 3
Name: winner, dtype: int64
# Wins per team in matches where the toss winner chose to bat and did not win the match.
toss_chose_bat = matches_raw_df['toss_decision'] == 'bat'
toss_winner_did_not_win = matches_raw_df['toss_winner'] != matches_raw_df['winner']
matches_raw_df.loc[toss_chose_bat & toss_winner_did_not_win, 'winner'].value_counts()
Chennai Super Kings 22
Kolkata Knight Riders 20
Mumbai Indians 20
Rajasthan Royals 19
Delhi Daredevils 18
Royal Challengers Bangalore 17
Kings XI Punjab 15
Sunrisers Hyderabad 13
Pune Warriors 6
Deccan Chargers 3
Rising Pune Supergiants 2
Gujarat Lions 2
Delhi Capitals 1
Name: winner, dtype: int64
# Wins per team in matches where the toss winner chose to bat and won the match.
toss_chose_bat = matches_raw_df['toss_decision'] == 'bat'
toss_winner_won_match = matches_raw_df['toss_winner'] == matches_raw_df['winner']
matches_raw_df.loc[toss_chose_bat & toss_winner_won_match, 'winner'].value_counts()
Chennai Super Kings 30
Mumbai Indians 25
Kolkata Knight Riders 15
Rajasthan Royals 15
Deccan Chargers 11
Delhi Daredevils 11
Royal Challengers Bangalore 9
Sunrisers Hyderabad 8
Kings XI Punjab 6
Pune Warriors 3
Delhi Capitals 1
Name: winner, dtype: int64
# Wins per team in matches where the toss winner chose to field and did not win the match.
toss_chose_field = matches_raw_df['toss_decision'] == 'field'
toss_winner_did_not_win = matches_raw_df['toss_winner'] != matches_raw_df['winner']
matches_raw_df.loc[toss_chose_field & toss_winner_did_not_win, 'winner'].value_counts()
Mumbai Indians 33
Kings XI Punjab 32
Royal Challengers Bangalore 26
Sunrisers Hyderabad 22
Chennai Super Kings 21
Kolkata Knight Riders 19
Delhi Daredevils 14
Rajasthan Royals 14
Deccan Chargers 7
Rising Pune Supergiant 5
Pune Warriors 3
Kochi Tuskers Kerala 2
Delhi Capitals 2
Gujarat Lions 1
Name: winner, dtype: int64
import jovian
# Save and upload the notebook to Jovian; progress is reported in the log lines it prints.
jovian.commit()
[jovian] Attempting to save notebook..
[jovian] Updating notebook "srijansrj5901/ipl-matches-data-analysis" on https://jovian.ml/
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/srijansrj5901/ipl-matches-data-analysis
# Commit again, this time passing matches.csv through the files argument
# (presumably so the dataset is attached to the commit; confirm against the jovian docs).
jovian.commit(files = ['matches.csv'])
[jovian] Attempting to save notebook..