!pip install jovian --upgrade --quiet
!pip install pandas --upgrade
Requirement already up-to-date: pandas in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (1.1.2)
Requirement already satisfied, skipping upgrade: numpy>=1.15.4 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (1.19.2)
Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2.8.1)
Requirement already satisfied, skipping upgrade: pytz>=2017.2 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from pandas) (2020.1)
Requirement already satisfied, skipping upgrade: six>=1.5 in c:\users\s\anaconda3\envs\courseproject\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)
!pip install matplotlib seaborn --upgrade --quiet
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Global plot appearance: seaborn's 'darkgrid' theme, then matplotlib defaults
# for font size, figure size and figure background colour.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 14,
    'figure.figsize': (9, 5),
    'figure.facecolor': '#00000000',
})
# Load the match-level dataset from 'matches.csv' (path relative to the working directory).
matches_raw_df = pd.read_csv('matches.csv')
# Bare expression: a notebook renders the DataFrame when this is the last statement of a cell.
matches_raw_df
# Print column names, non-null counts and dtypes for the loaded frame.
matches_raw_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 756 non-null int64
1 season 756 non-null int64
2 city 749 non-null object
3 date 756 non-null object
4 team1 756 non-null object
5 team2 756 non-null object
6 toss_winner 756 non-null object
7 toss_decision 756 non-null object
8 result 756 non-null object
9 dl_applied 756 non-null int64
10 winner 752 non-null object
11 win_by_runs 756 non-null int64
12 win_by_wickets 756 non-null int64
13 player_of_match 752 non-null object
14 venue 756 non-null object
15 umpire1 754 non-null object
16 umpire2 754 non-null object
17 umpire3 119 non-null object
dtypes: int64(5), object(13)
memory usage: 106.4+ KB
# Frequency of each distinct value in the 'result' column.
matches_raw_df['result'].value_counts()
normal 743
tie 9
no result 4
Name: result, dtype: int64
# Number of matches recorded for each season.
matches_per_season = matches_raw_df.groupby('season')['season'].count()

# Bar chart of the per-season match counts.
plt.figure(figsize=(12, 6))
plt.xticks(rotation=75)
plt.title('Matches Per Season')
match_per_season_plot = sns.barplot(
    x=matches_per_season.index,
    y=matches_per_season,
)
match_per_season_plot.set_xlabel('Seasons')
match_per_season_plot.set_ylabel('No. of Matches');
# Per-season share (in percent) of each toss decision: count the decisions
# within every season, then divide by that season's total number of matches.
toss_decision_percentage = (
    matches_raw_df
    .groupby('season')['toss_decision']
    .value_counts()
    .sort_index()
    / matches_per_season
    * 100
)
toss_decision_percentage
season toss_decision
2008 bat 44.827586
field 55.172414
2009 bat 61.403509
field 38.596491
2010 bat 65.000000
field 35.000000
2011 bat 34.246575
field 65.753425
2012 bat 50.000000
field 50.000000
2013 bat 59.210526
field 40.789474
2014 bat 31.666667
field 68.333333
2015 bat 42.372881
field 57.627119
2016 bat 18.333333
field 81.666667
2017 bat 18.644068
field 81.355932
2018 bat 16.666667
field 83.333333
2019 bat 16.666667
field 83.333333
dtype: float64
# Pivot the toss decisions into columns and draw grouped bars, one group per season.
toss_decision_percentage.unstack().plot.bar(
    figsize=(12, 6),
    title='Toss Decisions',
    xlabel='Seasons',
    ylabel='Percentage',
);
# Percentage of each season's matches that ended with a 'normal' result and no
# run margin (win_by_runs == 0); stored as wins for the side batting second.
wins_batting_second = (
    matches_raw_df[(matches_raw_df['result'] == 'normal') & (matches_raw_df['win_by_runs'] == 0)]
    .groupby('season')['winner']
    .count()
    / matches_per_season
    * 100
)

# Same calculation for 'normal' results with no wicket margin
# (win_by_wickets == 0); stored as wins for the side batting first.
wins_batting_first = (
    matches_raw_df[(matches_raw_df['result'] == 'normal') & (matches_raw_df['win_by_wickets'] == 0)]
    .groupby('season')['winner']
    .count()
    / matches_per_season
    * 100
)

# Put both series side by side, one column per batting order.
combined_wins_df = pd.concat(
    [wins_batting_first, wins_batting_second],
    axis=1,
    keys=['batting_first', 'batting_second'],
)
combined_wins_df
combined_wins_df.plot.bar(
    figsize=(12, 6),
    title='Wins',
    xlabel='Seasons',
    ylabel='Percentage',
);
# Total matches per team = appearances as team1 + appearances as team2.
# `add(..., fill_value=0)` treats a team missing from one of the two columns
# as having 0 appearances there; a plain `+` would yield NaN for such a team.
# The cast restores the integer dtype if alignment had to introduce floats.
total_matches_played = (
    matches_raw_df.team2.value_counts()
    .add(matches_raw_df.team1.value_counts(), fill_value=0)
    .astype('int64')
    .sort_values(ascending=False)
)
total_matches_played
Mumbai Indians 187
Royal Challengers Bangalore 180
Kolkata Knight Riders 178
Kings XI Punjab 176
Chennai Super Kings 164
Delhi Daredevils 161
Rajasthan Royals 147
Sunrisers Hyderabad 108
Deccan Chargers 75
Pune Warriors 46
Gujarat Lions 30
Rising Pune Supergiant 16
Delhi Capitals 16
Rising Pune Supergiants 14
Kochi Tuskers Kerala 14
dtype: int64
# Horizontal bar chart: teams on the y-axis, matches played on the x-axis.
plt.figure(figsize=(12, 6))
plt.title('Total Matches Played')
total_matches_played_plot = sns.barplot(
    y=total_matches_played.index,
    x=total_matches_played,
)
total_matches_played_plot.set_ylabel('Teams')
total_matches_played_plot.set_xlabel('No. of Matches');
# Wins divided by matches played for every team, sorted from best to worst
# and scaled to a percentage.
win_percentage = (
    matches_raw_df['winner'].value_counts()
    .div(total_matches_played)
    .sort_values(ascending=False)
    .mul(100)
)
win_percentage
Rising Pune Supergiant 62.500000
Delhi Capitals 62.500000
Chennai Super Kings 60.975610
Mumbai Indians 58.288770
Sunrisers Hyderabad 53.703704
Kolkata Knight Riders 51.685393
Rajasthan Royals 51.020408
Royal Challengers Bangalore 46.666667
Kings XI Punjab 46.590909
Gujarat Lions 43.333333
Kochi Tuskers Kerala 42.857143
Delhi Daredevils 41.614907
Deccan Chargers 38.666667
Rising Pune Supergiants 35.714286
Pune Warriors 26.086957
dtype: float64
# Number of matches won by each team.
matches_raw_df['winner'].value_counts()
Mumbai Indians 109
Chennai Super Kings 100
Kolkata Knight Riders 92
Royal Challengers Bangalore 84
Kings XI Punjab 82
Rajasthan Royals 75
Delhi Daredevils 67
Sunrisers Hyderabad 58
Deccan Chargers 29
Gujarat Lions 13
Pune Warriors 12
Delhi Capitals 10
Rising Pune Supergiant 10
Kochi Tuskers Kerala 6
Rising Pune Supergiants 5
Name: winner, dtype: int64
# Horizontal bar chart: teams on the y-axis, win percentage on the x-axis.
plt.figure(figsize=(12, 6))
plt.title('Win Percentage')
win_percentage_plot = sns.barplot(y=win_percentage.index, x=win_percentage)
# Label the axes of THIS chart: the handle must be win_percentage_plot, not the
# Axes returned for the earlier "Total Matches Played" chart.
win_percentage_plot.set(ylabel='Teams', xlabel='Percentage');
# Matches with a non-zero run margin, largest margin first.
highest_wins_by_runs_df = (
    matches_raw_df[matches_raw_df['win_by_runs'] != 0]
    .sort_values(by='win_by_runs', ascending=False)
)
highest_wins_by_runs_df
# The ten largest run margins; highest_wins_by_runs_df is already sorted
# in descending order of win_by_runs, so the first ten rows are the top ten.
top_10_wins_by_runs_df = highest_wins_by_runs_df.head(10)

plt.figure(figsize=(25, 10))
plt.xlabel('Seasons', size=30)
plt.ylabel('Runs', size=30)
plt.title('Highest Wins By Runs', size=30)

# All run-margin wins in black, with the top ten drawn over them in larger red markers.
sns.scatterplot(x='season', y='win_by_runs', data=highest_wins_by_runs_df, s=150, color='black');
sns.scatterplot(x='season', y='win_by_runs', data=top_10_wins_by_runs_df, s=220, color='red');

# Annotate each of the top-ten points with the winning team's name.
for winner, season, runs in zip(
    top_10_wins_by_runs_df.winner,
    top_10_wins_by_runs_df.season,
    top_10_wins_by_runs_df.win_by_runs,
):
    plt.annotate(winner, (season, runs), size=20)
# The ten matches with the highest win_by_wickets value.
largest_wins_by_wickets = (
    matches_raw_df
    .sort_values(by='win_by_wickets', ascending=False)
    .head(10)
)
largest_wins_by_wickets
# Matches officiated per umpire = appearances in umpire1 + appearances in umpire2.
# `add(..., fill_value=0)` keeps umpires that occur in only one of the two
# columns; a plain `+` would turn their totals into NaN and push them out of
# the ranking. The cast restores integer counts after alignment.
# NOTE(review): the umpire3 column is not included in this tally — confirm
# whether that is intended.
most_experienced_umpires = (
    matches_raw_df.umpire1.value_counts()
    .add(matches_raw_df.umpire2.value_counts(), fill_value=0)
    .astype('int64')
    .sort_values(ascending=False)
    .head(10)
)
most_experienced_umpires
S Ravi 106.0
HDPK Dharmasena 87.0
C Shamshuddin 73.0
AK Chaudhary 58.0
SJA Taufel 55.0
M Erasmus 54.0
Nitin Menon 42.0
BR Doctrove 42.0
RE Koertzen 41.0
CK Nandan 41.0
dtype: float64
# Horizontal bar chart: umpire names on the y-axis, matches officiated on the x-axis.
plt.figure(figsize=(12, 6))
plt.title('Most Matches Umpired')
most_experienced_umpires_plot = sns.barplot(
    y=most_experienced_umpires.index,
    x=most_experienced_umpires,
)
# The y-axis carries the index (umpires) and the x-axis the counts, so the
# labels must be assigned accordingly.
most_experienced_umpires_plot.set(ylabel='Umpires', xlabel='Matches');
# Save the notebook via the jovian client (the captured output reports
# "Attempting to save notebook..").
import jovian
jovian.commit()
[jovian] Attempting to save notebook..
# Commit again, passing 'matches.csv' through the `files` argument.
# NOTE(review): presumably this attaches the dataset to the saved notebook — confirm against the jovian docs.
jovian.commit(files = ['matches.csv'])