Jovian
⭐️
Sign In
In [1]:
import pandas as pd
import numpy as np

ipl_match = pd.read_excel("C:\\Users\\KARNDEEP SINGH\\Downloads\\IPL Data\\IPL Data\\Match.xlsx")
In [2]:

ipl_ball_by_ball = pd.read_excel("C:\\Users\\KARNDEEP SINGH\\Downloads\\IPL Data\\IPL Data\\Ball_by_Ball.xlsx")
In [3]:
ipl_player = pd.read_excel("C:\\Users\\KARNDEEP SINGH\\Downloads\\IPL Data\\IPL Data\\Player.xlsx")
In [4]:
ipl_player_match = pd.read_excel("C:\\Users\\KARNDEEP SINGH\\Downloads\\IPL Data\\IPL Data\\Player_Match.xlsx")
In [5]:
ipl_season = pd.read_excel("C:\\Users\\KARNDEEP SINGH\\Downloads\\IPL Data\\IPL Data\\Season.xlsx")
In [6]:
ipl_team = pd.read_excel("C:\\Users\\KARNDEEP SINGH\\Downloads\\IPL Data\\IPL Data\\Team.xlsx")
In [7]:
ipl_match.head()
Out[7]:
In [8]:
ipl_match.rename({"Team_Name_Id":"Team_Id"}, inplace =False, axis= 1)
Out[8]:
In [9]:
ipl_match.head()
Out[9]:
In [10]:
match = ipl_match.rename({"Team_Name_Id":"Team_Id"}, axis = 1)
match.head()
Out[10]:
In [11]:
ipl_team.head()
Out[11]:
In [12]:
combine_team_match_2 = pd.merge(left = match, right = ipl_team, on = "Team_Id")
combine_team_match_2

Out[12]:
In [13]:
#combine_team_match_2.pivot_table(index = ["Team_Id", "Team_Name"], values = "Match_Winner_Id", aggfunc = np.size).sort_values(by="Match_Winner_Id", ascending =False)
In [14]:
combine_team_match = pd.merge(left = ipl_match, right = ipl_team, left_on = "Match_Winner_Id", right_on = "Team_Id")

In [15]:

match_winner = combine_team_match.pivot_table(index = "Team_Id", values = "Match_Winner_Id", aggfunc = [np.size])
match_winner
Out[15]:
In [16]:
#match_winner.sort_values(by = "Match_Winner_Id", ascending =False)
In [17]:
#percentage of win by each team
merging_opponent = pd.merge(left = ipl_match, right = ipl_team, left_on = "Opponent_Team_Id", right_on = "Team_Id")
x = merging_opponent.pivot_table(index = "Team_Id", values = "Opponent_Team_Id", aggfunc = [np.size])
In [18]:
merging_team_id = pd.merge(left = ipl_match, right = ipl_team, left_on = "Team_Name_Id", right_on = "Team_Id")
y = merging_team_id.pivot_table(index = "Team_Id", values = "Team_Id", aggfunc = [np.size])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-18-850343098429> in <module> 1 merging_team_id = pd.merge(left = ipl_match, right = ipl_team, left_on = "Team_Name_Id", right_on = "Team_Id") ----> 2 y = merging_team_id.pivot_table(index = "Team_Id", values = "Team_Id", aggfunc = [np.size]) ~\Anaconda3\lib\site-packages\pandas\core\frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name) 5301 aggfunc=aggfunc, fill_value=fill_value, 5302 margins=margins, dropna=dropna, -> 5303 margins_name=margins_name) 5304 5305 def stack(self, level=-1, dropna=True): ~\Anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name) 39 columns=columns, 40 fill_value=fill_value, aggfunc=func, ---> 41 margins=margins, margins_name=margins_name) 42 pieces.append(table) 43 keys.append(getattr(func, '__name__', func)) ~\Anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name) 84 # group by the cartesian product of the grouper 85 # if we have a categorical ---> 86 grouped = data.groupby(keys, observed=False) 87 agged = grouped.agg(aggfunc) 88 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): ~\Anaconda3\lib\site-packages\pandas\core\generic.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, **kwargs) 6663 return groupby(self, by=by, axis=axis, level=level, as_index=as_index, 6664 sort=sort, group_keys=group_keys, squeeze=squeeze, -> 6665 observed=observed, **kwargs) 6666 6667 def asfreq(self, freq, method=None, how=None, normalize=False, ~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in groupby(obj, by, **kwds) 2150 raise TypeError('invalid type: %s' % type(obj)) 2151 -> 2152 return klass(obj, by, **kwds) 2153 2154 ~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, **kwargs) 597 sort=sort, 598 observed=observed, --> 599 mutated=self.mutated) 600 601 self.obj = obj ~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _get_grouper(obj, key, axis, level, sort, observed, mutated, validate) 3313 observed=observed, 3314 in_axis=in_axis) \ -> 3315 if not isinstance(gpr, Grouping) else gpr 3316 3317 groupings.append(ping) ~\Anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in __init__(self, index, grouper, obj, name, level, sort, observed, in_axis) 3023 if getattr(self.grouper, 'ndim', 1) != 1: 3024 t = self.name or str(type(self.grouper)) -> 3025 raise ValueError("Grouper for '%s' not 1-dimensional" % t) 3026 self.grouper = self.index.map(self.grouper) 3027 if not (hasattr(self.grouper, "__len__") and ValueError: Grouper for 'Team_Id' not 1-dimensional
In [19]:
ipl_match
Out[19]:
In [20]:
x = combine_team_match["Team_Name_Id"].value_counts().sort_index()
y = combine_team_match["Opponent_Team_Id"].value_counts().sort_index()
z = combine_team_match["Match_Winner_Id"].value_counts().sort_index()
In [21]:
x
Out[21]:
1     62
2     76
3     74
4     65
5     52
6     68
7     62
8     39
9      7
10    23
11    30
12     7
13     9
Name: Team_Name_Id, dtype: int64
In [22]:
y
Out[22]:
1     70
2     61
3     57
4     69
5     65
6     63
7     78
8     36
9      7
10    22
11    32
12     7
13     7
Name: Opponent_Team_Id, dtype: int64
In [23]:
z.index = z.index.astype(int)
z
Out[23]:
1     68
2     70
3     79
4     63
5     63
6     56
7     80
8     29
9      6
10    12
11    34
12     5
13     9
Name: Match_Winner_Id, dtype: int64
In [24]:
total_match = x + y
In [25]:
total_match
Out[25]:
1     132
2     137
3     131
4     134
5     117
6     131
7     140
8      75
9      14
10     45
11     62
12     14
13     16
dtype: int64
In [26]:
ipl_team.set_index("Team_Id", inplace =True)
In [27]:
ipl_team["Home_Played"] = x
ipl_team["Away_Played"] = y
ipl_team["Total_Played"] = total_match
ipl_team["Number_Win"] = z
ipl_team["Percentage_Win"] = (ipl_team["Number_Win"]/ ipl_team["Total_Played"]) * 100

#ipl_team.drop("total_played", axis = 1, inplace= True)
ipl_team

Out[27]:
In [28]:
ipl_ball_by_ball
Out[28]:
In [29]:
#top 10 batsman :
#a.based on run scored
combine_player_bbyb= pd.merge(left = ipl_ball_by_ball, right = ipl_player, left_on = "Striker_Id", right_on = "Player_Id")
combine_player_bbyb.pivot_table(index = ["Striker_Id","Player_Name"], values = "Batsman_Scored", aggfunc = [np.sum])["sum"].sort_values(by ="Batsman_Scored", ascending=False).head(10)
Out[29]:
In [30]:
#b based on number of 6s
combine_player_bbyb[combine_player_bbyb["Batsman_Scored"]== 6 ].pivot_table(index = ["Striker_Id", "Player_Name"],values = "Batsman_Scored", aggfunc = [np.size])["size"].sort_values("Batsman_Scored", ascending =False).head(10)
Out[30]:
In [31]:
#b based on number of 4s
combine_player_bbyb[combine_player_bbyb["Batsman_Scored"]== 4 ].pivot_table(index = ["Striker_Id", "Player_Name"],values = "Batsman_Scored", aggfunc = [np.size])["size"].sort_values("Batsman_Scored", ascending =False).head(10)
Out[31]:
In [32]:
#b based on number of 4s
combine_player_bbyb[combine_player_bbyb["Batsman_Scored"]== 4 ].pivot_table(index = ["Striker_Id", "Player_Name"],values = "Batsman_Scored", aggfunc = [np.size])["size"].sort_values("Batsman_Scored", ascending =False).head(10)
Out[32]:

TOP 10 TEAMS :

  • Based on Number of Matches Won:
In [33]:
ipl_teamI
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-33-d4450fc1053c> in <module> ----> 1 ipl_teamI NameError: name 'ipl_teamI' is not defined
In [ ]:
ipl_team.sort_values(by = "Number_Win", ascending = False)
  • Based on Toss win and winning the matches
In [ ]:
m = ipl_match["Toss_Winner_Id"].value_counts().sort_index()
In [34]:
ipl_team["Toss_Win"] = m
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-34-68634a79f81a> in <module> ----> 1 ipl_team["Toss_Win"] = m NameError: name 'm' is not defined
In [35]:
ipl_team
Out[35]:
In [36]:
ipl_team.sort_values(by=["Toss_Win", "Number_Win"], ascending = [False, False])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-36-75a188b07b04> in <module> ----> 1 ipl_team.sort_values(by=["Toss_Win", "Number_Win"], ascending = [False, False]) ~\Anaconda3\lib\site-packages\pandas\core\frame.py in sort_values(self, by, axis, ascending, inplace, kind, na_position) 4409 for x in by: 4410 k = self._get_label_or_level_values(x, axis=axis, -> 4411 stacklevel=stacklevel) 4412 keys.append(k) 4413 indexer = lexsort_indexer(keys, orders=ascending, ~\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_label_or_level_values(self, key, axis, stacklevel) 1380 values = self.axes[axis].get_level_values(key)._values 1381 else: -> 1382 raise KeyError(key) 1383 1384 # Check for duplicates KeyError: 'Toss_Win'
  • Based on Team Winning Highest Number of Man of Matches
In [37]:
mom = ipl_match["Man_Of_The_Match_Id"].value_counts()
mom
Out[37]:
162.0    17
31.0     16
110.0    15
187.0    14
57.0     13
21.0     13
85.0     12
20.0     12
40.0     12
19.0     12
8.0      11
41.0     11
32.0     10
9.0      10
147.0    10
136.0     9
100.0     9
133.0     8
221.0     8
35.0      7
53.0      7
73.0      6
185.0     6
334.0     6
232.0     6
50.0      6
104.0     6
208.0     6
254.0     5
64.0      5
         ..
364.0     1
188.0     1
210.0     1
83.0      1
125.0     1
65.0      1
192.0     1
42.0      1
304.0     1
324.0     1
385.0     1
214.0     1
120.0     1
336.0     1
260.0     1
175.0     1
296.0     1
216.0     1
15.0      1
460.0     1
308.0     1
408.0     1
420.0     1
273.0     1
335.0     1
393.0     1
355.0     1
149.0     1
307.0     1
10.0      1
Name: Man_Of_The_Match_Id, Length: 187, dtype: int64
In [38]:
ipl_player_match
Out[38]:
In [39]:
combine_match_player_match = pd.merge(left = ipl_match, right = ipl_player_match, left_on = ["Man_Of_The_Match_Id","Match_Id"], right_on = ["Player_Id","Match_Id"])
In [40]:
combine_match_player_match
Out[40]:
In [41]:
c = combine_match_player_match.pivot_table(index = ["Team_Id"], values = ["Man_Of_The_Match_Id"], aggfunc = [np.size])
In [42]:
ipl_player_match
Out[42]:
In [43]:
ipl_team["total_Man_Of_Match_Of_Team"] = c
In [44]:
ipl_team.sort_values(by = ["total_Man_Of_Match_Of_Team"], ascending =False)
Out[44]:

SEASON WISE:

  • ORANGE CAP HOLDER
In [45]:
ipl_season
Out[45]:
In [46]:
ipl_player
Out[46]:
In [47]:
combine_season_player_1 = pd.merge(left = ipl_season, right = ipl_player, left_on = "Orange_Cap_Id", right_on = "Player_Id")
In [48]:
combine_season_player_1[["Season_Year", "Orange_Cap_Id", "Player_Name"]]
Out[48]:
  • PURPLE CAP HOLDER
In [49]:
combine_season_player_2 = pd.merge(left = ipl_season, right = ipl_player, left_on = "Purple_Cap_Id", right_on = "Player_Id")
combine_season_player_1[["Season_Year", "Purple_Cap_Id", "Player_Name"]]
Out[49]:

FOR EACH SEASON MATCHES WITH BIGGEST WIN MARGIN BASED ON RUNS:

In [50]:
ipl_ball_by_ball.columns
Out[50]:
Index(['Match_Id', 'Season_Id', 'Innings_Id', 'Over_Id', 'Ball_Id',
       'Team_Batting_Id', 'Team_Bowling_Id', 'Striker_Id',
       'Striker_Batting_Position', 'Non_Striker_Id', 'Bowler_Id',
       'Batsman_Scored', 'Extra_Type', 'Extra_Runs', 'Player_dissimal_Id',
       'Dissimal_Type', 'Fielder_Id'],
      dtype='object')
In [51]:
match_wintype = ipl_match[ipl_match["Win_Type"]=="by runs"]
combine_season_match = pd.merge(left = match_wintype, right = ipl_season, left_on = "Season_Id", right_on = "Season_Id")
combine_s_m_team = pd.merge(left = combine_season_match, right = ipl_team, left_on ="Match_Winner_Id", right_on = "Team_Id")
combine_s_m_team

Out[51]:
In [52]:
t = combine_s_m_team.pivot_table(index = ["Season_Id","Season_Year"], values = ["Won_By"], aggfunc = np.max)
t
Out[52]:
In [53]:
f = pd.merge(left = t, right = combine_s_m_team, left_on = ["Season_Id", "Won_By"], right_on = ["Season_Id","Won_By"])
f[["Season_Year","Team_Name","Won_By"]]
Out[53]:

TOTAL NUMBERS OF :

  • 100s
In [ ]:
 
In [54]:
ipl_ball_by_ball.columns
Out[54]:
Index(['Match_Id', 'Season_Id', 'Innings_Id', 'Over_Id', 'Ball_Id',
       'Team_Batting_Id', 'Team_Bowling_Id', 'Striker_Id',
       'Striker_Batting_Position', 'Non_Striker_Id', 'Bowler_Id',
       'Batsman_Scored', 'Extra_Type', 'Extra_Runs', 'Player_dissimal_Id',
       'Dissimal_Type', 'Fielder_Id'],
      dtype='object')
In [55]:
runs_sum = ipl_ball_by_ball.pivot_table(index = ["Match_Id", "Striker_Id"], values = "Batsman_Scored", aggfunc = np.sum)
runs_sum
Out[55]:
In [56]:
runs_sum[runs_sum["Batsman_Scored"]>=100].groupby("Striker_Id").count().sort_values(by="Batsman_Scored",ascending = False).head(10)
Out[56]:
  • 50s
In [57]:
runs_sum[(runs_sum["Batsman_Scored"]>=50) & (runs_sum["Batsman_Scored"]<100)].count()
Out[57]:
Batsman_Scored    815
dtype: int64
  • 6s
In [58]:
runs_sum2 = ipl_ball_by_ball[ipl_ball_by_ball["Batsman_Scored"] == 6]
In [59]:
runs_sum2
Out[59]:
In [60]:
runs_sum3 = runs_sum2.pivot_table(index = ["Match_Id", "Striker_Id"], values = "Batsman_Scored", aggfunc = np.size)
In [61]:
runs_sum3
Out[61]:
In [63]:
ipl_ball_by_ball.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 136590 entries, 0 to 136589 Data columns (total 17 columns): Match_Id 136590 non-null int64 Season_Id 136590 non-null int64 Innings_Id 136590 non-null int64 Over_Id 136590 non-null int64 Ball_Id 136590 non-null int64 Team_Batting_Id 136590 non-null int64 Team_Bowling_Id 136590 non-null int64 Striker_Id 136590 non-null int64 Striker_Batting_Position 136590 non-null int64 Non_Striker_Id 136590 non-null int64 Bowler_Id 136590 non-null int64 Batsman_Scored 136590 non-null int64 Extra_Type 136590 non-null object Extra_Runs 136590 non-null object Player_dissimal_Id 136590 non-null object Dissimal_Type 136590 non-null object Fielder_Id 136590 non-null object dtypes: int64(12), object(5) memory usage: 17.7+ MB
In [66]:
cols = [cols for cols in ipl_ball_by_ball.columns if (cols !="Batsman_Scored") and (cols !="Extra_Runs") ]
In [67]:
cols
Out[67]:
['Match_Id',
 'Season_Id',
 'Innings_Id',
 'Over_Id',
 'Ball_Id',
 'Team_Batting_Id',
 'Team_Bowling_Id',
 'Striker_Id',
 'Striker_Batting_Position',
 'Non_Striker_Id',
 'Bowler_Id',
 'Extra_Type',
 'Player_dissimal_Id',
 'Dissimal_Type',
 'Fielder_Id']
In [68]:
import jovian
In [ ]:
jovian.commit()
[jovian] Saving notebook..
In [ ]: