Jovian
⭐️
Sign In
In [2]:
!pip install jovian -q --upgrade
In [3]:
import jovian
In [26]:
import pandas as pd
import numpy as np
In [27]:
# Read the Zomato CSV that sits next to the notebook into the raw frame `df`.
df = pd.read_csv(filepath_or_buffer='./zomato.csv')

# Show the first two rows as a quick check of the parsed result.
df.head(n=2)

Out[27]:
In [49]:
# Number of rows in the raw frame (first component of its shape).
print(df.shape[0])
51717
In [50]:
# Build df1 as a copy of df without the columns listed below; df itself is not modified.
df1 = df.drop(
    columns=[
        'url',
        'address',
        'name',
        'phone',
        'location',
        'dish_liked',
        'menu_item',
        'listed_in(type)',
        'reviews_list',
    ]
)

# Preview the first five rows of the reduced frame.
df1.head(n=5)

Out[50]:
In [51]:
# Dropping columns must not change the number of rows; print it to confirm.
print(df1.shape[0])
51717
In [53]:

# Convert the textual 'rate' column to float.
#
# Step 1: rows whose rate is the literal 'NEW' or '-' cannot be parsed as a number,
# so they are removed. Missing values compare unequal to both literals and are kept.
is_numeric_like = (df1['rate'] != 'NEW') & (df1['rate'] != '-')
df1 = df1.loc[is_numeric_like].reset_index(drop=True)

# Step 2: remove the '/5' suffix and surrounding whitespace, then cast to float.
# The vectorised .str methods leave NaN untouched, so no per-element type check is
# needed. (A check written with `np.str` fails on NumPy >= 1.24, where that alias
# no longer exists.)
df1['rate'] = (
    df1['rate']
    .str.replace('/5', '', regex=False)
    .str.strip()
    .astype('float')
)

# Preview the converted values.
df1['rate'].head()
Out[53]:
0    4.1
1    4.1
2    3.8
3    3.7
4    3.8
Name: rate, dtype: float64
In [54]:
# Row count after removing the 'NEW' and '-' ratings.
print(df1.shape[0])
49440
In [55]:
# Optimising the dropping of null values

# Iterating a DataFrame yields its column labels, so the full column list can be
# built directly; no counter-guarded loop is required.
l = list(df1.columns)
print(l)

# how='any' removes a row as soon as one of its columns is missing.
new_data = df1.dropna(axis=0, how='any')
print("Old data frame length:", len(df1), "\nNew data frame length:",
      len(new_data), "\nNumber of rows with at least 1 NA value: ",
      (len(df1) - len(new_data)))
['online_order', 'book_table', 'rate', 'votes', 'rest_type', 'cuisines', 'approx_cost(for two people)', 'listed_in(city)'] Old data frame length: 49440 New data frame length: 41263 Number of rows with at least 1 NA value: 8177
In [56]:
# Impute missing ratings with the mean of the observed (non-missing) ratings.
mean_rate = df1['rate'].mean(skipna=True)

# Assign the filled column back to df1. Calling fillna(inplace=True) on the
# df1['rate'] selection is chained assignment: pandas deprecates it, and with
# Copy-on-Write enabled it would leave df1 unchanged.
df1['rate'] = df1['rate'].fillna(value=mean_rate)

# Show a short preview instead of rendering every row of the frame.
df1.head()
Out[56]:
In [57]:
# Filling missing ratings must not change the number of rows; print it to confirm.
print(df1.shape[0])
49440

**Before optimising the drop of null values** (earlier version, run against `df`):

```python
ctr = 1
l = []
for i in df:
    if ctr == 1:
        l.append(i)
    else:
        break
print(l)
new_data = df.dropna(axis=0, how='any')
print("Old data frame length:", len(df), "\nNew data frame length:",
      len(new_data), "\nNumber of rows with at least 1 NA value: ", (len(df)-len(new_data)))
```

```python
ctr = 1
l = []
for i in df:
    if ctr == 1:
        l.append(i)
    else:
        break
print(l)
for i in l:
    x = list(df[i].isnull())
    print('----------------------------------')
    print(i)
    print("Missing Value=" + str(x.count(True)))
    print('----------------------------------')
```

In [58]:
# Flag the columns that contain at least one missing value ...
has_missing = df1.isnull().any()
null_columns = df1.columns[has_missing]

# ... and count the missing entries in each of those columns.
df1[null_columns].isnull().sum()
Out[58]:
rest_type                      225
cuisines                        45
approx_cost(for two people)    341
dtype: int64
In [59]:
# Remove every row that still has a missing value in any column.
df2 = df1.dropna(axis='index', how='any')

# Report the row counts before and after, and how many rows were removed.
rows_before = len(df1)
rows_after = len(df2)
print(
    "Old data frame length:", rows_before,
    "\nNew data frame length:", rows_after,
    "\nNumber of rows with at least 1 NA value: ", rows_before - rows_after,
)
Old data frame length: 49440 New data frame length: 48878 Number of rows with at least 1 NA value: 562
In [ ]:
# Save this notebook through the Jovian client (it prints "[jovian] Saving notebook..").
# NOTE(review): presumably this also uploads a snapshot to the Jovian account — confirm.
jovian.commit()
[jovian] Saving notebook..
In [ ]: