Learn practical skills, build real-world projects, and advance your career
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import missingno as msno
# Load the training CSV from the current working directory into the
# module-level DataFrame `data`, which later cells read directly.
data = pd.read_csv("blackFriday_train.csv")
# Preview the first rows of the frame (rendered by the notebook when this is
# the last expression of a cell; as a plain script the result is discarded).
data.head()
# Print the per-column summary: non-null counts, dtypes and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 550068 entries, 0 to 550067 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 User_ID 550068 non-null int64 1 Product_ID 550068 non-null object 2 Gender 550068 non-null object 3 Age 550068 non-null object 4 Occupation 550068 non-null int64 5 City_Category 550068 non-null object 6 Stay_In_Current_City_Years 550068 non-null object 7 Marital_Status 550068 non-null int64 8 Product_Category_1 550068 non-null int64 9 Product_Category_2 376430 non-null float64 10 Product_Category_3 166821 non-null float64 11 Purchase 550068 non-null int64 dtypes: float64(2), int64(5), object(5) memory usage: 50.4+ MB
# Plot the distribution of the `Purchase` column and overlay a fitted normal
# density (fit=stats.norm) to compare the data against a Gaussian.
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases in
# favour of `histplot`/`displot`, which have no `fit=` argument — confirm the
# installed seaborn version before migrating this call.
sns.distplot(data['Purchase'], fit=stats.norm)
<matplotlib.axes._subplots.AxesSubplot at 0x17a8c5eb2e0>
Notebook Image
def unique_category(df, features=None):
    """Print a summary of the distinct values held by selected columns.

    For every column name in ``features`` the following is printed, in order:
    the column name, the per-value frequency table (``value_counts``), the
    array of distinct values, the number of distinct values, and a separator
    line of 50 underscores.

    Args:
        df: DataFrame whose columns are summarised. The summary is computed
            from this argument, not from any module-level frame.
        features: Iterable of column labels present in ``df``. ``None``
            (the default) or an empty iterable prints nothing.

    Returns:
        None. The function only writes to standard output.

    Raises:
        KeyError: If a label in ``features`` is not a column of ``df``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if features is None:
        features = ()
    for feature in features:
        column = df[feature]
        # Compute the distinct values once; they are printed and counted below.
        distinct = column.unique()
        print('feature :', feature)
        print('Count of unique categories :\n', column.value_counts())
        print('unique categories :', distinct)
        print('toal number of unique cat :', len(distinct))
        print("_"*50)