Learn practical skills, build real-world projects, and advance your career
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so asking for 'seaborn-darkgrid'
# unconditionally raises OSError there. Use whichever spelling this install
# provides; if neither exists the list is empty and the default style is kept.
plt.style.use([
    style_name
    for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
    if style_name in plt.style.available
][:1])
import seaborn as sns
import warnings
from collections import Counter

# Session-wide setup: silence every warning, apply seaborn's default theme,
# and let pandas print all columns of a frame instead of eliding the middle.
warnings.simplefilter('ignore')
sns.set()
pd.set_option('display.max_columns', None)

# Load the previous-application records and take a first look at them.
# The two bare expressions below only render output in an interactive
# (notebook/REPL) session.
prev_data = pd.read_csv("previous_application.csv")
prev_data.head()
prev_data.shape
(1670214, 37)
# Percentage of missing values per column, stored in the Series prev_data_null.
# Same arithmetic order as before: null count / row count, then scaled to 100.
prev_data_null = prev_data.isna().sum().div(len(prev_data)).mul(100)

# Lift the row cap so the full listing is printed, then show only the columns
# that actually contain missing values, highest percentage first.
pd.set_option('display.max_rows', None)
prev_data_null.loc[prev_data_null > 0].sort_values(ascending=False)
RATE_INTEREST_PRIMARY        99.643698
RATE_INTEREST_PRIVILEGED     99.643698
AMT_DOWN_PAYMENT             53.636480
RATE_DOWN_PAYMENT            53.636480
NAME_TYPE_SUITE              49.119754
DAYS_FIRST_DRAWING           40.298129
DAYS_FIRST_DUE               40.298129
DAYS_LAST_DUE_1ST_VERSION    40.298129
DAYS_LAST_DUE                40.298129
DAYS_TERMINATION             40.298129
NFLAG_INSURED_ON_APPROVAL    40.298129
AMT_GOODS_PRICE              23.081773
AMT_ANNUITY                  22.286665
CNT_PAYMENT                  22.286366
PRODUCT_COMBINATION           0.020716
AMT_CREDIT                    0.000060
dtype: float64