Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
import warnings
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and 3.8 removed the old names, so a hard-coded 'seaborn-darkgrid' raises
# OSError on current releases. Use whichever spelling this install provides.
# If neither is registered the style is skipped; sns.set() below still
# applies seaborn's own default theme.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use([_style_name])
        break

# Silence library warnings so they do not clutter the displayed output.
warnings.filterwarnings('ignore')
sns.set()
# Lift the column display limit so no column is elided when a frame is shown.
pd.set_option('display.max_columns', None)

# Read the previous-application records from the CSV in the working directory.
prev_data = pd.read_csv("previous_application.csv")

# Preview the first rows, then report the (rows, columns) dimensions.
prev_data.head()
prev_data.shape
(1670214, 37)
# Percentage of missing values in each column, stored in the Series prev_data_null.
pd.set_option('display.max_rows', None)  # show every row of the result
prev_data_null = prev_data.isnull().sum().div(len(prev_data)).mul(100)
# Keep only the columns that have missing values, largest share first.
prev_data_null[prev_data_null > 0].sort_values(ascending=False)
RATE_INTEREST_PRIMARY 99.643698
RATE_INTEREST_PRIVILEGED 99.643698
AMT_DOWN_PAYMENT 53.636480
RATE_DOWN_PAYMENT 53.636480
NAME_TYPE_SUITE 49.119754
DAYS_FIRST_DRAWING 40.298129
DAYS_FIRST_DUE 40.298129
DAYS_LAST_DUE_1ST_VERSION 40.298129
DAYS_LAST_DUE 40.298129
DAYS_TERMINATION 40.298129
NFLAG_INSURED_ON_APPROVAL 40.298129
AMT_GOODS_PRICE 23.081773
AMT_ANNUITY 22.286665
CNT_PAYMENT 22.286366
PRODUCT_COMBINATION 0.020716
AMT_CREDIT 0.000060
dtype: float64