Learn practical skills, build real-world projects, and advance your career
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
# Plot appearance: ggplot theme, default figure size of 10 x 6 (inches).
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (10, 6)

# Read the application data CSV into a DataFrame, then peek at the first
# rows and the (rows, columns) shape.
application = pd.read_csv(
    "C:\\Users\\Anshu\\Desktop\\DATA SCIENCE\\Case Study\\application_data.csv"
)
application.head()
application.shape
(307511, 122)
# Summary statistics for the numeric columns.
application.describe()

# Frequency of each distinct TARGET value; value_counts() orders the result
# by descending count.
target_col = application['TARGET'].value_counts()

# x: the distinct TARGET values, y: how many rows carry each value.
x, y = target_col.index.tolist(), target_col.values

# Total number of counted rows, used later to turn counts into percentages.
total_observations = y.sum()

# --- Matplotlib: bar chart of the TARGET value distribution ---
# Uses x (distinct TARGET values), y (row count per value) and
# total_observations (sum of y) computed from application['TARGET'].
fig, ax = plt.subplots()

# Pass one colour per bar as an explicit list: a multi-character string such
# as 'gr' used as a colour sequence is deprecated since Matplotlib 3.2.
# NOTE(review): colours and tick labels are matched to bars by position,
# i.e. in value_counts() order (descending count). This presumes the most
# frequent class is "repay" and the other "default" - confirm on the data.
bar_plot = ax.bar(x, y, width=0.5, color=['g', 'r'])

# X axis: one tick per TARGET value, shown with a readable label.
ax.set_xticks(x)
ax.set_xlabel('Target variable values')
ax.set_xticklabels(['Repay Loan', 'Default Loan'], rotation=0, fontsize=15)

# Y axis: fixed 0..300000 range with a tick every 25000.
ax.set_ylim(bottom=0, top=300000)
ax.set_ylabel('Count of target variable')
ax.set_yticks(np.arange(0, 325000, 25000))

# The commented code below will convert y axis into percentage
# formatter = FuncFormatter(lambda y, pos: "%d%%" % (y))
# ax.yaxis.set_major_formatter(formatter)

# Annotate every bar with its share of all observations, as a percentage
# with two decimals, centred horizontally just below the bar's top edge.
for rect in bar_plot:
    height = rect.get_height()
    share_pct = (height / total_observations) * 100
    ax.text(
        rect.get_x() + rect.get_width() / 2.0,
        0.99 * height,
        f"{share_pct:.2f}%",
        ha='center',
        va='bottom',
        fontsize=15,
    )

ax.set_title('Distribution of target variable')
plt.show()
<ipython-input-4-ffac02ff3a5b>:10: MatplotlibDeprecationWarning: Using a string of single character colors as a color sequence is deprecated since 3.2 and will be removed two minor releases later. Use an explicit list instead. bar_plot = ax.bar(x, y, width=0.5, color='gr')
Notebook Image