In [1]:
# importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_curve, roc_auc_score
from matplotlib import pyplot
In [2]:
#Reading data
data = pd.read_csv("https://github.com/mpourhoma/CS4661/raw/master/Heart_short.csv")
In [3]:
data.head()
Out[3]:
[Output: first five rows of the dataset, including the feature columns used below and the AHD label]
In [4]:
#feature columns
feature_columns =['Age','RestBP', 'Chol', 'RestECG', 'MaxHR','Oldpeak']
In [5]:
cols = data[feature_columns]
label = data['AHD']
In [6]:
# Normalizing the features (row-wise L2 scaling). Note: this normalized
# array is not used below; the split and the model work on the raw features.
feature_arr = np.array(cols)
normalized_features = preprocessing.normalize(feature_arr)
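Note that preprocessing.normalize rescales each row to unit L2 norm, which is not the same as per-column feature scaling. A minimal sketch of the column-wise alternative with StandardScaler (not part of the original workflow; the cells below still use the raw features):

# Sketch: column-wise scaling, for contrast with the row-wise
# preprocessing.normalize above. Not used by the cells below.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_arr)  # zero mean, unit variance per column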
In [7]:
# Split the data into training (75%) and test (25%) sets
x_train, x_test, y_train, y_test = train_test_split(cols, label, test_size=0.25, random_state=3)
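A quick sanity check on the split (a sketch): with test_size = 0.25, the test set should hold a quarter of the rows, matching the support of 71 in the report below.

# Sketch: verify the 75/25 split sizes
print(x_train.shape, x_test.shape)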
In [8]:
# creating an instance of logistic regression
# (max_iter raised as a precaution: lbfgs can hit the default iteration
# limit on unscaled features)
logmodel = LogisticRegression(solver='lbfgs', max_iter=1000)
# fit the model using the training data
logmodel.fit(x_train, y_train)
predictions = logmodel.predict(x_test)
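Before evaluating, it can help to glance at the fitted coefficients, one per feature column. A minimal sketch (not in the original notebook):

# Sketch: print one fitted coefficient per feature
for name, coef in zip(feature_columns, logmodel.coef_[0]):
    print('%-8s %+.4f' % (name, coef))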

Testing the model and creating a classification report

In [9]:
print(classification_report(y_test, predictions))
              precision    recall  f1-score   support

          No       0.73      0.80      0.77        41
         Yes       0.69      0.60      0.64        30

    accuracy                           0.72        71
   macro avg       0.71      0.70      0.71        71
weighted avg       0.72      0.72      0.71        71

The accuracy of the LogisticRegression model is 0.72, and the weighted average f1-score is 0.71.
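For reference, those headline numbers can be recomputed directly; a short sketch using accuracy_score and f1_score (these imports are not in the first cell):

# Sketch: recompute the headline metrics from the report
from sklearn.metrics import accuracy_score, f1_score
print('Accuracy: %.2f' % accuracy_score(y_test, predictions))
print('Weighted f1: %.2f' % f1_score(y_test, predictions, average='weighted'))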

Predicting probabilities with LogisticRegression

In [10]:
# 'no skill' baseline: a constant score for every test sample
ns_probs = [0 for _ in range(len(y_test))]
# predict class probabilities on the test set
probs = logmodel.predict_proba(x_test)
# keep only the probabilities for the positive ('Yes') outcome
probs = probs[:, 1]
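The column order of predict_proba follows logmodel.classes_, which is sorted alphabetically here, so column 1 is the 'Yes' class. A one-line check:

# Sketch: confirm that probs[:, 1] is P('Yes')
print(logmodel.classes_)  # expected: ['No' 'Yes']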

Calculating ROC AUC scores from the probabilities

In [11]:
# calculate scores
ns_auc = roc_auc_score(y_test, ns_probs)
prob_auc = roc_auc_score(y_test, probs)
#summarize scores
print('No Skill: ROC AUC = %.3f' % (ns_auc))
print('Logistic Regression: ROC AUC = %.3f' % (prob_auc))
No Skill: ROC AUC = 0.500
Logistic Regression: ROC AUC = 0.772

Calculate ROC curves

In [14]:
# Calculating ROC curves
ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_probs, pos_label = "Yes")
probs_fpr, probs_tpr, _ = roc_curve(y_test, probs, pos_label = "Yes")
print(probs_fpr)
print(probs_tpr)
[0. 0. 0.02439024 0.02439024 0.07317073 0.07317073 0.09756098 0.09756098 0.12195122 0.12195122 0.17073171 0.17073171 0.2195122 0.2195122 0.24390244 0.24390244 0.29268293 0.29268293 0.41463415 0.41463415 0.48780488 0.48780488 0.53658537 0.53658537 0.68292683 0.68292683 0.73170732 0.73170732 0.82926829 0.82926829 1.]
[0. 0.03333333 0.03333333 0.36666667 0.36666667 0.4 0.4 0.43333333 0.43333333 0.56666667 0.56666667 0.6 0.6 0.63333333 0.63333333 0.7 0.7 0.73333333 0.73333333 0.8 0.8 0.83333333 0.83333333 0.86666667 0.86666667 0.93333333 0.93333333 0.96666667 0.96666667 1. 1.]
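As a cross-check, the AUC reported above can be recovered from these two arrays by trapezoidal integration (a sketch; this should match the 0.772 from roc_auc_score up to rounding):

# Sketch: AUC as the area under the (fpr, tpr) curve
print('AUC via np.trapz: %.3f' % np.trapz(probs_tpr, probs_fpr))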

Plotting the ROC curves

In [15]:
pyplot.figure()
# Plot the ROC curve
pyplot.plot(ns_fpr, ns_tpr, color='blue', lw=2, linestyle='--', label='No Skill')
pyplot.plot(probs_fpr, probs_tpr, color='red', lw=2, marker='.', label='Logistic Regression')
# Axis labels
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
# show legend
pyplot.legend(loc='lower right')
# show the plot
pyplot.show()
[Figure: ROC curves, false positive rate vs. true positive rate, comparing the no-skill baseline (dashed) with the logistic regression model]
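As a possible follow-up (a sketch, not part of the original notebook), the same ROC arrays can be used to pick an operating threshold, e.g. with Youden's J statistic (J = TPR - FPR); roc_curve's third return value, discarded with _ above, holds the candidate thresholds:

# Sketch: choose a decision threshold by maximizing Youden's J = TPR - FPR
probs_fpr, probs_tpr, thresholds = roc_curve(y_test, probs, pos_label="Yes")
j_scores = probs_tpr - probs_fpr
best_idx = np.argmax(j_scores)
print('Best threshold: %.3f (TPR = %.3f, FPR = %.3f)'
      % (thresholds[best_idx], probs_tpr[best_idx], probs_fpr[best_idx]))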