Learn practical skills, build real-world projects, and advance your career
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load the Pima Indians diabetes dataset: feature columns followed by a
# final 0/1 outcome column.
data = pd.read_csv("datasets/pimaindians.csv")
data = data.values

# Use every column except the last as features. The original slice
# data[:, 0:7] silently dropped the 8th feature column (off-by-one),
# since the target is column -1.
X = data[:, :-1]
y = data[:, -1]

# random_state pins the split so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# max_iter raised from the default 100: lbfgs emits a ConvergenceWarning
# on this unscaled data (see the warning captured in the notebook output).
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
p = lr.predict(X_test)

print(classification_report(y_test, p))
print(metrics.mean_squared_error(y_test, p))
print(np.sqrt(metrics.mean_squared_error(y_test, p)))

# True vs. predicted labels; both are 0/1 so points pile up on the four
# corners — a confusion matrix is usually a clearer view.
plt.scatter(y_test, p)
precision recall f1-score support 0.0 0.74 0.82 0.78 95 1.0 0.65 0.54 0.59 59 accuracy 0.71 154 macro avg 0.70 0.68 0.69 154 weighted avg 0.71 0.71 0.71 154 0.2857142857142857 0.5345224838248488
c:\users\inker_fseai_sys1\appdata\local\programs\python\python36\lib\site-packages\sklearn\linear_model\_logistic.py:939: ConvergenceWarning: lbfgs failed to converge (status=1): STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. Increase the number of iterations (max_iter) or scale the data as shown in: https://scikit-learn.org/stable/modules/preprocessing.html. Please also refer to the documentation for alternative solver options: https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
<matplotlib.collections.PathCollection at 0x14c12f08438>
Notebook Image
import pandas as pd

# Load the diabetes dataset; header=0 takes the first CSV row as column names.
data = pd.read_csv('datasets/diabetes.csv',header = 0)
data.head()
# Subset of columns used as model inputs (note: 'Age' and 'SkinThickness'
# are intentionally left out here).

features = ['Pregnancies','Insulin','BMI','Glucose','BloodPressure','DiabetesPedigreeFunction'] # Selected features for training our model 
#print(features)
X = data[features] # Feature matrix (not yet split into train/test)
print(X)
Y = data.Outcome # Target labels (0/1 diabetes outcome) — not test data
Pregnancies Insulin BMI Glucose BloodPressure \ 0 6 0 33.6 148 72 1 1 0 26.6 85 66 2 8 0 23.3 183 64 3 1 94 28.1 89 66 4 0 168 43.1 137 40 .. ... ... ... ... ... 763 10 180 32.9 101 76 764 2 0 36.8 122 70 765 5 112 26.2 121 72 766 1 0 30.1 126 60 767 1 0 30.4 93 70 DiabetesPedigreeFunction 0 0.627 1 0.351 2 0.672 3 0.167 4 2.288 .. ... 763 0.171 764 0.340 765 0.245 766 0.349 767 0.315 [768 rows x 6 columns]
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; random_state=0 keeps the split
# (and therefore the reported metrics) reproducible.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y ,train_size = 0.75 , random_state = 0)

# Importing logistic regression module
from sklearn.linear_model import LogisticRegression


# Logistic-regression classifier. max_iter raised from the default 100:
# lbfgs fails to converge on this unscaled data (see the ConvergenceWarning
# captured earlier in the notebook output).
model = LogisticRegression(max_iter=1000)

# Fitting the model on the training data using fit() function 
model.fit(X_train , Y_train)

# Predicting the Y_test using predict() function. 
Y_pred = model.predict(X_test)
print(Y_pred)

from sklearn import metrics

# Summarise model quality on the held-out split: per-class report, the
# (root) mean squared error of the 0/1 predictions, and overall accuracy.
mse = metrics.mean_squared_error(Y_test, Y_pred)

print("---------------------------Classification Report---------------------------\n")
print(metrics.classification_report(Y_test, Y_pred))

print('---------------------------Mean Squared Error---------------------------\n')
print(mse)

print('---------------------------Root Mean Squared Error---------------------------\n')
# RMSE is just the square root of the MSE computed above.
print(np.sqrt(mse))


print("\nAccuracy of Logistic Regression model is:", metrics.accuracy_score(Y_test, Y_pred) * 100)