Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Fit a logistic-regression classifier on the Pima Indians diabetes dataset,
# then print classification metrics plus MSE/RMSE of the predicted labels.
data = pd.read_csv("datasets/pimaindians.csv")
data = data.values
# All columns except the last are features; the original `data[:,0:7]` kept
# only 7 of the 8 feature columns, silently dropping the last one.
X = data[:, :-1]
y = data[:, -1]  # outcome label (last column)

# random_state fixes the split so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# max_iter raised from the default 100: lbfgs emits a ConvergenceWarning on
# this unscaled data (the warning is visible in the captured output below).
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
p = lr.predict(X_test)

print(classification_report(y_test, p))
print(metrics.mean_squared_error(y_test, p))
print(np.sqrt(metrics.mean_squared_error(y_test, p)))
plt.scatter(y_test, p)
plt.show()  # without show() the scatter plot never renders when run as a script
precision recall f1-score support
0.0 0.74 0.82 0.78 95
1.0 0.65 0.54 0.59 59
accuracy 0.71 154
macro avg 0.70 0.68 0.69 154
weighted avg 0.71 0.71 0.71 154
0.2857142857142857
0.5345224838248488
c:\users\inker_fseai_sys1\appdata\local\programs\python\python36\lib\site-packages\sklearn\linear_model\_logistic.py:939: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
<matplotlib.collections.PathCollection at 0x14c12f08438>
import pandas as pd

# Read the diabetes dataset; row 0 of the file supplies the column names.
data = pd.read_csv('datasets/diabetes.csv',header = 0)
data.head()  # notebook-style peek at the first rows (return value unused in a script)

# Columns used as model inputs.
features = ['Pregnancies','Insulin','BMI','Glucose','BloodPressure','DiabetesPedigreeFunction']

X = data[features]  # feature matrix (not yet split into train/test)
print(X)
Y = data.Outcome  # target labels: the Outcome column
Pregnancies Insulin BMI Glucose BloodPressure \
0 6 0 33.6 148 72
1 1 0 26.6 85 66
2 8 0 23.3 183 64
3 1 94 28.1 89 66
4 0 168 43.1 137 40
.. ... ... ... ... ...
763 10 180 32.9 101 76
764 2 0 36.8 122 70
765 5 112 26.2 121 72
766 1 0 30.1 126 60
767 1 0 30.4 93 70
DiabetesPedigreeFunction
0 0.627
1 0.351
2 0.672
3 0.167
4 2.288
.. ...
763 0.171
764 0.340
765 0.245
766 0.349
767 0.315
[768 rows x 6 columns]
from sklearn.model_selection import train_test_split

# 75% train / 25% test split; random_state=0 keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.75, random_state=0)

# Importing logistic regression module
from sklearn.linear_model import LogisticRegression

# max_iter raised from the default 100: lbfgs fails to converge on this
# unscaled data and emits a ConvergenceWarning (as seen in the earlier
# run of the same pipeline captured above).
model = LogisticRegression(max_iter=1000)

# Fitting the model on the training data using fit() function
model.fit(X_train, Y_train)

# Predicting the Y_test using predict() function.
Y_pred = model.predict(X_test)
print(Y_pred)

from sklearn import metrics

print("---------------------------Classification Report---------------------------\n")
print(metrics.classification_report(Y_test, Y_pred))
print('---------------------------Mean Squared Error---------------------------\n')
print(metrics.mean_squared_error(Y_test, Y_pred))
print('---------------------------Root Mean Squared Error---------------------------\n')
print(np.sqrt(metrics.mean_squared_error(Y_test, Y_pred)))
print("\nAccuracy of Logistic Regression model is:", metrics.accuracy_score(Y_test, Y_pred)*100)