from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Train a logistic-regression classifier on the Pima Indians diabetes data
# and report classification and error metrics.
data = pd.read_csv("datasets/pimaindians.csv")
data = data.values
# Use every column except the last as features. The original slice `0:7`
# hard-coded 7 feature columns while `y` took the last column; on the
# standard 9-column Pima file that silently dropped one feature.
X = data[:, :-1]
y = data[:, -1]  # outcome label (0/1)
# random_state fixes the split so results are reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# max_iter raised from the default 100: the unscaled features make lbfgs hit
# the iteration limit (the ConvergenceWarning pasted below this cell).
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
p = lr.predict(X_test)
print(classification_report(y_test, p))
# On 0/1 labels, MSE equals the misclassification rate.
print(metrics.mean_squared_error(y_test, p))
print(np.sqrt(metrics.mean_squared_error(y_test, p)))
plt.scatter(y_test, p)
precision recall f1-score support
0.0 0.74 0.82 0.78 95
1.0 0.65 0.54 0.59 59
accuracy 0.71 154
macro avg 0.70 0.68 0.69 154
weighted avg 0.71 0.71 0.71 154
0.2857142857142857
0.5345224838248488
c:\users\inker_fseai_sys1\appdata\local\programs\python\python36\lib\site-packages\sklearn\linear_model\_logistic.py:939: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
<matplotlib.collections.PathCollection at 0x14c12f08438>
import pandas as pd
# Loading our dataset (first row of the CSV is the header)
data = pd.read_csv('datasets/diabetes.csv',header = 0)
data.head()
# Selected features for training our model
features = ['Pregnancies','Insulin','BMI','Glucose','BloodPressure','DiabetesPedigreeFunction'] # Selected features for training our model
#print(features)
X = data[features] # Feature matrix (predictor columns)
print(X)
Y = data.Outcome # Target variable: the diabetes Outcome column (0/1)
Pregnancies Insulin BMI Glucose BloodPressure \
0 6 0 33.6 148 72
1 1 0 26.6 85 66
2 8 0 23.3 183 64
3 1 94 28.1 89 66
4 0 168 43.1 137 40
.. ... ... ... ... ...
763 10 180 32.9 101 76
764 2 0 36.8 122 70
765 5 112 26.2 121 72
766 1 0 30.1 126 60
767 1 0 30.4 93 70
DiabetesPedigreeFunction
0 0.627
1 0.351
2 0.672
3 0.167
4 2.288
.. ...
763 0.171
764 0.340
765 0.245
766 0.349
767 0.315
[768 rows x 6 columns]
from sklearn.model_selection import train_test_split
# 75/25 split with a fixed seed so the results are reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.75, random_state=0)
# Importing logistic regression module
from sklearn.linear_model import LogisticRegression
# max_iter raised from the default 100: with unscaled features, lbfgs hits
# the iteration limit and emits the ConvergenceWarning seen earlier in this
# file. (Scaling the features first would be the alternative fix.)
model = LogisticRegression(max_iter=1000)
# Fitting the model on the training data using fit() function
model.fit(X_train, Y_train)
# Predicting the labels for the held-out split using predict() function.
Y_pred = model.predict(X_test)
print(Y_pred)
from sklearn import metrics

# Summarise the held-out performance: per-class report, squared-error
# metrics, overall accuracy, and the raw confusion matrix.
print("---------------------------Classification Report---------------------------\n")
print(metrics.classification_report(Y_test, Y_pred))

# Compute MSE once and reuse it for the RMSE line below.
mse = metrics.mean_squared_error(Y_test, Y_pred)
print('---------------------------Mean Squared Error---------------------------\n')
print(mse)
print('---------------------------Root Mean Squared Error---------------------------\n')
print(np.sqrt(mse))

print("\nAccuracy of Logistic Regression model is:", metrics.accuracy_score(Y_test, Y_pred) * 100)

# Kept under the name `confu` — the heatmap cell below reads it.
confu = metrics.confusion_matrix(Y_test, Y_pred)
print(confu)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline  # IPython magic — only valid inside a notebook cell;
# it is a SyntaxError in a plain .py file, so it is kept here as a comment.
class_names=[0,1] # name of classes
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
#print(tick_marks)
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
# create heatmap of the confusion matrix `confu` computed above
sns.heatmap(pd.DataFrame(confu), annot=True, cmap="YlGnBu" ,fmt='g')
ax.xaxis.set_label_position("top")
plt.tight_layout()
plt.title('Confusion Matrix\n')
plt.ylabel('---------------- Actual Outcomes ----------------')
plt.xlabel('---------------- Predicted Outcomes ----------------\n')
# ROC analysis: score each test sample with its probability of class 1.
y_pred_proba = model.predict_proba(X_test)[:, 1]
# fpr = False positive rate , tpr = True positive rate, swept over thresholds.
fpr, tpr, thresholds = metrics.roc_curve(Y_test, y_pred_proba)
# AUC = area under the ROC curve — a single ranking-quality number.
AUC = metrics.roc_auc_score(Y_test, y_pred_proba)
# Draw the curve with the AUC reported in the legend.
plt.plot(fpr, tpr, label="Diabetes data, AUC = " + str(AUC))
plt.legend(loc=4) # loc=4, means the right bottom position
plt.show()
# Loading libraries
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Multinomial logistic regression on the bundled Iris dataset.
iris = datasets.load_iris()
print(iris)
X = iris.data
y = iris.target
print(X[0:5,:])

# Standardize features to zero mean / unit variance.
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
print(X_train[0:5,:])

# One-vs-rest replaced by a true multinomial (softmax) model with newton-cg.
LR = LogisticRegression(random_state=0, multi_class='multinomial', solver='newton-cg')
model = LR.fit(X_train, y)

# Create new observation: [sepal length, sepal width, petal length, petal width]
new_samples = [[4.7, 3.2, 1.5, 2.2]]
# NOTE(review): the model was trained on standardized features, so this raw
# sample should arguably be passed through scaler.transform first — TODO confirm.
# Predicted probabilities and predicted class (the original also called
# model.predict() once with the result discarded; that call was redundant).
y_pred_proba = model.predict_proba(new_samples)
y_pred_tar = model.predict(new_samples)
# BUG FIX: the original printed `y_pred`, a name never defined anywhere in
# this file (NameError); the probabilities live in `y_pred_proba`.
print(y_pred_proba,'\n')
print(y_pred_tar)
import pandas as pd

# Load the ratings dataset and preview its first rows.
ratings_path = 'datasets/ratings.csv'
data = pd.read_csv(ratings_path)
data.head()
import matplotlib.pyplot as plt
import numpy as np
import math

# Visualise the logistic (sigmoid) activation over [-10, 10]:
# sigmoid(x) = 1 / (1 + e^(-x)), evaluated on a 100-point grid.
x = np.linspace(-10, 10, 100)
z = 1 / (np.exp(-x) + 1)
plt.plot(x, z)
plt.xlabel("X")
plt.ylabel("Sigmoid(X)")
plt.show()