Jovian
⭐️
Sign In
In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report
from  sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

data=pd.read_csv("datasets/pimaindians.csv")
data=data.values
X=data[:,0:7]
y=data[:,-1]
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
lr=LogisticRegression()
lr.fit(X_train,y_train)
p=lr.predict(X_test)

print(classification_report(y_test,p))
print(metrics.mean_squared_error(y_test,p))
print(np.sqrt(metrics.mean_squared_error(y_test,p)))

plt.scatter(y_test,p)

precision recall f1-score support 0.0 0.74 0.82 0.78 95 1.0 0.65 0.54 0.59 59 accuracy 0.71 154 macro avg 0.70 0.68 0.69 154 weighted avg 0.71 0.71 0.71 154 0.2857142857142857 0.5345224838248488
c:\users\inker_fseai_sys1\appdata\local\programs\python\python36\lib\site-packages\sklearn\linear_model\_logistic.py:939: ConvergenceWarning: lbfgs failed to converge (status=1): STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. Increase the number of iterations (max_iter) or scale the data as shown in: https://scikit-learn.org/stable/modules/preprocessing.html. Please also refer to the documentation for alternative solver options: https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)
Out[2]:
<matplotlib.collections.PathCollection at 0x14c12f08438>
Notebook Image
In [8]:
import pandas as pd

# Loading our dataset
data = pd.read_csv('datasets/diabetes.csv',header = 0)
data.head()
Out[8]:
In [9]:
# Selected features for training our model 

features = ['Pregnancies','Insulin','BMI','Glucose','BloodPressure','DiabetesPedigreeFunction'] # Selected features for training our model 
#print(features)
In [10]:
X = data[features] # Train data
print(X)
Y = data.Outcome # Test data
Pregnancies Insulin BMI Glucose BloodPressure \ 0 6 0 33.6 148 72 1 1 0 26.6 85 66 2 8 0 23.3 183 64 3 1 94 28.1 89 66 4 0 168 43.1 137 40 .. ... ... ... ... ... 763 10 180 32.9 101 76 764 2 0 36.8 122 70 765 5 112 26.2 121 72 766 1 0 30.1 126 60 767 1 0 30.4 93 70 DiabetesPedigreeFunction 0 0.627 1 0.351 2 0.672 3 0.167 4 2.288 .. ... 763 0.171 764 0.340 765 0.245 766 0.349 767 0.315 [768 rows x 6 columns]
In [ ]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y ,train_size = 0.75 , random_state = 0)

# Importing logistic regression module
from sklearn.linear_model import LogisticRegression


# Defining a regression classifier object using Logistic Regression() function.
model = LogisticRegression()

# Fitting the model on the training data using fit() function 
model.fit(X_train , Y_train)

# Predicting the Y_test using predict() function. 
Y_pred = model.predict(X_test)
print(Y_pred)

from sklearn import metrics

print("---------------------------Classification Report---------------------------\n")
print(metrics.classification_report(Y_test,Y_pred))

print('---------------------------Mean Squared Error---------------------------\n')
print(metrics.mean_squared_error(Y_test,Y_pred))

print('---------------------------Root Mean Squared Error---------------------------\n')
print(np.sqrt(metrics.mean_squared_error(Y_test,Y_pred)))


print("\nAccuracy of Logistic Regression model is:",metrics.accuracy_score(Y_test, Y_pred)*100)
In [ ]:
confu = metrics.confusion_matrix(Y_test,Y_pred)
print(confu)
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


class_names=[0,1] # name of classes
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
#print(tick_marks)

plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)

# create heatmap
sns.heatmap(pd.DataFrame(confu), annot=True, cmap="YlGnBu" ,fmt='g')
ax.xaxis.set_label_position("top")

plt.tight_layout()
plt.title('Confusion Matrix\n')
plt.ylabel('---------------- Actual Outcomes ----------------')
plt.xlabel('---------------- Predicted Outcomes ----------------\n')

In [ ]:
y_pred_proba = model.predict_proba(X_test)[::,1]

# fpr = False positive rate , tpr = True positive rate 
fpr, tpr, thresholds = metrics.roc_curve(Y_test,y_pred_proba)

# Syntax : sklearn.metrics.auc(x, y) , Where x = x coordinates and y = y coordinates
# auc = area under the curve
AUC = metrics.roc_auc_score(Y_test, y_pred_proba)  

# Plotting the ROC curve 
plt.plot(fpr,tpr,label = "Diabetes data, AUC = "+str(AUC))
plt.legend(loc=4) # loc=4, means the right bottom position
plt.show()

Multinominal Logistic Regression

In [ ]:
# Loading libraries 

from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
In [ ]:
# Load data
iris = datasets.load_iris()
print(iris)
X = iris.data
y = iris.target
print(X[0:5,:])
In [ ]:
# Standarize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X)
print(X_train[0:5,:])
In [ ]:
# Create one-vs-rest logistic regression object
LR = LogisticRegression(random_state=0, multi_class='multinomial', solver='newton-cg')
model = LR.fit(X_train, y)
# Create new observation
new_samples = [[4.7, 3.2, 1.5, 2.2]]
In [ ]:
# Predict class
model.predict(new_samples)

# View predicted probabilities
y_pred_proba = model.predict_proba(new_samples)
y_pred_tar = model.predict(new_samples)
print(y_pred,'\n')
print(y_pred_tar)

Ordinal Logistic regression

In [185]:
import pandas as pd

data = pd.read_csv('datasets/ratings.csv')
data.head()
Out[185]:
In [209]:
import matplotlib.pyplot as plt 
import numpy as np 
import math 
  
x = np.linspace(-10, 10, 100) 
z = 1/(1 + np.exp(-x)) 
  
plt.plot(x, z) 
plt.xlabel("X") 
plt.ylabel("Sigmoid(X)") 
  
plt.show() 
Notebook Image
In [ ]: