
Linear Regression using sklearn

In [109]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
In [73]:
n = np.random.randint(-100,4,100).reshape(-1,1) # random integers to add as noise
X = np.arange(100) # X-axis points
X = X.reshape(-1,1) # reshape to (n_samples, 1)
Y = 5 + 3 * X + n
Y = Y.reshape(-1,1) # reshape to (n_samples, 1)

Scatter plot of all points

In [74]:
plt.figure(figsize=(10,5))
plt.scatter(X,Y,color="k",label="Original data")
plt.legend()
plt.title("Linear Regression Example..")
plt.xlabel("X-Independent Variable")
plt.ylabel("Y-Dependent Variable")
plt.show()
Notebook Image
In [59]:
lr = LinearRegression(normalize=True) # note: the normalize argument was deprecated in scikit-learn 1.0 and removed in 1.2; drop it on newer versions
In [60]:
lr.fit(X,Y) # Get the best fit from model
Out[60]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)
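
Since the data was generated as Y = 5 + 3*X + noise, it is worth checking what the model actually recovered. A quick check (not part of the original run; exact numbers depend on the random noise drawn above):

print(lr.intercept_, lr.coef_)  # the slope should land near 3; the intercept near 5 plus the mean of the noise
print(lr.score(X, Y))           # R^2 of the fit on the training data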
In [61]:
# lr.predict(X)
In [62]:
plt.figure(figsize=(10,5))
plt.scatter(X,Y,color="k",label="Original data")
plt.plot(X,lr.predict(X),"-r",label="predicted line")
plt.legend()
plt.title("Linear Regression Example..")
plt.xlabel("X-Independent Variable")
plt.ylabel("Y-Dependent Variable")
plt.show()
Notebook Image
In [252]:
t = np.linspace(-4,5,100).reshape(-1,1)
n = np.random.randint(-2,4,100).reshape(-1,1) # random integers to add as noise
X = np.arange(100) # X-axis points
X = X.reshape(-1,1) # reshape to (n_samples, 1)
Y = (t**2 + (1/np.exp(t**-2))) + n
Y = Y.reshape(-1,1) # reshape to (n_samples, 1)
C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:5: RuntimeWarning: divide by zero encountered in power
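
The warning is benign here: t from np.linspace(-4,5,100) contains 0, so t**-2 evaluates to inf and 1/np.exp(inf) collapses to 0 for that sample. If you want to keep the output clean, one option (a minimal sketch, not in the original notebook) is to suppress the divide-by-zero warning locally with np.errstate:

import numpy as np

t = np.linspace(-4, 5, 100).reshape(-1, 1)
n = np.random.randint(-2, 4, 100).reshape(-1, 1)

# silence only the divide-by-zero warning raised by t**-2 at t == 0;
# the resulting Y values are unchanged (1/np.exp(inf) evaluates to 0)
with np.errstate(divide='ignore'):
    Y = (t**2 + (1/np.exp(t**-2))) + n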
In [254]:
plt.figure(figsize=(10,5))
plt.scatter(X,Y)
plt.title("Original")
plt.show()
Notebook Image
In [333]:
poly = PolynomialFeatures(degree=2)
lin2 = LinearRegression()
In [334]:
X_poly = poly.fit_transform(X)
lin2.fit(X_poly,Y)
Out[334]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
In [335]:
plt.figure(figsize=(10,4))
plt.scatter(X, Y, color = 'blue',label="Original_data") 
plt.plot(X, lin2.predict(X_poly), color = 'red',label="Predicted_data") 
plt.legend()
plt.title('Polynomial Regression') 
plt.show()
Notebook Image
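
PolynomialFeatures(degree=2) expands each input value x into the columns [1, x, x**2] (the leading 1 is the bias column, included by default), and LinearRegression then fits an ordinary linear model on those expanded columns. A small illustration on toy inputs, separate from the notebook's data:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

demo = np.array([[2], [3]])
print(PolynomialFeatures(degree=2).fit_transform(demo))
# [[1. 2. 4.]
#  [1. 3. 9.]]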

How to choose the best degree for a polynomial fit

In [337]:
from sklearn.metrics import mean_squared_error
In [360]:
mse = []
for deg in range(2,10):
    poly = PolynomialFeatures(degree=deg)
    lin2 = LinearRegression()

    X_poly = poly.fit_transform(X)
    lin2.fit(X_poly,Y)
    y_pred = lin2.predict(X_poly)
    mse.append(mean_squared_error(Y,y_pred))
print(mse)
[3.1871429290451814, 3.157346008825358, 3.046307634146208, 3.045670304859617, 3.0449822689540365, 3.041038370570643, 3.0509898523023598, 3.182745351888841]
In [361]:
plt.plot(range(2,10),mse,"-or")
plt.xlabel("Degree value")
plt.ylabel("MSE value")
plt.title("Selecting best degree value")
plt.show()
Notebook Image

The graph above shows that the MSE (mean squared error) drops sharply up to degree 4 and improves only marginally beyond it, so degree 4 is a reasonable choice that balances fit quality against model complexity.
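
One caveat: the MSE above is computed on the same points the models were fit on, so it says nothing about how each degree behaves on unseen data and tends to favour higher degrees. A more robust selection, sketched below using the train_test_split already imported at the top (not part of the original run; exact numbers will differ):

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

test_mse = []
for deg in range(2,10):
    poly = PolynomialFeatures(degree=deg)
    model = LinearRegression()
    model.fit(poly.fit_transform(X_train), Y_train)
    # evaluate on points the model never saw during fitting
    test_mse.append(mean_squared_error(Y_test, model.predict(poly.transform(X_test))))
print(test_mse)  # pick the degree with the lowest held-out MSE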

In [377]:
poly = PolynomialFeatures(degree=4)
lin2 = LinearRegression()

X_poly = poly.fit_transform(X)
lin2.fit(X_poly,Y)
y_pred = lin2.predict(X_poly)
mse = mean_squared_error(Y,y_pred)
print(mse)
plt.figure(figsize=(10,4))
plt.scatter(X, Y, color = 'blue',label="Original_data") 
plt.plot(X, lin2.predict(X_poly), color = 'red',label="Predicted_data") 
plt.legend()
plt.title('Polynomial Regression') 
plt.show()
3.046307634146208
Notebook Image
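
To apply the fitted degree-4 model to new X values, expand them with the same PolynomialFeatures object first; calling lin2.predict on raw X would fail because the model expects the expanded columns. A minimal usage sketch (the new x values are arbitrary, for illustration only):

X_new = np.array([[100], [120]])            # hypothetical new inputs
Y_new = lin2.predict(poly.transform(X_new)) # expand with the same poly, then predict
print(Y_new)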