Jovian
⭐️
Sign In

Importing Libraries

In [ ]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

Importing Dataset

In [ ]:
# Read the salary CSV into a DataFrame.
# NOTE(review): the path is absolute - presumably a Colab upload location; adjust it when running elsewhere.
dataset = pd.read_csv('/content/Salary_Data.csv')
# Bare last expression so the notebook renders the first rows as a table.
dataset.head()
Out[]:
In [ ]:
# Print the column names, non-null counts and dtypes of the loaded frame.
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 30 entries, 0 to 29 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 YearsExperience 30 non-null float64 1 Salary 30 non-null float64 dtypes: float64(2) memory usage: 608.0 bytes
In [ ]:
# Summary statistics of the numeric columns (rendered as the cell's output).
dataset.describe()
Out[]:

Plotting X (years of experience) against Y (salary)

In [ ]:
# The "seaborn-darkgrid" style was renamed to "seaborn-v0_8-darkgrid" in
# Matplotlib 3.6 and the old name was later removed, so pick whichever name
# this installation provides (falling back to the default style).
darkgrid_styles = ("seaborn-v0_8-darkgrid", "seaborn-darkgrid")
plt.style.use(next((s for s in darkgrid_styles if s in plt.style.available), "default"))

# Raw data: salary as a function of years of experience.
plt.scatter(dataset.YearsExperience,dataset.Salary)
plt.xlabel("Years of experience")
plt.ylabel("Salary")

Out[]:
Text(0, 0.5, 'Salary')
Notebook Image

Separating the independent variable and dependent variable

X=YearsExperience

Y=Salary

In [ ]:
# Feature matrix: every column except the last one, kept 2-D (rows x features).
X = dataset.iloc[:, :-1].to_numpy()
# Target vector: the column at position 1 (Salary), as a 1-D array.
Y = dataset.iloc[:, 1].to_numpy()

Linear Regression Class

In [ ]:
class Linear_Regression():
  """Linear regression with an intercept, trained by batch gradient descent.

  The model is ``Y_pred = X . theta + bias``. Every iteration takes one step of
  size ``learning_rate`` against the gradient of the halved mean squared error
  computed over the whole training set.

  Parameters
  ----------
  learning_rate : float
      Step size applied to both the weight and the bias gradients.
  iterations : int, default 1000
      Number of gradient-descent steps performed by ``fit``.
  """

  def __init__(self,learning_rate,iterations=1000):
    self.learning_rate = learning_rate
    self.iterations = iterations
    # theta stays None until fit() has run; predict() uses that to detect an
    # unfitted model.
    self.theta = None
    self.bias = 0

  def predict(self,X):
    """Return ``X . theta + bias`` for a (samples, features) array ``X``.

    Raises
    ------
    RuntimeError
        If called before ``fit``.
    """
    if self.theta is None:
      raise RuntimeError("Linear_Regression.predict() called before fit()")
    return np.dot(X,self.theta) + self.bias

  def fit(self,X,Y):
    """Learn ``theta`` and ``bias`` from training data and return ``self``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        m training samples with n features each.
    Y : array-like with m values
        Targets; either shape (m,) or a single column of shape (m, 1).

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, is empty, or ``Y`` does not hold one value per row.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if X.ndim != 2:
      raise ValueError("X must be 2-D (samples, features); got shape %s" % (X.shape,))
    # self.m = number of samples, self.n = number of features
    self.m , self.n = X.shape
    if self.m == 0:
      raise ValueError("cannot fit on an empty dataset")
    if Y.size != self.m:
      raise ValueError("Y must hold one target per row of X; got %d targets for %d rows"
                       % (Y.size, self.m))
    # Flatten so a column vector (m, 1) cannot broadcast the residual to (m, m).
    Y = Y.reshape(-1)

    # Start every fit from zeros so that refitting does not continue from the
    # parameters of a previous run.
    self.theta = np.zeros(self.n)
    self.bias = 0
    self.X = X
    self.Y = Y
    for _ in range(self.iterations):
      self.gradient_descent()
    return self

  def gradient_descent(self):
    """Perform one batch gradient-descent update in place and return ``self``."""
    # Residual of the current predictions, shared by both gradients.
    residual = self.predict(self.X) - self.Y
    dtheta = (1/self.m) * np.dot(self.X.T,residual)
    dbias = (1/self.m) * np.sum(residual)

    # Step against the gradient.
    self.theta = self.theta - (self.learning_rate)*dtheta
    self.bias = self.bias - (self.learning_rate)*dbias
    return self







Splitting the dataset and Fitting the model

In [ ]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0
)
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

# Train the from-scratch model (fit returns the model itself) and predict the held-out rows.
model = Linear_Regression(learning_rate=0.02, iterations=2000).fit(x_train, y_train)
y_pred = model.predict(x_test)
(21, 1) (9, 1) (21,) (9,)

Plotting the predicted regression line

In [ ]:
# Held-out points, with the from-scratch model's predictions drawn as a red line.
plt.scatter(x_test,y_test)
plt.plot(x_test,y_pred,color='red')
# Axis label matches the one used for the raw-data scatter plot.
plt.xlabel("Years of experience")
plt.ylabel("Salary")
plt.title("Linear Regression from scratch")
Out[]:
Text(0.5, 1.0, 'Linear Regression from scratch')
Notebook Image

Implementation using sklearn Library

In [ ]:
# Reference fit: scikit-learn's LinearRegression on the same training split.
# LinearRegression is imported in the notebook's import cell with the other dependencies.
sklearn_model = LinearRegression()
sklearn_model.fit(x_train,y_train)
y_pred_sklearn_model = sklearn_model.predict(x_test)

# Held-out points, with the scikit-learn model's predictions drawn as a blue line.
plt.scatter(x_test,y_test)
plt.plot(x_test,y_pred_sklearn_model,color='blue')
# Axis label matches the one used for the raw-data scatter plot.
plt.xlabel("Years of experience")
plt.ylabel("Salary")
plt.title("Linear Regression using sklearn")


Out[]:
Text(0.5, 1.0, 'Linear Regression using sklearn')
Notebook Image