Jovian
⭐️
Sign In

Scikit-Learn

In [21]:
import pandas as pd
import numpy as np
import sklearn
In [22]:
np.random.seed(1)

df= pd.DataFrame({
    'x1': np.random.normal(0, 4, 500),
    'x2': np.random.normal(-2, 2, 500),
    'x3': np.random.normal(4, 2, 500)
})
df #dataframe before scaling
Out[22]:
In [23]:
from sklearn import preprocessing 
In [24]:
scaler = preprocessing.StandardScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df, columns=['x1', 'x2', 'x3'])
    #inverse transform is used to scale the data back to orginal form
inv_scaled_df = scaler.inverse_transform(scaled_df)
print(scaled_df) #dataframe after scaling
#print(inv_scaled_df)
#print(df)
x1 x2 x3 0 1.589332 -1.791369 -0.226042 1 -0.672994 0.033836 -2.456129 2 -0.588430 -0.846312 0.420909 3 -1.139616 -0.324437 -0.393153 4 0.821493 -0.290932 -1.554581 .. ... ... ... 495 -0.892409 -0.144487 0.116289 496 0.481019 -2.364563 -0.695052 497 -2.317386 -0.096384 -0.281432 498 -1.174767 0.338718 1.780107 499 -0.071990 -0.216930 -1.427694 [500 rows x 3 columns]
In [25]:
import matplotlib.pyplot as plt
import seaborn as sns
In [26]:
plt.style.use('ggplot')

fig, (ax1, ax2) = plt.subplots(ncols=2)
ax1.set_title('Data before Scaling')
sns.kdeplot(df['x1'], ax=ax1)
sns.kdeplot(df['x2'], ax=ax1)
sns.kdeplot(df['x3'], ax=ax1)
ax2.set_title('Data after StandardScaler')
sns.kdeplot(scaled_df['x1'], ax=ax2)
sns.kdeplot(scaled_df['x2'], ax=ax2)
sns.kdeplot(scaled_df['x3'], ax=ax2)
plt.show()
Notebook Image
In [27]:

df = pd.DataFrame({
    'x1': np.random.randint(-100, 100, 500),
    'x2': np.random.randint(-80, 80, 500),
    'x3': np.random.randint(-150,150, 500),
})
df
Out[27]:
In [28]:
scaler = preprocessing.Normalizer()
norm_df = scaler.fit_transform(df)
norm_df = pd.DataFrame(scaled_df, columns=df.columns)

print('Data frame before Standardization \n{} \n'.format(df))
print('Data frame after Standardization \n{} \n'.format(norm_df))
Data frame before Standardization x1 x2 x3 0 28 -15 34 1 -13 28 92 2 3 -30 -31 3 63 38 -128 4 73 -76 -20 .. .. .. ... 495 95 14 -77 496 94 59 132 497 7 -38 112 498 -26 -58 -83 499 -31 -52 -73 [500 rows x 3 columns] Data frame after Standardization x1 x2 x3 0 1.589332 -1.791369 -0.226042 1 -0.672994 0.033836 -2.456129 2 -0.588430 -0.846312 0.420909 3 -1.139616 -0.324437 -0.393153 4 0.821493 -0.290932 -1.554581 .. ... ... ... 495 -0.892409 -0.144487 0.116289 496 0.481019 -2.364563 -0.695052 497 -2.317386 -0.096384 -0.281432 498 -1.174767 0.338718 1.780107 499 -0.071990 -0.216930 -1.427694 [500 rows x 3 columns]
In [29]:
import pandas as pd
# load training dataset of titanic as df
data_ = pd.read_csv("datasets/Titanic.csv")
print(data_.head())
#Extracting 2 categorial variables from data set
df = data_.loc[:,['Class','Sex','Age']]
df.head(7)


df_encoded = df #initialising
from sklearn import preprocessing
encoder = preprocessing.LabelEncoder()
Sex_encoded = encoder.fit_transform(df.loc[:,'Sex'])
Sex_encoded
Unnamed: 0 Class Sex Age Survived Freq 0 1 1st Male Child No 0 1 2 2nd Male Child No 0 2 3 3rd Male Child No 35 3 4 Crew Male Child No 0 4 5 1st Female Child No 0
Out[29]:
array([1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0])
In [41]:
import numpy as np
import pandas as pd
from sklearn import datasets

# load dataset into Pandas DataFrame
iris = datasets.load_iris()
iris_df = pd.DataFrame(data= np.c_[iris['data'], iris['target']],columns= iris['feature_names'] + ['target'])

from sklearn.preprocessing import StandardScaler
features = iris.feature_names

# Separating out the features
x = iris_df.loc[:, features]
# Separating out the target
y = iris_df.loc[:,['target']]
# Standardizing the features
x = StandardScaler().fit_transform(x)
iris_df.head()
Out[41]:
In [31]:

df = pd.DataFrame({
    'x1': np.random.randint(-100, 100, 500),
    'x2': np.random.randint(-80, 80, 500),
    'x3': np.random.randint(-150,150, 500),
})
#Normalizing data
scaler = preprocessing.Normalizer()
norm_df = scaler.fit_transform(df)
norm_df = pd.DataFrame(norm_df, columns=df.columns)
#Each point is now within 1 unit range
print('Data frame before Standardization \n{} \n'.format(df))
print('Data frame after Standardization \n{} \n'.format(norm_df))
Data frame before Standardization x1 x2 x3 0 61 73 -101 1 -9 -13 -16 2 -49 65 10 3 64 -61 -10 4 61 -43 -92 .. .. .. ... 495 42 27 -97 496 1 -9 41 497 -21 -46 -33 498 20 -28 122 499 90 72 73 [500 rows x 3 columns] Data frame after Standardization x1 x2 x3 0 0.439646 0.526134 -0.727939 1 -0.400099 -0.577920 -0.711287 2 -0.597472 0.792565 0.121933 3 0.719283 -0.685566 -0.112388 4 0.514919 -0.362976 -0.776599 .. ... ... ... 495 0.384981 0.247488 -0.889123 496 0.023816 -0.214346 0.976467 497 -0.347785 -0.761815 -0.546519 498 0.157779 -0.220891 0.962452 499 0.659682 0.527745 0.535075 [500 rows x 3 columns]
In [37]:
z = np.array([1,2,5,8,6,3,])
print(z)
print(z.reshape(-1,1))
[1 2 5 8 6 3] [[1] [2] [5] [8] [6] [3]]
In [42]:

import matplotlib.pyplot as plt
Salaray_data_url = "https://storage.googleapis.com/kaggle-forum-message-attachments/349079/9710/Salary_Data.csv"
salary_dataset = pd.read_csv(Salaray_data_url)
X = salary_dataset.iloc[:,0:1] #year of experience
y = salary_dataset.iloc[:,1] #salary

# Splitting the dataset to train and test using sklearn.model selection tool
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

# Fitting the model using linear regression tool in sklearn
from sklearn.linear_model import LinearRegression
linearRegressor = LinearRegression()
linearRegressor.fit(X_train, y_train)

# Plotting the training data on the regression line
plt.scatter(X_train, y_train, color = 'blue')
plt.plot(X_train, linearRegressor.predict(X_train), color = 'red')
plt.title('Salary vs Experience',fontsize = 14)
plt.xlabel('Years of Experience',fontsize = 12)
plt.ylabel('Salary', fontsize = 12)
plt.show()
Notebook Image
In [48]:

y_pred = linearRegressor.predict(X_test)

# Plotting the training data on the regression line
plt.scatter(X_test,y_test, color = 'blue')
plt.plot(X_train, linearRegressor.predict(X_train), color = 'red')
plt.title('Prediction',fontsize = 14)
plt.xlabel('Years of Experience',fontsize = 12)
plt.ylabel('Salary',fontsize = 12)
plt.show()
Notebook Image
In [54]:

import matplotlib.pyplot as plt
from skimage import data

img = data.astronaut()
plt.imshow(img)
plt.show()

# Dimension of image: pixels in (r,c)
img_size = img.shape
print('Size of image: \n{} \n'.format(img_size))
dim1, dim2 = img.shape[0], img.shape[1]
num_channels = img.shape[2]

# RGB Colour image has three channels: R,G,B
print('No. of channels: \n{}'.format(num_channels))

#dimension

print('Dims: \n{}\t{}'.format(dim1, dim2))

Notebook Image
Size of image: (512, 512, 3) No. of channels: 3 Dims: 512 512
In [60]:

from skimage.color import rgb2gray

#Convering image to grayscale
grayscale_img = rgb2gray(img)
plt.imshow(grayscale_img, cmap=plt.cm.gray)
plt.title("Grayscale Image", fontsize = 12)
plt.show()

# Note: Gray scale image channels = 1 
print('Size of image: \n{} \n'.format(grayscale_img.shape))

#Extracting red channel by array multiplication
# We can tint the image by scaling each of the color channels
# by a different scalar constant. The image has shape (512,512, 3);
# we multiply it by the array [1, 0, 0] of to extract the red channel
img_red = img * [1, 0, 0]
plt.imshow(img_red)
plt.title("Red channel image", fontsize = 12)
plt.show()

from skimage.transform import resize

# Resize the greyscale image to 300 by 300 pixels.
img_resized = resize(grayscale_img, (300, 300),anti_aliasing=True)
plt.imshow(img_resized,cmap='gray')
plt.title("Resized image", fontsize = 12)
plt.show()
print("Shape of the resized image \n", img_resized.shape)

# blue img
img_blue = img * [0, 0, 1]
plt.imshow(img_blue)
plt.title("Blue channel image", fontsize = 12)
plt.show()

# Green Img

img_green = img * [0, 1, 0]
plt.imshow(img_green)
plt.title("Blue channel image", fontsize = 12)
plt.show()

# mixed img

img_mix1 = img * [1, 1, 0]
plt.imshow(img_mix1)
plt.title("Mixed channel image 1", fontsize = 12)
plt.show()

img_mix2 = img * [0, 1, 1]
plt.imshow(img_mix2)
plt.title("Mixed channel image 2", fontsize = 12)
plt.show()

img_mix3 = img * [1, 0, 1]
plt.imshow(img_mix3)
plt.title("Mixed channel image 3", fontsize = 12)
plt.show()
Notebook Image
Size of image: (512, 512)
Notebook Image
Notebook Image
Shape of the resized image (300, 300)
Notebook Image
Notebook Image
Notebook Image
Notebook Image