Jovian
⭐️
Sign In

PCA: for Dimensionality Reduction

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.datasets import load_breast_cancer

Load data set

In [3]:
brc = load_breast_cancer()
brc.keys()
Out[3]:
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
In [5]:
brc.data.shape
Out[5]:
(569, 30)
In [6]:
df = pd.DataFrame(brc.data,columns=brc.feature_names)
In [16]:
df["target"] = brc.target
In [19]:
df.head(2)
Out[19]:
In [59]:
df.shape # Total 30 features, excluding target
Out[59]:
(569, 31)

Perform standard scalar

In [20]:
sc = StandardScaler()
In [23]:
scaled_data = sc.fit_transform(df)
scaled_data
Out[23]:
array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.75062224,
         1.93701461, -1.29767572],
       [ 1.82982061, -0.35363241,  1.68595471, ..., -0.24388967,
         0.28118999, -1.29767572],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.152255  ,
         0.20139121, -1.29767572],
       ...,
       [ 0.70228425,  2.0455738 ,  0.67267578, ..., -1.10454895,
        -0.31840916, -1.29767572],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  1.91908301,
         2.21963528, -1.29767572],
       [-1.80840125,  1.22179204, -1.81438851, ..., -0.04813821,
        -0.75120669,  0.77060855]])
In [24]:
scaled_data_df = pd.DataFrame(scaled_data,columns=df.keys())
In [25]:
scaled_data_df
Out[25]:
In [54]:
pc = PCA(n_components=2)
In [55]:
pc.fit(scaled_data)
Out[55]:
PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)
In [56]:
op = pc.fit_transform(scaled_data)
op
Out[56]:
array([[ 9.22577011,  2.11619563],
       [ 2.6558016 , -3.78477586],
       [ 5.89249212, -1.00557896],
       ...,
       [ 1.53007727, -1.95887147],
       [10.40500789,  1.84907792],
       [-5.50486158, -0.76634841]])
In [57]:
sns.scatterplot(op[:,0],op[:,1],hue=df.target,palette="viridis")
Out[57]:
<matplotlib.axes._subplots.AxesSubplot at 0x16a1b0e84c8>
Notebook Image
In [ ]:
 
In [ ]: