Jovian
⭐️
Sign In
In [ ]:
# Commit the notebook with the jovian client; the logged output below reports that it is being saved.
import jovian as jvn
jvn.commit()
[jovian] Saving notebook..

KNN using scikit-learn

Load the iris data set

In [1]:
import sklearn
from sklearn.datasets import load_iris
Create a Bunch object holding the iris dataset and all of its attributes
In [2]:
# Load the dataset once; its data, target, feature_names and target_names fields are used below.
iris = load_iris()
In [3]:
# Show the container type returned by load_iris().
type(iris)
Out[3]:
sklearn.utils.Bunch
Display the feature matrix of the iris dataset
In [4]:
# NOTE(review): this displays every row of the feature matrix; a slice such as iris.data[:5] would keep the output short.
iris.data
Out[4]:
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.6, 1.4, 0.1],
       [4.4, 3. , 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [4.5, 2.3, 1.3, 0.3],
       [4.4, 3.2, 1.3, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [5.1, 3.8, 1.9, 0.4],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.6, 0.2],
       [4.6, 3.2, 1.4, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.3, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3. , 4.5, 1.5],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [6.3, 2.3, 4.4, 1.3],
       [5.6, 3. , 4.1, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [5.5, 2.6, 4.4, 1.2],
       [6.1, 3. , 4.6, 1.4],
       [5.8, 2.6, 4. , 1.2],
       [5. , 2.3, 3.3, 1. ],
       [5.6, 2.7, 4.2, 1.3],
       [5.7, 3. , 4.2, 1.2],
       [5.7, 2.9, 4.2, 1.3],
       [6.2, 2.9, 4.3, 1.3],
       [5.1, 2.5, 3. , 1.1],
       [5.7, 2.8, 4.1, 1.3],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 2.7, 5.1, 1.9],
       [7.1, 3. , 5.9, 2.1],
       [6.3, 2.9, 5.6, 1.8],
       [6.5, 3. , 5.8, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [4.9, 2.5, 4.5, 1.7],
       [7.3, 2.9, 6.3, 1.8],
       [6.7, 2.5, 5.8, 1.8],
       [7.2, 3.6, 6.1, 2.5],
       [6.5, 3.2, 5.1, 2. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.8, 3. , 5.5, 2.1],
       [5.7, 2.5, 5. , 2. ],
       [5.8, 2.8, 5.1, 2.4],
       [6.4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.2, 5. , 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [5.6, 2.8, 4.9, 2. ],
       [7.7, 2.8, 6.7, 2. ],
       [6.3, 2.7, 4.9, 1.8],
       [6.7, 3.3, 5.7, 2.1],
       [7.2, 3.2, 6. , 1.8],
       [6.2, 2.8, 4.8, 1.8],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [7.2, 3. , 5.8, 1.6],
       [7.4, 2.8, 6.1, 1.9],
       [7.9, 3.8, 6.4, 2. ],
       [6.4, 2.8, 5.6, 2.2],
       [6.3, 2.8, 5.1, 1.5],
       [6.1, 2.6, 5.6, 1.4],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.4, 5.6, 2.4],
       [6.4, 3.1, 5.5, 1.8],
       [6. , 3. , 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.9, 3.1, 5.1, 2.3],
       [5.8, 2.7, 5.1, 1.9],
       [6.8, 3.2, 5.9, 2.3],
       [6.7, 3.3, 5.7, 2.5],
       [6.7, 3. , 5.2, 2.3],
       [6.3, 2.5, 5. , 1.9],
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
Names of the features (column names)
In [5]:
# Column names of the feature matrix, in column order.
print(iris.feature_names)
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Integers for the species: 0 = setosa, 1 = versicolor, 2 = virginica
In [6]:
# Integer species code for every observation, then the species name that each code stands for.
print(iris.target)
print(iris.target_names)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] ['setosa' 'versicolor' 'virginica']
In [7]:
# Shape is (number of observations, number of features).
print(iris.data.shape)
(150, 4)
The dataset contains 150 observations, each described by 4 features.
Feature matrix - x and response vector - y
In [8]:
# Feature matrix (x) and response vector (y) used throughout the rest of the notebook.
x, y = iris.data, iris.target
In [9]:
#print(x.shape)
#print(y.shape)

Training the model

Split data into training and testing
In [10]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the observations for testing; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=10,
)
In [11]:
#shape of train and test objects
# Print the shape of each train/test object, one per line, in the order
# x_train, x_test, y_train, y_test.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)
(90, 4) (60, 4) (90,) (60,)
k = 3
In [12]:
# Fit a KNN classifier with k = 3 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 3
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]
In [13]:
scores
Out[13]:
{3: 0.9333333333333333}

k = 1

In [14]:
# Fit a KNN classifier with k = 1 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 1
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]

k = 5

In [15]:
# Fit a KNN classifier with k = 5 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 5
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]

k = 7

In [16]:
# Fit a KNN classifier with k = 7 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 7
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]

k = 15

In [17]:
# Fit a KNN classifier with k = 15 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 15
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]

k = 27

In [18]:
# Fit a KNN classifier with k = 27 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 27
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]

k = 59

In [19]:
# Fit a KNN classifier with k = 59 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 59
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]

k = 11

In [20]:
# Fit a KNN classifier with k = 11 on the training split and score it on the test split.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k = 11
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(x_train, y_train)
y_predict = knn.predict(x_test)

# Score once and reuse the value instead of calling accuracy_score twice.
accuracy = metrics.accuracy_score(y_test, y_predict)
scores = {k: accuracy}      # k -> test accuracy (holds only this cell's k)
scores_list = [accuracy]
In [21]:
scores
Out[21]:
{11: 0.9666666666666667}

k ranges from 1 to 25 (the upper bound 26 of range(1, 26) is exclusive)

In [22]:
# Sweep k over 1..25: fit on the training split and record the test accuracy for every k.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

k_range = range(1, 26)  # upper bound is exclusive
scores = {}             # k -> accuracy on the test split
scores_list = []        # accuracies in the same order as k_range
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    y_predict = knn.predict(x_test)
    # Score once per k and store the same value in both containers.
    accuracy = metrics.accuracy_score(y_test, y_predict)
    scores[k] = accuracy
    scores_list.append(accuracy)
In [23]:
scores
Out[23]:
{1: 0.9166666666666666,
 2: 0.9333333333333333,
 3: 0.9333333333333333,
 4: 0.9666666666666667,
 5: 0.95,
 6: 0.9833333333333333,
 7: 0.9666666666666667,
 8: 0.9666666666666667,
 9: 0.9666666666666667,
 10: 0.9666666666666667,
 11: 0.9666666666666667,
 12: 0.9666666666666667,
 13: 0.9666666666666667,
 14: 0.9666666666666667,
 15: 0.95,
 16: 0.9666666666666667,
 17: 0.95,
 18: 0.95,
 19: 0.9333333333333333,
 20: 0.9333333333333333,
 21: 0.9333333333333333,
 22: 0.9333333333333333,
 23: 0.9333333333333333,
 24: 0.9333333333333333,
 25: 0.9166666666666666}
In [27]:
# Refit on the full dataset (x, y) with k = 6, the k with the highest test accuracy in the sweep above.
# NOTE(review): `kNN` differs from the earlier `knn` only by letter case; both names remain bound.
kNN = KNeighborsClassifier(n_neighbors=6)
kNN.fit(x,y)
Out[27]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')
In [28]:
# Map the integer class codes to species names: 0 = setosa, 1 = versicolor, 2 = virginica.
classes = {0:'setosa',1:'versicolor',2:'virginica'}

# Predict the species of new, unseen observations.
# Each row follows the feature order of the training data:
# sepal length, sepal width, petal length, petal width (all in cm).
x_new = [[3,5.3,4.2,2],
         [2.3,4,3,2]]
y_predict = kNN.predict(x_new)

# Print one species name per row of x_new, however many rows it holds
# (previously only the first two predictions were printed).
for label in y_predict:
    print(classes[label])
versicolor setosa