Denetimsiz Öğrenme

Amerika Birleşik Devletleri'ndeki eyaletlerin suç oranlarına göre kümelenmesi istenmektedir.

# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from yellowbrick.cluster import KElbowVisualizer
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder

# Load the dataset; the first CSV column is used as the row index.
df = pd.read_csv("USArrests.csv", index_col=0)

print(df.head())
print("\n")

# Min-max scale every feature into the [0, 1] range ahead of the
# distance-based methods (this is range scaling, not z-score standardization).
sc = MinMaxScaler(feature_range=(0, 1))
df = sc.fit(df).transform(df)

# `df` is now a NumPy array rather than a DataFrame, so slice the first
# five rows instead of calling head().
df[:5]

            Murder  Assault  UrbanPop  Rape
Alabama       13.2      236        58  21.2
Alaska        10.0      263        48  44.5
Arizona        8.1      294        80  31.0
Arkansas       8.8      190        50  19.5
California     9.0      276        91  40.6

array([[0.74698795, 0.65410959, 0.44067797, 0.35917313],
       [0.55421687, 0.74657534, 0.27118644, 0.96124031],
       [0.43975904, 0.85273973, 0.81355932, 0.6124031 ],
       [0.48192771, 0.49657534, 0.30508475, 0.31524548],
       [0.4939759 , 0.79109589, 1.        , 0.86046512]])

Denetimsiz Öğrenme

K-Ortalamalar