Clustering - Notebook by Andmac12 (andmac12)

Learn practical skills, build real-world projects, and advance your career

Created 3 years ago

import pandas as pd 
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import mysql.connector
import scipy as sp
from sklearn.cluster import KMeans
from  sklearn.preprocessing import scale

# Connect to the local MySQL database "bank_ad" and pull every row of the
# `bank` table into memory.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# loading them from the environment or a config file kept out of version
# control.
db_settings = {
    "host": "localhost",
    "user": "root",
    "password": "expresso",
    "database": "bank_ad",
}
mydb = mysql.connector.connect(**db_settings)

# Run the query and fetch its full result set.
mycursor = mydb.cursor()
mycursor.execute("SELECT * FROM bank")
database = mycursor.fetchall()

# Apply the seaborn look to every matplotlib figure drawn below.
# matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8'
# and newer releases no longer accept the old name, so use whichever of the
# two is actually installed.
seaborn_style = 'seaborn' if 'seaborn' in mpl.style.available else 'seaborn-v0_8'
mpl.style.use(seaborn_style)

## Read the raw data and build the table
# The file is parsed with ';' as the field separator.
uri = "Data/telemarketing.csv"
tl_df = pd.read_csv(filepath_or_buffer=uri, sep=';')



# Toy data set: 22 hand-written points in the (x, y) plane.
x_values = [
    12, 20, 28, 18, 29, 33, 24, 45, 45, 52, 51,
    52, 55, 53, 55, 40, 53, 55, 61, 64, 69, 74,
]
y_values = [
    39, 36, 30, 52, 54, 46, 55, 59, 63, 70, 66,
    63, 70, 66, 63, 58, 23, 14, 8, 19, 7, 24,
]
df = pd.DataFrame({'x': x_values, 'y': y_values})

# Draw two reproducible random label vectors (seed fixed at 200):
# z takes values in {0, 1} and k takes values in {0, 1, 2}.
np.random.seed(200)
z = np.random.randint(0, 2, size=22)
k = np.random.randint(0, 3, size=22)

# Attach the labels (z first, then k) and swap the integer codes for names.
df['z'] = z
df['k'] = k
df['k'] = df['k'].replace({0: 'cao', 1: 'gato', 2: 'rato'})
df['z'] = df['z'].replace({0: 'adotado', 1: 'canil'})


#sns.scatterplot(x="x", y="y", style="k", data=df)

# Standardise the two numeric features so both contribute equally to the
# distances k-means works with.
data = df[['x', 'y']]
X = scale(data)

# Fit a 3-cluster k-means model; random_state makes the fit reproducible.
# NOTE(review): the fitted clusters (clustering.labels_) are not used by the
# plot below, which colours the points by animal label instead.
clustering = KMeans(n_clusters=3, random_state=5).fit(X)

animals = np.array(df['k'])
status = np.array(df['z'].unique())
color_theme = np.array(['darkgray', 'lightsalmon', 'powderblue'])
print(status)

# `animals` holds string labels, but a NumPy array can only be indexed with
# integer or boolean arrays (indexing with strings raises IndexError).
# Convert each label to a stable integer code first: cao -> 0, gato -> 1,
# rato -> 2, matching the codes the labels were created from.
animal_codes = pd.Categorical(animals, categories=['cao', 'gato', 'rato']).codes

# One colour per animal label.
plt.scatter(x=df.x, y=df.y, c=color_theme[animal_codes], s=50)

['adotado' 'canil']

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-32-7b6d30533245> in <module>
      5 
      6 
----> 7 plt.scatter(x=df.x, y=df.y, c=color_theme[animals], s=50)

IndexError: arrays used as indices must be of integer (or boolean) type

# Group a subset of the telemarketing columns by the target column 'y' and
# compute the per-group mean of each numeric column.
gp = tl_df[['y','age', 'duration', 'month', 'previous', 'cons.price.idx', 'cons.conf.idx', 'euribor3m']].groupby(['y'])
# numeric_only=True keeps mean() from raising TypeError on non-numeric
# columns under pandas >= 2.0; older pandas dropped such columns silently,
# so the result is the same there.
# NOTE(review): presumably 'month' is a text column -- confirm against the CSV.
means = gp.mean(numeric_only=True)

# for key, item in gp:
#   print(gp.get_group(key), "\n\n")