import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing

# Read the wine dataset into a DataFrame.
data = pd.read_csv('datasets/wine.csv')
data.head()  # bare expression: only rendered in an interactive/notebook session

# Replace the textual 'Wine Type' labels with integer class codes. The fitted
# encoder is kept around so the codes can be mapped back to labels later.
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(data['Wine Type'])
data['Wine Type'] = label_encoder.transform(data['Wine Type'])
data.head(10)  # preview the first ten rows after encoding
# Preparing the data
# The encoded 'Wine Type' column is the target; every other column is a feature.
y = data['Wine Type']
X = data.drop(columns='Wine Type')
print(y)
0 0
1 0
2 0
3 0
4 0
..
173 1
174 1
175 1
176 1
177 1
Name: Wine Type, Length: 178, dtype: int32
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Fixed seed: without it both the train/test split and the tree's tie-breaking
# between equally good splits change on every run, so the reported metrics
# cannot be reproduced.
RANDOM_STATE = 42

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions, which matters on a dataset this
# small.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Fit a decision tree with otherwise default hyper-parameters.
model = DecisionTreeClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
max_depth=None, max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort='deprecated',
random_state=None, splitter='best')
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Predict the class of every held-out sample.
y_pred = model.predict(X_test)

# Report the confusion matrix, the per-class precision/recall/F1 table, and
# finally the overall accuracy expressed as a percentage.
print(confusion_matrix(y_test, y_pred), '\n')
print(classification_report(y_test, y_pred), '\n')
accuracy_percent = accuracy_score(y_test, y_pred) * 100
print(accuracy_percent, '%')
[[ 8 0 3]
[ 0 12 0]
[ 0 0 13]]
precision recall f1-score support
0 1.00 0.73 0.84 11
1 1.00 1.00 1.00 12
2 0.81 1.00 0.90 13
accuracy 0.92 36
macro avg 0.94 0.91 0.91 36
weighted avg 0.93 0.92 0.91 36
91.66666666666666 %
import numpy as np
import pandas as pd
import sklearn
# Read the Boston dataset into a DataFrame (rebinds `data`).
data = pd.read_csv('datasets/Boston.csv')
data.head()  # bare expression: only rendered in an interactive/notebook session

# 'medv' is the regression target; every other column is a feature.
y = data['medv']
X = data.drop(columns=['medv'])
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Fixed seed: without it both the train/test split and the tree's tie-breaking
# between equally good splits change on every run, so the reported errors
# cannot be reproduced.
RANDOM_STATE = 42

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=RANDOM_STATE,
)

# Fit a regression tree with otherwise default hyper-parameters.
model = DecisionTreeRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)
DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort='deprecated',
random_state=None, splitter='best')
# Predict the target for every held-out row.
y_pred = model.predict(X_test)

# Put the true values and the predictions side by side, keeping y_test's
# original row index so each row can be traced back to the dataset.
df = y_test.to_frame(name='Actuals').assign(Predicted=y_pred)
print(df)
Actuals Predicted
479 21.4 20.6
367 23.1 15.0
442 18.4 14.2
28 18.4 19.6
326 23.0 25.0
.. ... ...
420 16.7 12.1
67 22.0 20.9
81 23.9 28.0
327 22.2 19.6
68 17.4 18.2
[102 rows x 2 columns]
from sklearn import metrics

# Compute each error measure once; the RMSE is the square root of the MSE
# that has already been calculated.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print('Mean Absolute Error :', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
Mean Absolute Error : 2.7245098039215696
Mean Squared Error: 16.749117647058828
Root Mean Squared Error: 4.092568587948017