Jovian
⭐️
Sign In

Decision Tree

Classification Tree

In [176]:
import numpy as np 
import pandas as pd 
import sklearn
In [177]:
data=pd.read_csv('datasets/wine.csv')
data.head()
Out[177]:
In [178]:

from sklearn import preprocessing 
  
# label_encoder object knows how to understand word labels. 
label_encoder = preprocessing.LabelEncoder() 
  
# Encode labels in column 'Wine Type'. 
data['Wine Type']= label_encoder.fit_transform(data['Wine Type']) 
In [179]:
data.head(10)
Out[179]:

Preparing the data

In [169]:
X = data.drop('Wine Type',axis=1)
y = data['Wine Type']
In [170]:
print(y)
0 0 1 0 2 0 3 0 4 0 .. 173 1 174 1 175 1 176 1 177 1 Name: Wine Type, Length: 178, dtype: int32
In [171]:
from sklearn.model_selection import train_test_split
X_train , X_test , y_train,y_test = train_test_split(X,y,train_size=0.80)
In [172]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(X_train,y_train)
Out[172]:
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')
In [174]:
y_pred = model.predict(X_test)
In [175]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred),'\n')
print(classification_report(y_test, y_pred),'\n')
print(sklearn.metrics.accuracy_score(y_test, y_pred)*100,'%')
[[ 8 0 3] [ 0 12 0] [ 0 0 13]] precision recall f1-score support 0 1.00 0.73 0.84 11 1 1.00 1.00 1.00 12 2 0.81 1.00 0.90 13 accuracy 0.92 36 macro avg 0.94 0.91 0.91 36 weighted avg 0.93 0.92 0.91 36 91.66666666666666 %

Regression Tree

In [120]:
import numpy as np 
import pandas as pd 
import sklearn
In [144]:
data=pd.read_csv('datasets/Boston.csv')
data.head()
Out[144]:
In [145]:
X = data.drop(['medv'], axis = 1)
y = data['medv']
In [146]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size = 0.80)
In [147]:
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor()
model.fit(X_train,y_train)
Out[147]:
DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')
In [160]:
y_pred = model.predict(X_test)
In [161]:
df=pd.DataFrame({'Actuals':y_test,'Predicted':y_pred})
In [162]:
print(df)
Actuals Predicted 479 21.4 20.6 367 23.1 15.0 442 18.4 14.2 28 18.4 19.6 326 23.0 25.0 .. ... ... 420 16.7 12.1 67 22.0 20.9 81 23.9 28.0 327 22.2 19.6 68 17.4 18.2 [102 rows x 2 columns]
In [164]:
from sklearn import metrics
print('Mean Absolute Error :' ,metrics.mean_absolute_error(y_test,y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

MAE : 2.7245098039215696 Mean Squared Error: 16.749117647058828 Root Mean Squared Error: 4.092568587948017
In [ ]:
 
In [ ]: