Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
# libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score, KFold
from sklearn import metrics
from sklearn.utils import resample
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the cancer dataset directly from the course repository.
cancer_df = pd.read_csv(
    "https://github.com/mpourhoma/CS4661/raw/master/Cancer.csv"
)
cancer_df.head()

# Columns used as model inputs (one per line for easier diffing).
feature_cols = [
    'Clump_Thickness',
    'Uniformity_of_Cell_Size',
    'Uniformity_of_Cell_Shape',
    'Marginal_Adhesion',
    'Single_Epithelial_Cell_Size',
    'Bare_Nuclei',
    'Bland_Chromatin',
    'Normal_Nucleoli',
    'Mitoses',
]

# Feature matrix and target label column.
X = cancer_df[feature_cols]
y = cancer_df['Malignant_Cancer']

# Preview the first rows of the features, then of the label.
print(X.head(), y.head(), sep="\n")
Clump_Thickness Uniformity_of_Cell_Size Uniformity_of_Cell_Shape \
0 5 1 1
1 5 4 4
2 3 1 1
3 6 8 8
4 4 1 1
Marginal_Adhesion Single_Epithelial_Cell_Size Bare_Nuclei \
0 1 2 1
1 5 7 10
2 1 2 2
3 1 3 4
4 3 2 1
Bland_Chromatin Normal_Nucleoli Mitoses
0 3 1 1
1 3 2 1
2 3 1 1
3 3 7 1
4 3 1 1
0 0
1 0
2 0
3 0
4 0
Name: Malignant_Cancer, dtype: int64
# Hold out 35% of the rows for testing; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    random_state=3,
)

# Report the dimensions of each partition, one per line
# (train features, train labels, test features, test labels).
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")
(97, 9)
(97,)
(53, 9)
(53,)
# Build a decision-tree classifier with a fixed seed, train it on the
# training partition and predict labels for the held-out test rows.
# (fit() returns the fitted estimator, so the calls can be chained.)
my_decisiontree = DecisionTreeClassifier(random_state=3)
y_predict_dt = my_decisiontree.fit(X_train, y_train).predict(X_test)

# Classification report comparing the predictions with the true test labels.
print(classification_report(y_test, y_predict_dt))

# Raw predicted labels for the test set.
print(y_predict_dt)
precision recall f1-score support
0 0.74 0.91 0.82 22
1 0.92 0.77 0.84 31
accuracy 0.83 53
macro avg 0.83 0.84 0.83 53
weighted avg 0.85 0.83 0.83 53
[0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 1 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 1 0 0 0 0 0
0 1 0 0 0 0 1 0 0 1 0 1 1 0 1 1]