Learn practical skills, build real-world projects, and advance your career
# libraries
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score, KFold
from sklearn import metrics
from sklearn.utils import resample
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the cancer dataset straight from the course repository.
cancer_df = pd.read_csv(
    "https://github.com/mpourhoma/CS4661/raw/master/Cancer.csv"
)
# Notebook-style preview; as a bare expression its result is discarded
# when this runs as a plain script.
cancer_df.head()

# The nine measurement columns used as model inputs.
feature_cols = [
    'Clump_Thickness',
    'Uniformity_of_Cell_Size',
    'Uniformity_of_Cell_Shape',
    'Marginal_Adhesion',
    'Single_Epithelial_Cell_Size',
    'Bare_Nuclei',
    'Bland_Chromatin',
    'Normal_Nucleoli',
    'Mitoses',
]

# Target column first, then the feature matrix (the two selections are
# independent of each other).
y = cancer_df['Malignant_Cancer']
X = cancer_df[feature_cols]

# Show the first rows of the features, then of the label.
print(X.head(), y.head(), sep="\n")
# Output:
#    Clump_Thickness  Uniformity_of_Cell_Size  Uniformity_of_Cell_Shape  \
# 0                5                        1                         1
# 1                5                        4                         4
# 2                3                        1                         1
# 3                6                        8                         8
# 4                4                        1                         1
#
#    Marginal_Adhesion  Single_Epithelial_Cell_Size  Bare_Nuclei  \
# 0                  1                            2            1
# 1                  5                            7           10
# 2                  1                            2            2
# 3                  1                            3            4
# 4                  3                            2            1
#
#    Bland_Chromatin  Normal_Nucleoli  Mitoses
# 0                3                1        1
# 1                3                2        1
# 2                3                1        1
# 3                3                7        1
# 4                3                1        1
# 0    0
# 1    0
# 2    0
# 3    0
# 4    0
# Name: Malignant_Cancer, dtype: int64
# Hold out 35% of the rows as a test set; the fixed seed makes the
# partition reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    random_state=3,
)

# Report the dimensions of each partition, one per line:
# training features, training labels, test features, test labels.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")
# Output:
# (97, 9)
# (97,)
# (53, 9)
# (53,)
# Build a decision tree classifier (seeded for repeatable results) and
# train it on the training partition; fit() hands back the fitted estimator.
my_decisiontree = DecisionTreeClassifier(random_state=3).fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_predict_dt = my_decisiontree.predict(X_test)

# Print the per-class precision / recall / F1 report, then the raw
# predicted labels on the following line.
print(classification_report(y_test, y_predict_dt), y_predict_dt, sep="\n")
# Output:
#               precision    recall  f1-score   support
#
#            0       0.74      0.91      0.82        22
#            1       0.92      0.77      0.84        31
#
#     accuracy                           0.83        53
#    macro avg       0.83      0.84      0.83        53
# weighted avg       0.85      0.83      0.83        53
#
# [0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 1 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 1 1]