Jovian
⭐️
Sign In
In [1]:
import math
import csv
In [2]:
def load_csv(filename):
    """Read a CSV file and split it into data rows and a header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(dataset, headers)``: ``headers`` is the first row of the
        file and ``dataset`` is the list of all remaining rows. Every row is
        a list of strings, as produced by ``csv.reader``.

    Raises:
        IndexError: If the file contains no rows at all (nothing to pop as
            the header).
    """
    # The context manager guarantees the handle is closed even if parsing
    # fails; newline="" lets the csv reader do its own newline handling.
    with open(filename, "r", newline="") as handle:
        dataset = list(csv.reader(handle))
    headers = dataset.pop(0)
    return dataset, headers
In [3]:
class Node:
    """A single vertex of the decision tree.

    Attributes are set in ``__init__``:
      * ``attribute`` - the value passed to the constructor (the tree builder
        passes the name of the feature tested at this node, or "" for a leaf).
      * ``children``  - list that the tree builder fills with
        ``(value, child_node)`` pairs; starts empty.
      * ``answer``    - starts as ""; a non-empty value marks the node as a
        leaf carrying that label.
    """

    def __init__(self, attribute):
        # A fresh list per instance, so nodes never share their children.
        self.attribute, self.children, self.answer = attribute, [], ""
In [4]:
def subtables(data, col, delete):
    """Partition ``data`` by the value each row holds in column ``col``.

    Args:
        data: Sequence of rows (each row an indexable sequence).
        col: Index of the column to group by.
        delete: When true, the rows stored in the result have column ``col``
            removed. The rows in ``data`` itself are left untouched; trimmed
            copies are stored instead.

    Returns:
        A tuple ``(attr, dic)``: ``attr`` is the list of distinct values found
        in the column, in order of first appearance, and ``dic`` maps each of
        those values to the list of rows that carry it.
    """
    dic = {}
    for row in data:
        key = row[col]
        if delete:
            # Work on a copy so the caller's dataset is not silently stripped
            # of its columns as a side effect of building the tree.
            kept = list(row)
            del kept[col]
        else:
            kept = row
        dic.setdefault(key, []).append(kept)
    # Dict keys keep insertion order, which gives a run-to-run stable value
    # order (a set's order varies with the string hash seed).
    attr = list(dic)
    return attr, dic
In [5]:
def entropy(S):
    """Return the Shannon entropy (base 2) of the labels in ``S``.

    Works for any number of distinct labels, not just two.

    Args:
        S: Sized iterable of hashable labels.

    Returns:
        ``0`` when ``S`` is empty or contains a single distinct label,
        otherwise ``-sum(p * log2(p))`` over the relative frequency ``p`` of
        each distinct label.
    """
    total = len(S)
    if total == 0:
        # Nothing to be uncertain about.
        return 0
    tally = {}
    for label in S:
        tally[label] = tally.get(label, 0) + 1
    if len(tally) == 1:
        return 0
    # Every tallied count is >= 1, so each p is strictly positive and log2 is
    # always defined.
    return -sum((count / total) * math.log2(count / total)
                for count in tally.values())
In [6]:
def compute_gain(data, col):
    """Information gain obtained by splitting ``data`` on column ``col``.

    The gain is the entropy of the last column of ``data`` minus the
    size-weighted entropy of the last column within each group of rows that
    share a value in ``col``.

    Args:
        data: Rows whose last element is the class label.
        col: Index of the column to evaluate.

    Returns:
        The gain as a number.
    """
    values, groups = subtables(data, col, False)
    row_count = len(data) * 1.0
    gain = entropy([row[-1] for row in data])
    for value in values:
        subset = groups[value]
        weight = len(subset) / row_count
        subset_entropy = entropy([row[-1] for row in subset])
        gain -= weight * subset_entropy
    return gain
In [7]:
def build_tree(data, features):
    """Recursively build an ID3 decision tree.

    Args:
        data: Non-empty list of rows; the last element of each row is the
            class label and the preceding elements are attribute values.
        features: Column names; ``features[i]`` names column ``i`` of the
            rows.

    Returns:
        The root ``Node``. Leaves have ``answer`` set to a label; internal
        nodes have ``attribute`` set to the chosen feature name and
        ``children`` filled with ``(value, subtree)`` pairs.

    Raises:
        IndexError: If ``data`` is empty.
    """
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        # Pure subset: every row agrees on the label.
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    if n < 1:
        # Labels still disagree but there is no attribute left to split on
        # (contradictory rows). Fall back to the most frequent label rather
        # than crashing on max() of an empty gain list. Ties go to the label
        # seen first.
        tally = {}
        for label in lastcol:
            tally[label] = tally.get(label, 0) + 1
        node = Node("")
        node.answer = max(tally, key=tally.get)
        return node

    gains = [compute_gain(data, col) for col in range(n)]
    # The first column with the highest gain wins.
    split = gains.index(max(gains))
    node = Node(features[split])
    # Child calls see rows without the split column, so drop its name too.
    fea = features[:split] + features[split + 1:]
    attr, dic = subtables(data, split, True)
    for value in attr:
        child = build_tree(dic[value], fea)
        node.children.append((value, child))
    return node
In [8]:
def print_tree(node, level):
    """Print the tree rooted at ``node`` as an indented outline.

    Args:
        node: Tree node exposing ``answer``, ``attribute`` and ``children``.
        level: Number of spaces of indentation for this node; each branch
            value is printed one level deeper and its subtree two levels
            deeper.
    """
    pad = " " * level
    is_leaf = node.answer != ""
    if is_leaf:
        print(pad, node.answer)
    else:
        print(pad, node.attribute)
        branch_pad = " " * (level + 1)
        for branch in node.children:
            value, subtree = branch
            print(branch_pad, value)
            print_tree(subtree, level + 2)
In [9]:
def classify(node, x_test, features):
    """Classify one instance by walking the decision tree.

    The predicted label is printed (as before) and is now also returned so
    callers can use it programmatically.

    Args:
        node: Root of the (sub)tree to walk.
        x_test: Sequence of attribute values for the instance.
        features: Column names for ``x_test``; the attribute tested at each
            node is looked up here to find which element of ``x_test`` to
            compare.

    Returns:
        The label of the leaf that was reached, or ``None`` when the instance
        holds a value for which the tree has no branch.

    Raises:
        ValueError: If a node's attribute is not present in ``features``.
    """
    if node.answer != "":
        print(node.answer)
        return node.answer
    pos = features.index(node.attribute)
    for value, child in node.children:
        if x_test[pos] == value:
            return classify(child, x_test, features)
    # No branch for this value: still emit a line so a caller that printed a
    # prefix with end="" does not have its output run into the next line.
    print(None)
    return None
In [10]:
# Load the training rows and their header row, then grow the tree from them.
dataset, features = load_csv("datasets/3train.csv")
node = build_tree(dataset, features)
print("Decision Tree Using ID3")
print_tree(node, 0)
# NOTE: `features` is rebound here to the header row of the test file;
# classify() looks each node's attribute name up in this list to find the
# matching position in a test row.
testdata, features = load_csv("datasets/3test.csv")
for xtest in testdata:
    print("Test Instances: ", xtest)
    # end="" keeps the cursor on this line so that the label printed inside
    # classify() appears right after the prefix.
    print("Predicted Label: ", end="")
    classify(node, xtest, features)
Decision Tree Using ID3
 Outlook
  rain
   Wind
    strong
     no
    weak
     yes
  overcast
   yes
  sunny
   Humidity
    high
     no
    normal
     yes
Test Instances:  ['rain', 'cool', 'normal', 'strong']
Predicted Label: no
Test Instances:  ['sunny', 'mild', 'normal', 'strong']
Predicted Label: yes
In [ ]: