# rashmi-278/prog3

a year ago
In [1]:
``````import math
import csv``````
In [2]:
def load_csv(filename):
    """Read a CSV file and separate its header row from its data rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A ``(dataset, headers)`` tuple: ``dataset`` is the list of data rows
        (each a list of strings) and ``headers`` is the first row of the
        file, used as the column/feature names.

    Raises:
        ValueError: if the file contains no rows at all.
    """
    # newline="" is what the csv module asks for so that embedded newlines
    # inside quoted fields are handled by the reader itself.
    with open(filename, newline="") as handle:
        dataset = list(csv.reader(handle))
    if not dataset:
        raise ValueError("CSV file %r is empty" % (filename,))
    headers = dataset.pop(0)
    return dataset, headers
In [3]:
class Node:
    """One node of a decision tree.

    Attributes:
        attribute: value passed to the constructor (the name of the
            attribute this node tests).
        children: list that callers fill with ``(value, child_node)`` pairs;
            starts empty.
        answer: slot for the class label of a leaf; defaults to ``""``,
            meaning "no label".
    """

    def __init__(self, attribute):
        self.attribute = attribute
        # A fresh list per instance so nodes never share children.
        self.children = []
        self.answer = ""
In [4]:
def subtables(data, col, delete):
    """Partition rows by the value they hold in column ``col``.

    Args:
        data: list of rows (lists). The rows are never modified.
        col: index of the column to group on.
        delete: when true, the rows placed in the result are copies with
            column ``col`` removed; when false, the original row objects
            are placed in the result unchanged.

    Returns:
        A ``(attr, dic)`` tuple: ``attr`` lists the distinct values of the
        column in first-seen order, and ``dic`` maps each of those values to
        the list of rows that carry it.
    """
    dic = {}
    for row in data:
        key = row[col]
        # Build a new row instead of `del row[col]` so the caller's dataset
        # keeps all of its columns.
        kept = row[:col] + row[col + 1:] if delete else row
        dic.setdefault(key, []).append(kept)
    # Dict keys preserve insertion order, which gives a reproducible value
    # order (a set of strings would not).
    attr = list(dic)
    return attr, dic
In [5]:
def entropy(S):
    """Return the Shannon entropy (base 2) of a sequence of class labels.

    Works for any number of distinct labels.

    Args:
        S: sequence of hashable labels.

    Returns:
        ``0`` when ``S`` is empty or holds a single distinct label,
        otherwise the entropy in bits as a float.
    """
    total = len(S)
    if total == 0:
        return 0

    counts = {}
    for label in S:
        counts[label] = counts.get(label, 0) + 1
    if len(counts) == 1:
        return 0

    # Every count is at least 1, so each probability is > 0 and log2 is
    # always defined.
    return -sum(
        (count / total) * math.log2(count / total)
        for count in counts.values()
    )
In [6]:
def compute_gain(data, col):
    """Return the information gain of splitting ``data`` on column ``col``.

    The gain is the entropy of the class labels (last element of each row)
    minus the size-weighted entropy of the labels inside each subset
    produced by grouping the rows on ``col``.

    Args:
        data: list of rows whose last element is the class label.
        col: index of the attribute column to evaluate.

    Returns:
        The information gain as a number.
    """
    att_values, dic = subtables(data, col, False)
    gain = entropy([row[-1] for row in data])
    size = len(data)
    for value in att_values:
        subset = dic[value]
        ratio = len(subset) / size
        gain -= ratio * entropy([row[-1] for row in subset])
    # Without this return the caller receives None and cannot rank columns.
    return gain
In [7]:
def build_tree(data, features):
    """Recursively grow an ID3 decision tree.

    Args:
        data: non-empty list of rows; the last element of each row is the
            class label and the elements before it are attribute values.
            The rows are not modified.
        features: attribute names, positionally aligned with the columns.

    Returns:
        The root ``Node``. An internal node is created with the name of the
        attribute it splits on and gets one ``(value, child)`` pair per
        value found in that column. A leaf is created with an empty
        attribute name and has its label stored in ``answer``.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if not data:
        raise ValueError("cannot build a decision tree from an empty dataset")

    labels = [row[-1] for row in data]
    n_attributes = len(data[0]) - 1

    if len(set(labels)) == 1 or n_attributes == 0:
        leaf = Node("")
        # Pure subset: its only label. No attributes left to split on: the
        # most frequent label (the first one seen wins a tie).
        leaf.answer = max(dict.fromkeys(labels), key=labels.count)
        return leaf

    gains = [compute_gain(data, col) for col in range(n_attributes)]
    split = gains.index(max(gains))
    node = Node(features[split])
    remaining = features[:split] + features[split + 1:]

    # Hand over row copies: the split removes a column, and that must not
    # reach the dataset the caller passed in.
    attr, dic = subtables([list(row) for row in data], split, True)
    for value in attr:
        node.children.append((value, build_tree(dic[value], remaining)))
    return node
In [8]:
def print_tree(node, level):
    """Print a decision tree, one item per line, indented by depth.

    A node with a non-empty ``answer`` is a leaf: only its label is printed.
    Any other node prints its attribute name, then each branch value one
    level deeper, followed by that branch's subtree two levels deeper.

    Args:
        node: tree node exposing ``attribute`` and ``children`` (a list of
            ``(value, child)`` pairs) and, for leaves, ``answer``.
        level: number of spaces of indentation for this node.
    """
    # getattr keeps nodes that never received an `answer` printable.
    answer = getattr(node, "answer", "")
    if answer != "":
        print(" " * level, answer)
        return

    print(" " * level, node.attribute)
    for value, child in node.children:
        print(" " * (level + 1), value)
        print_tree(child, level + 2)
In [9]:
def classify(node, x_test, features):
    """Walk the tree for one instance, print the predicted label, return it.

    Args:
        node: tree node exposing ``attribute`` and ``children`` (a list of
            ``(value, child)`` pairs) and, for leaves, ``answer``.
        x_test: attribute values of the instance to classify.
        features: attribute names; ``features[i]`` names ``x_test[i]``.

    Returns:
        The label of the leaf that was reached, or ``None`` when the walk
        ends without reaching a labelled leaf (an unlabelled leaf, or an
        attribute value that has no branch).
    """
    # getattr keeps nodes that never received an `answer` usable.
    answer = getattr(node, "answer", "")
    if answer != "":
        print(answer)
        return answer
    if not node.children:
        # Unlabelled leaf: there is nothing to predict.
        return None

    pos = features.index(node.attribute)
    for value, child in node.children:
        if x_test[pos] == value:
            # Propagate the result instead of discarding it.
            return classify(child, x_test, features)
    return None
In [10]:
# Train an ID3 tree on the training CSV and display it.
dataset, features = load_csv("datasets/3train.csv")
node = build_tree(dataset, features)
print("Decision Tree Using ID3")
print_tree(node, 0)
```Decision Tree Using ID3 Outlook rain Wind strong no weak yes overcast yes sunny Humidity high no normal yes Test Instances: ['rain', 'cool', 'normal', 'strong'] Predicted Label: no Test Instances: ['sunny', 'mild', 'normal', 'strong'] Predicted Label: yes ```
`` ``