Learn practical skills, build real-world projects, and advance your career
Created 4 years ago
import csv
import math
from collections import Counter
def load_csv(filename):
    """Read a CSV file and split it into data rows and a header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(dataset, headers)`` tuple. ``headers`` is the first row of the
        file and ``dataset`` is a list of all remaining rows; every row is a
        list of strings as produced by ``csv.reader``. Both are empty lists
        when the file contains no rows at all.
    """
    # The context manager guarantees the handle is closed even if parsing
    # fails; newline="" lets the csv module do its own line-ending handling
    # (needed for quoted fields that contain line breaks).
    with open(filename, "r", newline="") as handle:
        reader = csv.reader(handle)
        # Take the header straight from the reader instead of list.pop(0);
        # the default avoids an exception on an empty file.
        headers = next(reader, [])
        dataset = list(reader)
    return dataset, headers
class Node:
    """A single node of a tree, labelled with an attribute.

    Args:
        attribute: Value stored on the node as ``self.attribute``.
    """

    def __init__(self, attribute):
        self.attribute = attribute
        # Starts empty; presumably filled with child nodes by the tree
        # builder, which is not visible in this part of the file.
        self.children = []
        # Starts as an empty string; presumably holds the predicted label
        # on leaf nodes (to be confirmed against the code that sets it).
        self.answer = ""

    def __repr__(self):
        """Return a compact description that is useful when debugging."""
        return "{}(attribute={!r}, answer={!r}, children={})".format(
            type(self).__name__,
            self.attribute,
            self.answer,
            len(self.children),
        )
def subtables(data, col, delete):
    """Group the rows of ``data`` by the value found in column ``col``.

    Args:
        data: Sequence of rows; each row must be indexable by ``col``.
        col: Index of the column whose values define the groups.
        delete: When true, the rows placed in the groups have column
            ``col`` removed.

    Returns:
        A ``(attr, dic)`` tuple. ``attr`` is the list of distinct values of
        the column, in the order they first appear in ``data`` (on Python
        3.7+, where dicts keep insertion order). ``dic`` maps each of those
        values to the list of rows that carry it.

    Note:
        The rows of ``data`` are never modified. With ``delete`` true each
        grouped row is a fresh list without the column; with ``delete``
        false the original row objects themselves are placed in the groups.
    """
    dic = {}
    for row in data:
        key = row[col]
        if delete:
            # Drop the column from a copy so the caller's dataset keeps all
            # of its columns and can be split again later.
            row = list(row)
            del row[col]
        dic.setdefault(key, []).append(row)
    return list(dic), dic
def entropy(S):
    """Return the Shannon entropy, in bits, of the values in ``S``.

    Args:
        S: Iterable of hashable values (for example class labels).

    Returns:
        ``0`` when ``S`` is empty or holds a single distinct value;
        otherwise the sum of ``-p * log2(p)`` over the relative frequency
        ``p`` of every distinct value, for any number of distinct values.
    """
    # One pass over S counts every distinct value, however many there are.
    counts = Counter(S)
    if len(counts) <= 1:
        # Empty or pure collection: no uncertainty.
        return 0
    total = float(sum(counts.values()))
    sums = 0
    for count in counts.values():
        proportion = count / total
        sums += -1 * proportion * math.log(proportion, 2)
    return sums