Learn practical skills, build real-world projects, and advance your career
import pprint
import csv
from math import *
# Load the whole training CSV into memory as a list of rows, each row being a
# list of strings (the first row is the header). The with-block guarantees the
# file handle is closed once the rows are read; newline='' is what the csv
# module asks for so that it can handle line endings inside fields itself.
with open('datasets/3train.csv','r',newline='') as csv_file:
    lines=list(csv.reader(csv_file))
lines
[['Outlook', 'Temperature', 'Humidity', 'Wind', 'Target'],
 ['sunny', 'hot', 'high', 'weak', 'no'],
 ['sunny', 'hot', 'high', 'strong', 'no'],
 ['overcast', 'hot', 'high', 'weak', 'yes'],
 ['rain', 'mild', 'high', 'weak', 'yes'],
 ['rain', 'cool', 'normal', 'weak', 'yes'],
 ['rain', 'cool', 'normal', 'strong', 'no'],
 ['overcast', 'cool', 'normal', 'strong', 'yes'],
 ['sunny', 'mild', 'high', 'weak', 'no'],
 ['sunny', 'cool', 'normal', 'weak', 'yes'],
 ['rain', 'mild', 'normal', 'weak', 'yes'],
 ['sunny', 'mild', 'normal', 'strong', 'yes'],
 ['overcast', 'mild', 'high', 'strong', 'yes'],
 ['overcast', 'hot', 'normal', 'weak', 'yes'],
 ['rain', 'mild', 'high', 'strong', 'no']]
# Detach the header row from `lines` (mutating it in place) so that `lines`
# holds only the data rows, then show the header, a blank line and the rows.
data = lines.pop(0)
print(data, "", lines, sep="\n")
['Outlook', 'Temperature', 'Humidity', 'Wind', 'Target'] [['sunny', 'hot', 'high', 'weak', 'no'], ['sunny', 'hot', 'high', 'strong', 'no'], ['overcast', 'hot', 'high', 'weak', 'yes'], ['rain', 'mild', 'high', 'weak', 'yes'], ['rain', 'cool', 'normal', 'weak', 'yes'], ['rain', 'cool', 'normal', 'strong', 'no'], ['overcast', 'cool', 'normal', 'strong', 'yes'], ['sunny', 'mild', 'high', 'weak', 'no'], ['sunny', 'cool', 'normal', 'weak', 'yes'], ['rain', 'mild', 'normal', 'weak', 'yes'], ['sunny', 'mild', 'normal', 'strong', 'yes'], ['overcast', 'mild', 'high', 'strong', 'yes'], ['overcast', 'hot', 'normal', 'weak', 'yes'], ['rain', 'mild', 'high', 'strong', 'no']]
def entropy(pos,neg):
    """Return the binary entropy, in bits, of a two-class sample.

    Args:
        pos: number of examples in the first class.
        neg: number of examples in the second class.

    Returns:
        A float: 0.0 when either count is zero (the sample is pure, or
        empty), otherwise -p*log2(p) - q*log2(q) with p = pos/(pos+neg)
        and q = neg/(pos+neg).
    """
    # A pure (or empty) sample has no uncertainty; returning early also
    # avoids evaluating log2(0).
    if pos == 0 or neg == 0:
        return 0.0
    tot = pos + neg
    p_pos = pos / tot
    p_neg = neg / tot
    # log2 is the dedicated base-2 logarithm and is usually more accurate
    # than the two-argument form log(x, 2).
    return -p_pos * log2(p_pos) - p_neg * log2(p_neg)
def gain(lines,attr,pos,neg,pos_label='yes',neg_label='no'):
    """Return the information gain of splitting `lines` on column `attr`.

    Args:
        lines: iterable of rows; row[attr] is the attribute value and
            row[-1] is the class label.
        attr: index of the column to split on.
        pos: number of rows counted as the positive class before the split.
        neg: number of rows counted as the negative class before the split.
        pos_label: label in the last column that counts as positive
            (defaults to 'yes').
        neg_label: label in the last column that counts as negative
            (defaults to 'no').

    Returns:
        entropy(pos, neg) minus the weighted sum of the entropies of the
        subsets produced by each distinct value of the attribute. Rows whose
        label is neither pos_label nor neg_label add nothing to that sum.

    Raises:
        ZeroDivisionError: if pos + neg is 0 while `lines` is not empty.
    """
    base_entropy = entropy(pos, neg)
    total = pos + neg

    # For every distinct attribute value, tally how many rows carry each
    # class label: {attribute value: {label: count}}.
    label_counts = {}
    for row in lines:
        per_label = label_counts.setdefault(row[attr], {})
        per_label[row[-1]] = per_label.get(row[-1], 0) + 1

    # Expected entropy after the split: each subset's entropy weighted by
    # the share of the pos+neg rows that fall into it.
    remainder = 0
    for per_label in label_counts.values():
        n_pos = per_label.get(pos_label, 0)
        n_neg = per_label.get(neg_label, 0)
        remainder += (n_pos + n_neg) / total * entropy(n_pos, n_neg)
    return base_entropy - remainder