Learn practical skills, build real-world projects, and advance your career
Created 4 years ago
import pprint
import csv
from math import *
# Load the whole training CSV into memory as a list of rows (each row is a
# list of strings). The context manager guarantees the file handle is closed,
# and newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects passed to csv.reader.
with open('datasets/3train.csv', 'r', newline='') as csv_file:
    lines = list(csv.reader(csv_file))
lines
[['Outlook', 'Temperature', 'Humidity', 'Wind', 'Target'],
['sunny', 'hot', 'high', 'weak', 'no'],
['sunny', 'hot', 'high', 'strong', 'no'],
['overcast', 'hot', 'high', 'weak', 'yes'],
['rain', 'mild', 'high', 'weak', 'yes'],
['rain', 'cool', 'normal', 'weak', 'yes'],
['rain', 'cool', 'normal', 'strong', 'no'],
['overcast', 'cool', 'normal', 'strong', 'yes'],
['sunny', 'mild', 'high', 'weak', 'no'],
['sunny', 'cool', 'normal', 'weak', 'yes'],
['rain', 'mild', 'normal', 'weak', 'yes'],
['sunny', 'mild', 'normal', 'strong', 'yes'],
['overcast', 'mild', 'high', 'strong', 'yes'],
['overcast', 'hot', 'normal', 'weak', 'yes'],
['rain', 'mild', 'high', 'strong', 'no']]
# Detach the first row (the column names) from `lines`; after this, `lines`
# holds only the remaining rows.
data = lines.pop(0)
# Emit the header row, an empty separator line, then the remaining rows.
print(data, "", lines, sep="\n")
['Outlook', 'Temperature', 'Humidity', 'Wind', 'Target']
[['sunny', 'hot', 'high', 'weak', 'no'], ['sunny', 'hot', 'high', 'strong', 'no'], ['overcast', 'hot', 'high', 'weak', 'yes'], ['rain', 'mild', 'high', 'weak', 'yes'], ['rain', 'cool', 'normal', 'weak', 'yes'], ['rain', 'cool', 'normal', 'strong', 'no'], ['overcast', 'cool', 'normal', 'strong', 'yes'], ['sunny', 'mild', 'high', 'weak', 'no'], ['sunny', 'cool', 'normal', 'weak', 'yes'], ['rain', 'mild', 'normal', 'weak', 'yes'], ['sunny', 'mild', 'normal', 'strong', 'yes'], ['overcast', 'mild', 'high', 'strong', 'yes'], ['overcast', 'hot', 'normal', 'weak', 'yes'], ['rain', 'mild', 'high', 'strong', 'no']]
def entropy(pos, neg):
    """Return the binary Shannon entropy, in bits, of a two-class count.

    Args:
        pos: Number of examples in the first class.
        neg: Number of examples in the second class.

    Returns:
        0 when either count is zero, otherwise
        ``-p*log2(p) - q*log2(q)`` with ``p = pos/(pos+neg)`` and
        ``q = neg/(pos+neg)``.
    """
    # Guard first: log2(0) is undefined, and a split containing only one
    # class (or nothing at all) carries no uncertainty.
    if pos == 0 or neg == 0:
        return 0
    tot = pos + neg
    p, q = pos / tot, neg / tot
    # log2 is the dedicated base-2 logarithm and is usually more accurate
    # than the two-argument log(x, 2) form.
    return -p * log2(p) - q * log2(q)
def gain(lines, attr, pos, neg):
    """Return the information gain of splitting ``lines`` on column ``attr``.

    Args:
        lines: Iterable of rows. ``row[attr]`` is read as the attribute
            value and ``row[-1]`` as the target label.
        attr: Index of the column to split on.
        pos: Count used for the pre-split entropy and, together with
            ``neg``, as the denominator of each subset's weight
            (presumably the number of 'yes' rows in ``lines`` -- confirm
            against the caller).
        neg: Counterpart of ``pos`` (presumably the number of 'no' rows).

    Returns:
        ``entropy(pos, neg)`` minus the weighted sum of the entropies of
        the subsets produced by each distinct value in column ``attr``.
        Only the labels 'yes' and 'no' contribute to the subset counts.
    """
    total = pos + neg

    # For every distinct attribute value, tally how many rows carry each
    # target label: {attribute_value: {label: count}}.
    label_counts = {}
    for row in lines:
        per_value = label_counts.setdefault(row[attr], {})
        per_value[row[-1]] = per_value.get(row[-1], 0) + 1

    # Weighted average entropy of the subsets after the split.
    remainder = 0
    for counts in label_counts.values():
        yes, no = counts.get('yes', 0), counts.get('no', 0)
        remainder += (yes + no) / total * entropy(yes, no)

    return entropy(pos, neg) - remainder