import math


def logBaseB(B, N):
    """Return the log of N to the base B.

    Special case: N == 0 returns 1 instead of raising. The entropy
    functions in this file rely on that so that a zero probability
    contributes 0 * 1 == 0 to the entropy sum.

    >>> logBaseB(2,16)
    4.0
    """
    if N == 0:
        return 1
    return math.log(N) / math.log(B)


def _entropy_from_probs(probs):
    """Return the base-2 entropy, sum of -p * log2(p), of the probabilities."""
    ent = 0
    for p in probs:
        # For p == 0, logBaseB returns 1, so the term is 0 (no log(0) error).
        ent = ent + -1 * p * logBaseB(2, p)
    return ent


def entropy(classes, examples):
    """Return the entropy (base 2) of a set of examples.

    classes is a list of the possible class values.
    examples is a list holding the class value of each example.

    Prints the examples, the maximum possible entropy for len(classes)
    classes, and the computed entropy before returning it.

    An empty examples list yields an entropy of 0. A value in examples
    that is not listed in classes raises KeyError.
    """
    print("Examples:", examples)

    counts = dict.fromkeys(classes, 0)
    for value in examples:
        counts[value] += 1

    total = float(len(examples))
    if total:
        probs = [counts[c] / total for c in classes]
    else:
        # No examples: every class has proportion 0 (avoids dividing by zero).
        probs = [0.0 for _ in classes]

    n = len(classes)
    print("The maximum entropy is logBaseB(2,", n, ")=",
          format(logBaseB(2, n), ".3f"), sep="")

    ent = _entropy_from_probs(probs)
    print("Entropy:", format(ent, ".3f"))
    return ent


def entropy1(classProbs):
    """Return the entropy (base 2) given the class probabilities.

    classProbs is a list, where classProbs[i] is the proportion of the
    examples that have the ith class value.

    Prints a blank line, the sorted probabilities (formatted to three
    decimals), the maximum possible entropy for len(classProbs) classes,
    and the computed entropy before returning it.
    """
    print("")
    shown = sorted(format(p, ".3f") for p in classProbs)
    print("Probabilities:", shown)

    n = len(classProbs)
    print("The maximum entropy is logBaseB(2,", n, ")=",
          format(logBaseB(2, n), ".3f"), sep="")

    ent = _entropy_from_probs(classProbs)
    print("Entropy:", format(ent, ".3f"))
    return ent


# Worked examples: each pair computes the same entropy twice, first from raw
# class labels (entropy) and then from the matching proportions (entropy1).
print("==")
x = entropy([1,2],[1,1,1,1,1,1,1,1,1,1,1])
x = entropy1([1.0,0.0])
print("==")
x = entropy([1,2,3,4,5],[1,1,2,2,3,3,4,4,5,5])
x = entropy1([.2,.2,.2,.2,.2])
print("==")
x = entropy([1,2,3,4,5,6,7,8],[1,1,1,2,3,4,4,5,5,6,6,7,7,8,8])
x = entropy1([3.0/15.0,1.0/15.0,1.0/15.0,2.0/15.0,2.0/15.0,2.0/15.0,2.0/15.0,2.0/15.0])
print("==")
x = entropy([1,2],[1,1,1,2,2,2,2,2,2,2,2,2])
x = entropy1([3.0/12.0,9.0/12.0])
print("==")
x = entropy([1,2],[1,1,1,1,1,1,1,2])
x = entropy1([7.0/8.0,1.0/8.0])
print("==")
x = entropy([1,2],[1,1,1,1,1,1,1,1,1,1,2])
x = entropy1([10.0/11.0,1.0/11.0])
print("==")
x = entropy([1,2,3],[1,1,1,1,1,1,2,2,3,3,3,3])
x = entropy1([6/12.0,2/12.0,4/12.0])
print("==")
x = entropy([1,2,3],[1,1,1,1,3,3,3,2,3,3,3,3])
x = entropy1([4/12.0,1/12.0,7/12.0])
print("==")
x = entropy([1,2],[1,1,1,1,2,2])
x = entropy1([4/6,2/6])