-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnode.py
More file actions
82 lines (71 loc) · 3.48 KB
/
Copy pathnode.py
File metadata and controls
82 lines (71 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import math
class Node:
def __init__(self):
self.value = ""
self.name = ""
self.instances = []
self.branches = []
def count(instances, classes):
return [[c, sum(i.animal_type == c for i in instances)] for c in classes]
def assign_leaf_name(instances, classes, node):
lst = count(instances, classes)
for i in range(lst.__len__()):
if lst[i][1] == max([sublist[-1] for sublist in lst]):
node.name = lst[i][0]
return node
def choose_attr(instances, attributes, classes):
min_entropy = 100
best_attr = -1
for i in range(len(attributes)): # for each attribute
total_entropy = 0
for j in range(len(attributes[i][1])): # for each attribute value
# get instances with that attribute value (slightly changed to fit our class structure)
insts = [inst for inst in instances if inst.__getattribute__(attributes[i][0]) == attributes[i][1][j]]
counts = count(insts, classes) # counts how many instances belong to each class
entropy = 0
size = len(insts)
for c in counts: # computes entropy for each subset
if c[1] > 0:
entropy = entropy - c[1] / size * math.log2(c[1] / size)
total_entropy = total_entropy + size / len(instances) * entropy
# total attribute entropy
if min_entropy > total_entropy:
min_entropy = total_entropy
best_attr = i
return best_attr
def build(instances, attributes, classes, value, default, prev_attr):
node = Node()
node.value = value
# if instances list is empty, assign default as leaf name
if not instances:
node.name = default
return node
# if attribute list is empty, assign type of majority of instances left as leaf name or if all instances
# are of the same type, assign this type as leaf name
elif not attributes or instances.__len__() == max([sublist[-1] for sublist in count(instances, classes)]):
return assign_leaf_name(instances, classes, node)
else:
best_attribute = choose_attr(instances, attributes, classes)
while attributes[best_attribute][0] in prev_attr: # check if chosen attribute's name is in previous best attr
if len(attributes) > 1: # if more than 1 attribute left in list, pop it and choose new one
attributes.pop(best_attribute)
best_attribute = choose_attr(instances, attributes, classes)
else: # if one attribute left
return assign_leaf_name(instances, classes, node)
prev_attr.append(attributes[best_attribute][0]) # add best attribute to previous best attributes
new_attributes = attributes.copy()
branches = []
for a in range(attributes[best_attribute][1].__len__()):
new_instances = [inst for inst in instances if
inst.__getattribute__(attributes[best_attribute][0]) == a]
lst = count(instances, classes)
for i in range(lst.__len__()): # assign type of majority of instances as default
if lst[i][1] == max([sublist[-1] for sublist in lst]):
default = lst[i][0]
new_branch = build(new_instances, new_attributes, classes, a, default, prev_attr)
branches.append(new_branch)
node.value = value
node.name = attributes[best_attribute][0]
node.instances = instances
node.branches = branches
return node