-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidation.py
More file actions
97 lines (79 loc) · 3.21 KB
/
validation.py
File metadata and controls
97 lines (79 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import time
import config as cf
from code import csv_processor as csv
from code.clustering import Clustering
from code.make_rule import Rule
from code.predict import Predict
from sklearn.model_selection import KFold
import numpy as np
def predict(data):
now = time.time()
x = Predict(data, clusters, rules)
corrects = 0
edit = []
editCollection = []
for ix, record in enumerate(data):
if int(record[0]) == x.predict(record, rules)["predict"]:
corrects += 1
else:
pr_rules = x.predict(record, rules)["rule"]
cr_rules = x.get_rule_truth(record)["rule"]
edit += x.detect_cluster(pr_rules, cr_rules, record[1: len(record) ])
print(' Testing Time: {:.2f}s'.format(time.time() - now))
print(" Correct: {}, Total: {}, Accuracy: {:.2f}%\n".format(corrects, len(data), 100*corrects / len(data)))
return 100*corrects / len(data)
def train_func(data):
now = time.time()
# clustering and save
clusters = Clustering(data).clusters
csv.write_file(cf.clusters_path, clusters)
# make_rule and save
rules = Rule(data, clusters).colection_rules()
csv.write_file(cf.rule_path, rules)
# Predict and save data
x = Predict(data, clusters, rules)
corrects = 0
edit = []
editCollection = []
for ix, record in enumerate(data):
if int(record[0]) == x.predict(record, rules)["predict"]:
corrects += 1
else:
pr_rules = x.predict(record, rules)["rule"]
cr_rules = x.get_rule_truth(record)["rule"]
edit += x.detect_cluster(pr_rules, cr_rules, record[1: len(record) ])
print(' Training Time: {:.2f}s'.format(time.time() - now))
print(" Correct: {}, Total: {}, Accuracy: {:.2f}%".format(corrects, len(data), 100*corrects / len(data)))
return 100*corrects / len(data)
if __name__ == "__main__":
# read data
data = csv.read_file(cf.full_path, 'float')
kf = KFold(n_splits=cf.k_fold, shuffle=cf.shuffle)
result_data = [1 for i in range(cf.num_classes) ]
for i in range(cf.num_classes):
data_class = []
for j in data:
if j[0] == i+1: data_class.append(j)
result_data[i] = (data_class, list(kf.split(data_class)))
x = []
y = []
for i in range(cf.k_fold):
print("Fold {}/{} ".format(i+1,cf.k_fold))
train = []
test = []
for j in range(cf.num_classes):
data_class = result_data[j][0]
# result_data[0][1][2][3]
# 1: class, 2: 1 is kf.split object, 2: fold, 3: train or test
for train_ids in result_data[j][1][i][0]:
train.append(data_class[train_ids])
for test_ids in result_data[j][1][i][1]:
test.append(data_class[test_ids])
x.append(train_func(train))
# read data
clusters = csv.read_file(cf.clusters_path, 'float')
rules = csv.read_file(cf.rule_path, 'float')
y.append(predict(test))
print('Result')
print(' Training Accuracy: {:.3f}%'.format(sum(x)/len(x)))
print(' Testing Accuracy: {:.3f}%'.format(sum(y)/len(y)))