-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNullClassifier.cpp
More file actions
66 lines (53 loc) · 1.71 KB
/
NullClassifier.cpp
File metadata and controls
66 lines (53 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include "NullClassifier.hpp"
#include <iostream>
#include <algorithm>
#include <set>
using namespace std;
NullClassifier::NullClassifier()
: trueTotal(0)
{}
// Records one training example.
//
// Every word in `words` bumps its per-word tally in trueCounts and the overall
// trueTotal by one. `label` is accepted for interface compatibility but is not
// consulted: all words are tallied the same way regardless of its value.
void NullClassifier::train(const vector<string> &words, bool label) {
    (void)label;  // intentionally unused
    for (auto cursor = words.begin(); cursor != words.end(); ++cursor) {
        ++trueCounts[*cursor];
        ++trueTotal;
    }
}
// Hook invoked once all training examples have been supplied.
//
// This classifier has nothing to finalize, so by default the function returns
// immediately. Setting showDebugInfo to true makes it print the total word
// count and the most frequently seen training words to stdout.
void NullClassifier::trainingFinished() {
    // Flip to true to see the debug report below.
    constexpr bool showDebugInfo = false;
    if (!showDebugInfo) {
        return;
    }

    // NOTE(review): the banner says "VocabClassifier" although this is
    // NullClassifier — looks copied from a sibling class; confirm before renaming.
    cout << "VocabClassifier debug info" << endl;
    cout << "==========================" << endl;
    cout << "trueTotal: " << trueTotal << endl;
    cout << endl;

    // Upper bound on how many words are listed in the report.
    const size_t topCount = 25;

    // Flatten the per-word tallies into a sortable list. Keys of trueCounts are
    // already unique, so no intermediate de-duplication step is required.
    vector<pair<string, double>> mostTrue;
    mostTrue.reserve(trueCounts.size());
    for (const auto &entry : trueCounts) {
        mostTrue.emplace_back(entry.first, static_cast<double>(entry.second));
    }

    // Highest count first; ties are ordered alphabetically so the report is
    // deterministic from run to run.
    sort(mostTrue.begin(), mostTrue.end(),
         [](const auto &lhs, const auto &rhs) {
             if (lhs.second != rhs.second) {
                 return lhs.second > rhs.second;
             }
             return lhs.first < rhs.first;
         });

    cout << "Most true words:" << endl;
    // Clamp to the vocabulary size: with fewer than topCount distinct words an
    // unconditional loop to topCount would index past the end of the list.
    const size_t shown = min(topCount, mostTrue.size());
    for (size_t i = 0; i < shown; ++i) {
        cout << mostTrue[i].second << ": " << mostTrue[i].first << endl;
    }
}
// Classifies a piece of text.
//
// The input is never inspected; the answer is unconditionally true.
bool NullClassifier::classify(const vector<string> &text) {
    (void)text;  // intentionally unused
    const bool verdict = true;
    return verdict;
}