-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataLoader.cpp
More file actions
54 lines (45 loc) · 1.34 KB
/
DataLoader.cpp
File metadata and controls
54 lines (45 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include "DataLoader.hpp"
#include "stopwords.hpp"
#include <algorithm>
#include <cctype>
#include <istream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
// Constructs a DataLoader whose `csv` member is initialized from `filename`.
// NOTE(review): presumably `filename` is the path of the CSV data set — confirm
// against the declaration of `csv` in DataLoader.hpp.
DataLoader::DataLoader(std::string filename) : csv(filename) {}
/*
  MODIFIES: words_out, isPositive_out
  EFFECTS:  Reads the next data record from the data set. Returns true iff
            a record was successfully read.
            On success, words_out is REPLACED (any previous contents are
            discarded) by the words of the "review" column after clean()
            has been applied and stopwords have been removed, and
            isPositive_out is true iff the "sentiment" column is equal to
            "positive".
            On failure, words_out and isPositive_out are left unmodified.
*/
bool DataLoader::nextRecord(std::vector<std::string> &words_out, bool &isPositive_out) {
  std::map<std::string, std::string> row;
  csv >> row;
  if (!csv) {
    return false;
  }

  const auto words_str = clean(row["review"]);
  const auto &positive_str = row["sentiment"];

  // Start from an empty vector so that a caller reusing the same vector across
  // calls does not see words from earlier records mixed into this one.
  words_out.clear();

  // Split the cleaned review on whitespace and keep only non-stopwords.
  std::istringstream iss(words_str);
  std::string word;
  while (iss >> word) {
    if (stopwords.find(word) == stopwords.end()) {
      words_out.push_back(word);
    }
  }

  isPositive_out = positive_str == "positive";
  return true;
}
/*
  EFFECTS: Returns a copy of s in which every character that std::isalpha
           classifies as alphabetic is converted with std::tolower, every
           space character ' ' is kept as-is, and every other character
           (digits, punctuation, tabs, newlines, ...) is dropped.
*/
std::string DataLoader::clean(const std::string &s) {
  std::string ret;
  ret.reserve(s.size());  // the result is never longer than the input
  for (const char c : s) {
    // The <cctype> functions require an argument representable as
    // unsigned char (or EOF); passing a negative plain char is undefined
    // behavior, so convert before classifying.
    const auto uc = static_cast<unsigned char>(c);
    if (std::isalpha(uc)) {
      // std::tolower returns int; narrow back to char explicitly.
      ret += static_cast<char>(std::tolower(uc));
    } else if (c == ' ') {
      ret += c;
    }
    // otherwise don't add the character at all
  }
  return ret;
}