Skip to content

Commit af40097

Browse files
committed
fixed some bugs and added new commands
1 parent 9878e98 commit af40097

14 files changed

Lines changed: 233 additions & 83 deletions

File tree

data/csv/bib_core/readme.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

data/metrics/bib_core/readme.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

data/results/bib_core/readme.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/common/PredictionData.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -187,9 +187,10 @@ public double getTagDiversity(List<Map<Integer, Double>> tagEntities) {
187187
return diversity;
188188
}
189189

190-
List<Integer> predictionIDs = new ArrayList<Integer>();
190+
List<String> predictionIDs = new ArrayList<String>();
191191
for (String res : this.predictionData) {
192-
predictionIDs.add(Integer.valueOf(res));
192+
//predictionIDs.add(Integer.valueOf(res));
193+
predictionIDs.add(res);
193194
}
194195
int k = predictionIDs.size();
195196
for (int i = 0; i < k; i++) {

src/common/TimeUtil.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ public class TimeUtil {
88
public static final int DAY = 3;
99
public static final int FIFTEEN_DAYS = 4;
1010
public static final int MONTH = 5;
11+
public static final int WEEK = 6;
1112

1213
/**
1314
* get duration count for the dataset.
@@ -43,6 +44,9 @@ public static int getDurationAtGranularity(int duration, int granularityLevel){
4344
case MONTH:
4445
time_count = duration / secondsInMonth;
4546
break;
47+
case WEEK:
48+
time_count = duration / secondsInWeek;
49+
break;
4650
}
4751
return time_count;
4852
}

src/file/BookmarkReader.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,9 @@ public Map<String, Integer> getUserMap() {
332332
}
333333

334334
public int getCountLimit() {
335+
if (this.countLimit == 0) {
336+
return this.getBookmarks().size();
337+
}
335338
return this.countLimit;
336339
}
337340

src/file/PredictionFileReader.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ public boolean readFile(String filename, int k, BookmarkReader wikiReader, Integ
8787
} else {
8888
List<String> realData = Arrays.asList(lineParts[1].split(", "));
8989
List<String> predictionData = Arrays.asList(lineParts[2].split(", "));
90-
if (predictionData.size() > 0) {
90+
if (predictionData.size() > 0 && realData.size() > 0) {
9191
PredictionData data = new PredictionData(userID, resID, realData, predictionData, k);
9292
this.predictions.add(data);
9393
this.predictionCount++;
src/file/preprocessing/JKULFMProcessor.java

Lines changed: 92 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,92 @@
1+
package file.preprocessing;
2+
3+
import java.io.BufferedReader;
4+
import java.io.BufferedWriter;
5+
import java.io.File;
6+
import java.io.FileInputStream;
7+
import java.io.FileOutputStream;
8+
import java.io.IOException;
9+
import java.io.InputStreamReader;
10+
import java.io.OutputStreamWriter;
11+
import java.util.ArrayList;
12+
import java.util.HashSet;
13+
import java.util.List;
14+
import java.util.Set;
15+
16+
/**
 * Reduces the listening-events file {@link #EVENTS_FILE} to the events of a
 * selected set of users.
 *
 * <p>The user selection is read from a comma-separated filter file whose first
 * line is skipped and whose first column holds the user id. Every line of the
 * tab-separated events file whose first column matches one of those ids is
 * copied unchanged to the output file.
 */
public class JKULFMProcessor {

    /** Tab-separated events file that is filtered; the first column is the user id. */
    private static final String EVENTS_FILE = "./data/schedl/LFM-1b_LEs.txt";

    /** Charset name used for every file this class reads or writes. */
    private static final String ENCODING = "UTF8";

    /**
     * Collects the user ids listed in the filter file.
     *
     * @param filterFile path of a comma-separated file; the first line is skipped
     * @return the distinct values of the first column
     * @throws IOException if the file cannot be opened or read
     */
    private static Set<String> getFilterUsers(String filterFile) throws IOException {
        Set<String> filterUsers = new HashSet<String>();
        int userRows = 0;
        // try-with-resources closes the reader even when reading fails half-way.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(filterFile)), ENCODING))) {
            // The first line is skipped and never parsed.
            String line = br.readLine();
            while ((line = br.readLine()) != null) {
                int separator = line.indexOf(',');
                if (separator < 0) {
                    // Blank or separator-less line: skip it instead of failing in substring().
                    continue;
                }
                filterUsers.add(line.substring(0, separator));
                userRows++;
            }
        }
        System.out.println("Number of users: " + userRows);
        return filterUsers;
    }

    /**
     * Reads {@link #EVENTS_FILE} and keeps the lines that belong to the given users.
     *
     * @param filterUsers user ids whose events are kept
     * @return the matching lines, in file order
     * @throws IOException if the events file cannot be opened or read
     */
    private static List<String> getFilterLines(Set<String> filterUsers) throws IOException {
        List<String> filterLines = new ArrayList<String>();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(EVENTS_FILE)), ENCODING))) {
            String line;
            while ((line = br.readLine()) != null) {
                int separator = line.indexOf('\t');
                if (separator < 0) {
                    // No tab means no user column; such a line cannot match any user.
                    continue;
                }
                if (filterUsers.contains(line.substring(0, separator))) {
                    filterLines.add(line);
                }
            }
        }
        System.out.println("Number of lines: " + filterLines.size());
        return filterLines;
    }

    /**
     * Writes the given lines to the output file, each terminated by {@code '\n'}.
     *
     * @param outputFile path of the file to create or overwrite
     * @param lines lines to write, in order
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeOutputFile(String outputFile, List<String> lines) throws IOException {
        int written = 0;
        // Closing the BufferedWriter flushes it and closes the underlying stream.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(outputFile)), ENCODING))) {
            for (String l : lines) {
                bw.write(l);
                bw.write('\n');
                written++;
            }
        }
        System.out.println("Written lines: " + written);
    }

    /**
     * Filters the events file down to the users listed in {@code filterFile} and
     * writes the result to {@code outputFile}.
     *
     * <p>Failures are reported on standard error via a stack trace and are not
     * propagated to the caller.
     *
     * @param filterFile comma-separated file listing the users to keep
     * @param outputFile path of the filtered events file to write
     */
    public static void preprocessFile(String filterFile, String outputFile) {
        try {
            // get filter users
            Set<String> filterUsers = getFilterUsers(filterFile);

            // read big file and filter user actions
            List<String> filterLines = getFilterLines(filterUsers);

            // write file
            writeOutputFile(outputFile, filterLines);
        } catch (Exception e) {
            // Best-effort entry point: keep the original contract of never throwing.
            e.printStackTrace();
        }
    }
}

src/processing/CFTagRecommender.java

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ public static double getLAverage(List<Map<Integer, Integer>> neighborMaps) {
251251

252252
private static String timeString;
253253

254-
private static List<Map<Integer, Double>> startBM25CreationForTagPrediction(BookmarkReader reader, int sampleSize, boolean userBased, boolean resBased, int beta) {
254+
private static List<Map<Integer, Double>> startBM25CreationForTagPrediction(BookmarkReader reader, int sampleSize, boolean userBased, boolean resBased, int beta, boolean ignoreResource) {
255255
int size = reader.getBookmarks().size();
256256
int trainSize = size - sampleSize;
257257
Stopwatch timer = new Stopwatch();
@@ -266,7 +266,8 @@ private static List<Map<Integer, Double>> startBM25CreationForTagPrediction(Book
266266
for (int i = trainSize; i < size; i++) {
267267
Bookmark data = reader.getBookmarks().get(i);
268268
Map<Integer, Double> map = null;
269-
map = calculator.getRankedTagList(data.getUserID(), data.getResourceID(), true);
269+
int resID = (ignoreResource ? -1 : data.getResourceID());
270+
map = calculator.getRankedTagList(data.getUserID(), resID, true);
270271
results.add(map);
271272
//System.out.println(data.getTags() + "|" + map.keySet());
272273
}
@@ -277,12 +278,12 @@ private static List<Map<Integer, Double>> startBM25CreationForTagPrediction(Book
277278
return results;
278279
}
279280

280-
public static BookmarkReader predictTags(String filename, int trainSize, int sampleSize, int neighbors, boolean userBased, boolean resBased, int beta) {
281+
public static BookmarkReader predictTags(String filename, int trainSize, int sampleSize, int neighbors, boolean userBased, boolean resBased, int beta, boolean ignoreResource) {
281282
MAX_NEIGHBORS = neighbors;
282-
return predictSample(filename, trainSize, sampleSize, userBased, resBased, beta);
283+
return predictSample(filename, trainSize, sampleSize, userBased, resBased, beta, ignoreResource);
283284
}
284285

285-
public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased, boolean resBased, int beta) {
286+
public static BookmarkReader predictSample(String filename, int trainSize, int sampleSize, boolean userBased, boolean resBased, int beta, boolean ignoreResource) {
286287
Timer timerThread = new Timer();
287288
MemoryThread memoryThread = new MemoryThread();
288289
timerThread.schedule(memoryThread, 0, MemoryThread.TIME_SPAN);
@@ -291,7 +292,7 @@ public static BookmarkReader predictSample(String filename, int trainSize, int s
291292
reader.readFile(filename);
292293

293294
List<Map<Integer, Double>> cfValues = null;
294-
cfValues = startBM25CreationForTagPrediction(reader, sampleSize, userBased, resBased, beta);
295+
cfValues = startBM25CreationForTagPrediction(reader, sampleSize, userBased, resBased, beta, ignoreResource);
295296

296297
List<int[]> predictionValues = new ArrayList<int[]>();
297298
for (int i = 0; i < cfValues.size(); i++) {

0 commit comments

Comments
 (0)