Skip to content

Commit 9d54720

Browse files
committed
fixed lda topic creation
1 parent 6bf8095 commit 9d54720

22 files changed

Lines changed: 387 additions & 181 deletions

src/common/Bookmark.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,21 @@ public static List<Integer> getUsersFromResource(List<Bookmark> lines, int resID
224224
}
225225
return new ArrayList<Integer>(userList);
226226
}
227+
228+
public static List<Integer> getTagsFromResource(List<Bookmark> lines, int resID) {
229+
if (resID == -1) {
230+
return null;
231+
}
232+
Set<Integer> tagList = new HashSet<Integer>();
233+
for (Bookmark data : lines) {
234+
if (data.resID == resID) {
235+
for (int tagID : data.getTags()) {
236+
tagList.add(tagID);
237+
}
238+
}
239+
}
240+
return new ArrayList<Integer>(tagList);
241+
}
227242

228243
public static Map<Integer, Double> getResourcesFromUserWithBLL(List<Bookmark> trainData, List<Bookmark> testData, int userID, List<Map<Integer, Double>> bllValues) {
229244
Map<Integer, Double> resourceMap = new LinkedHashMap<Integer, Double>();

src/common/CooccurenceMatrix.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ License, or (at your option) any later version.
2525
import java.util.List;
2626
import java.util.Map;
2727
import java.util.Set;
28+
import java.util.TreeMap;
2829
import java.util.TreeSet;
2930

3031
import net.sf.javaml.clustering.mcl.SparseMatrix;
@@ -113,11 +114,25 @@ public Map<Integer, Double> getCooccurenceTags(Map<Integer, Integer> tags) {
113114
return resultTags;
114115
}
115116

116-
public Map<Integer, Double> calculateAssociativeComponentsWithTagAssosiation(Map<Integer, Double> sourceTags, Map<Integer, Double> destinationTags, boolean srcCount, boolean destCount) {
117-
Map<Integer, Double> associativeComponents = new LinkedHashMap<Integer, Double>();
117+
public Map<Integer, Double> calculateAssociativeComponentsWithTagAssosiation(Map<Integer, Double> sourceTags, Map<Integer, Double> destinationTags, boolean srcCount, boolean destCount, boolean onlyTopTags) {
118+
Map<Integer, Double> associativeComponents = new LinkedHashMap<Integer, Double>();
119+
Map<Integer, Double> destinationTagsCopy = new LinkedHashMap<Integer, Double>();
120+
if (onlyTopTags) {
121+
Map<Integer, Double> sortedDestinationTags = new TreeMap<Integer, Double>(new DoubleMapComparator(destinationTags));
122+
sortedDestinationTags.putAll(destinationTags);
123+
for (Map.Entry<Integer, Double> entry : sortedDestinationTags.entrySet()) {
124+
if (destinationTagsCopy.size() < 10) {
125+
destinationTagsCopy.put(entry.getKey(), entry.getValue());
126+
} else {
127+
break;
128+
}
129+
}
130+
} else {
131+
destinationTagsCopy.putAll(destinationTags);
132+
}
118133
if (sourceTags != null) {
119134
for (Map.Entry<Integer, Double> tag : sourceTags.entrySet()){
120-
associativeComponents.put(tag.getKey(), (srcCount ? tag.getValue() : 1.0) * this.calculateAssociativeComponent(tag.getKey(), destinationTags, destCount));
135+
associativeComponents.put(tag.getKey(), (srcCount ? tag.getValue() : 1.0) * this.calculateAssociativeComponent(tag.getKey(), destinationTagsCopy, destCount));
121136
}
122137
}
123138
return associativeComponents;

src/common/PredictionData.java

Lines changed: 88 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,23 @@ License, or (at your option) any later version.
2121
package common;
2222

2323
import java.util.ArrayList;
24+
import java.util.LinkedHashMap;
2425
import java.util.List;
2526
import java.util.Map;
2627

2728
public class PredictionData {
2829

2930
private int userID;
31+
private int resID;
3032
private int k;
3133
private List<String> realData;
3234
private List<String> predictionData;
3335

3436
private double numFoundRelevantDocs;
3537

36-
public PredictionData(int userID, List<String> realData, List<String> predictionData, int k) {
38+
public PredictionData(int userID, int resID, List<String> realData, List<String> predictionData, int k) {
3739
this.userID = userID;
40+
this.resID = resID;
3841
this.realData = realData;
3942
this.k = k;
4043
if (k == 0) {
@@ -102,6 +105,10 @@ public double getMAP() {
102105
return 0.0;
103106
}
104107

108+
public double getCoverage() {
109+
return (this.predictionData.size() > 0 ? 1.0 : 0.0);
110+
}
111+
105112
private double getPrecisionK(int k) {
106113
if (k != 0 && k <= this.predictionData.size()) {
107114
List<String> foundRelevantDocs = new ArrayList<String>(this.realData);
@@ -126,15 +133,16 @@ private void determineRelevantDocs() {
126133
}
127134

128135
// Resource-rec metrics
129-
private double getNovelty(int targetRes, List<Integer> resources, List<Map<Integer, Double>> resourceTopics) {
136+
private double getNovelty(int targetRes, List<Integer> resources, List<Map<Integer, Double>> resourceTopics, boolean cosine) {
130137
double novelty = 0.0;
131138

132139
int count = 0;
133140
Map<Integer, Double> targetTopics = resourceTopics.get(targetRes);
134141
for (int res : resources) {
135142
if (targetRes != res) {
136143
Map<Integer, Double> resTopics = resourceTopics.get(res);
137-
double disSim = 1.0 - Utilities.getCosineFloatSim(targetTopics, resTopics);
144+
double sim = (cosine ? Utilities.getCosineFloatSim(targetTopics, resTopics) : Utilities.getJaccardFloatSim(targetTopics, resTopics));
145+
double disSim = 1.0 - sim;
138146
novelty += disSim;
139147
count++;
140148
}
@@ -145,7 +153,7 @@ private double getNovelty(int targetRes, List<Integer> resources, List<Map<Integ
145153
return novelty / count;
146154
}
147155

148-
public double getDiversity(List<Map<Integer, Double>> resourceTopics) {
156+
public double getDiversity(List<Map<Integer, Double>> resourceTopics, boolean cosine) {
149157
double diversity = 0.0;
150158
if (this.predictionData == null || this.predictionData.size() == 0) {
151159
return diversity;
@@ -156,7 +164,7 @@ public double getDiversity(List<Map<Integer, Double>> resourceTopics) {
156164
predictionIDs.add(Integer.valueOf(res));
157165
}
158166
for (int resID : predictionIDs) {
159-
diversity += getNovelty(resID, predictionIDs, resourceTopics);
167+
diversity += getNovelty(resID, predictionIDs, resourceTopics, cosine);
160168
}
161169
return diversity / this.predictionData.size();
162170
}
@@ -172,11 +180,72 @@ public double getSerendipity(List<Map<Integer, Double>> resourceTopics, List<Int
172180

173181
for (String res : this.predictionData) {
174182
int resID = Integer.parseInt(res);
175-
serendipity += getNovelty(resID, knownResources, resourceTopics);
183+
serendipity += getNovelty(resID, knownResources, resourceTopics, true);
176184
}
177185
return serendipity / this.predictionData.size();
178186
}
179187

188+
public double getTagDiversity(List<Map<Integer, Double>> tagEntities) {
189+
double diversity = 0.0;
190+
if (this.predictionData == null || this.predictionData.size() == 0) {
191+
return diversity;
192+
}
193+
194+
List<Integer> predictionIDs = new ArrayList<Integer>();
195+
for (String res : this.predictionData) {
196+
predictionIDs.add(Integer.valueOf(res));
197+
}
198+
int k = predictionIDs.size();
199+
for (int i = 0; i < k; i++) {
200+
Map<Integer, Double> targetEntities = tagEntities.get(i);
201+
for (int j = i + 1; j < k; j++) {
202+
Map<Integer, Double> sourceEntities = tagEntities.get(j);
203+
diversity += (1.0 - Utilities.getJaccardFloatSim(targetEntities, sourceEntities));
204+
}
205+
}
206+
double normConstant = (k * k - k) / 2.0;
207+
if (normConstant > 0.0) {
208+
diversity /= normConstant;
209+
}
210+
return diversity;
211+
}
212+
213+
public double getTagSerendipity(Map<Integer, Integer> tagFrequencyMap, boolean cosine) {
214+
Double serendipity = 0.0;
215+
if (this.predictionData == null || this.predictionData.size() == 0) {
216+
return 0.0;
217+
}
218+
if (tagFrequencyMap == null || tagFrequencyMap.size() == 0) {
219+
return 1.0;
220+
}
221+
if (!cosine) {
222+
double i = 1.0;
223+
double maxIFF = Double.MIN_VALUE;
224+
for (String tag : this.predictionData) {
225+
int tagID = Integer.parseInt(tag);
226+
Integer tagCount = tagFrequencyMap.get(tagID);
227+
double iff = Math.log(((double)tagFrequencyMap.size() + 1.0) / ((tagCount == null ? 0.0 : tagCount.doubleValue()) + 1.0));
228+
double disc = 1.0 / Math.log(1.0 + i++);
229+
serendipity += (disc * iff);
230+
if (iff > maxIFF) {
231+
maxIFF = iff;
232+
}
233+
}
234+
double normConstant = 0.0;
235+
for (double j = 1.0; j <= this.predictionData.size(); j++) {
236+
normConstant += ((1.0 / Math.log(1.0 + j)) * maxIFF);
237+
}
238+
if (normConstant > 0.0) {
239+
serendipity /= normConstant;
240+
}
241+
if (serendipity.isInfinite() || serendipity.isNaN()) {
242+
serendipity = 1.0;
243+
}
244+
} else {
245+
serendipity = (1.0 - Utilities.getCosineSim(getPredictionDataAsMap(), tagFrequencyMap));
246+
}
247+
return serendipity;
248+
}
180249

181250
/**
182251
* Compute the normalized discounted cumulative gain (NDCG) of a list of ranked items.
@@ -235,11 +304,24 @@ public int getUserID() {
235304
return this.userID;
236305
}
237306

307+
public int getResID() {
308+
return this.resID;
309+
}
310+
238311
public List<String> getRealData() {
239312
return this.realData;
240313
}
241314

242315
public List<String> getPredictionData() {
243316
return this.predictionData;
244317
}
318+
319+
public Map<Integer, Integer> getPredictionDataAsMap() {
320+
Map<Integer, Integer> returnMap = new LinkedHashMap<Integer, Integer>();
321+
for (String data : this.predictionData) {
322+
int intVal = Integer.parseInt(data);
323+
returnMap.put(intVal, 1);
324+
}
325+
return returnMap;
326+
}
245327
}

src/common/Utilities.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,26 @@ public static List<Map<Integer, Double>> getUniqueTopicMaps(List<Bookmark> userL
137137
return resMaps;
138138
}
139139

140+
public static List<Map<Integer, Double>> getResourceMapsForTags(List<Bookmark> userLines) {
141+
List<Map<Integer, Double>> tagMaps = new ArrayList<Map<Integer, Double>>();
142+
for (Bookmark data : userLines) {
143+
for (int tagID : data.getTags()) {
144+
Map<Integer, Double> tagMap = null;
145+
if (tagID >= tagMaps.size()) {
146+
tagMap = new LinkedHashMap<Integer, Double>();
147+
tagMaps.add(tagMap);
148+
} else {
149+
tagMap = tagMaps.get(tagID);
150+
}
151+
if (tagMap != null) {
152+
Double count = tagMap.get(data.getWikiID());
153+
tagMap.put(data.getWikiID(), count == null ? 1.0 : count.doubleValue() + 1.0);
154+
}
155+
}
156+
}
157+
return tagMaps;
158+
}
159+
140160
public static List<int[]> createRandomBaseline(int from, int to, int count) {
141161
List<int[]> baseline = new ArrayList<int[]>();
142162

src/engine/BaseLevelLearningEngine.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, S
113113
}
114114
}
115115
if (algorithm == null || algorithm == Algorithm.BLLac || algorithm == Algorithm.BLLacMPr) {
116-
Map<Integer, Double> associativeValues = this.rMatrix.calculateAssociativeComponentsWithTagAssosiation(userCountMap, resCountMap, false, true);
116+
Map<Integer, Double> associativeValues = this.rMatrix.calculateAssociativeComponentsWithTagAssosiation(userCountMap, resCountMap, false, true, false);
117117
for (Map.Entry<Integer, Double> entry : associativeValues.entrySet()) {
118118
Double val = resultMap.get(entry.getKey());
119119
if (!filterTags.contains(entry.getKey())) {

src/file/BookmarkReader.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public class BookmarkReader {
3939

4040
private final int countLimit;
4141
private List<Bookmark> userLines;
42+
private List<Bookmark> testLines;
4243
private List<String> categories;
4344

4445
private List<String> tags;
@@ -57,6 +58,7 @@ public class BookmarkReader {
5758
public BookmarkReader(int countLimit, boolean stemming) {
5859
this.countLimit = countLimit;
5960
this.userLines = new ArrayList<Bookmark>();
61+
this.testLines = null;
6062
this.categories = new ArrayList<String>();
6163

6264
this.tags = new ArrayList<String>();
@@ -229,8 +231,12 @@ public List<Bookmark> getBookmarks() {
229231
return this.userLines;
230232
}
231233

232-
public void setUserLines(List<Bookmark> userLines) {
233-
this.userLines = userLines;
234+
public List<Bookmark> getTestLines() {
235+
return this.testLines;
236+
}
237+
238+
public void setTestLines(List<Bookmark> userLines) {
239+
this.testLines = userLines;
234240
}
235241

236242
public List<String> getCategories() {

src/file/BookmarkSplitter.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public void leaveLastOutSplit(String filename, boolean coldStart) {
9797
userIndex = 1;
9898
}
9999
if (userIndex++ == userSize) {
100-
if (coldStart || (!coldStart && userSize > 1)) {
100+
if (coldStart || userSize > 1) {
101101
testLines.add(data);
102102
} else {
103103
trainLines.add(data);
@@ -198,7 +198,8 @@ public static boolean writeSample(BookmarkReader reader, List<Bookmark> userSamp
198198
bw.write("\"" + bookmark.getTimestamp().replace("\"", "") + "\";\"");
199199
int i = 0;
200200
for (int tag : bookmark.getTags()) {
201-
bw.write(reader.getTags().get(tag).replace("\"", ""));
201+
//bw.write(reader.getTags().get(tag).replace("\"", "")); // enable if you need tag strings!
202+
bw.write(Integer.toString(tag));
202203
if (++i < bookmark.getTags().size()) {
203204
bw.write(',');
204205
}
@@ -232,14 +233,14 @@ public static boolean writeSample(BookmarkReader reader, List<Bookmark> userSamp
232233

233234
public static void splitSample(String filename, String sampleName, int count, int percentage, boolean tagRec) {
234235
BookmarkReader reader = new BookmarkReader(0, false);
235-
reader.readFile(filename);
236-
BookmarkSplitter splitter = new BookmarkSplitter(reader);
236+
reader.readFile(filename);
237237
Collections.sort(reader.getBookmarks());
238+
BookmarkSplitter splitter = new BookmarkSplitter(reader);
238239
for (int i = 1; i <= count; i++) {
239240
if (percentage > 0) {
240241
splitter.leavePercentageOutSplit(sampleName, percentage, true, null, tagRec);
241242
} else {
242-
splitter.leaveLastOutSplit(sampleName, true);
243+
splitter.leaveLastOutSplit(sampleName, false);
243244
}
244245
}
245246
}

src/file/PredictionFileReader.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public boolean readFile(String filename, int k, BookmarkReader wikiReader, Integ
7575
if (lineParts.length > 2) {
7676
List<String> predictionData = Arrays.asList(lineParts[2].split(", "));
7777
if (predictionData.size() > 0) {
78-
PredictionData data = new PredictionData(userID, realData, predictionData, k);
78+
PredictionData data = new PredictionData(userID, resID, realData, predictionData, k);
7979
this.predictions.add(data);
8080
this.predictionCount++;
8181
} else {
@@ -113,7 +113,7 @@ public boolean readMyMediaLiteFile(String filename, int k, int trainSize, Bookma
113113
continue; // skip invalid line
114114
}
115115

116-
int userID = -1;
116+
int userID = -1, resID = -1;
117117
try {
118118
userID = Integer.parseInt(lineParts[0]);
119119
} catch (Exception e) {
@@ -145,7 +145,7 @@ public boolean readMyMediaLiteFile(String filename, int k, int trainSize, Bookma
145145
for (String predictionString : predictionStringData) {
146146
predictionData.add(predictionString.substring(0, predictionString.indexOf(":")));
147147
}
148-
PredictionData data = new PredictionData(userID, realData, predictionData, k);
148+
PredictionData data = new PredictionData(userID, resID, realData, predictionData, k);
149149
this.predictions.add(data);
150150
this.predictionCount++;
151151
} else {

src/file/PredictionFileWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public boolean writeFile(String filename) {
5151
int j = 0;
5252
String resultString = "";
5353
int[] userResults = this.results.get(i);
54-
Bookmark userData = this.reader.getBookmarks().get(i);
54+
Bookmark userData = this.reader.getTestLines().get(i);
5555
List<Integer> userCats = userData.getTags();
5656

5757
resultString += (userData.getUserID() + (userData.getWikiID() == -1 ? "" : "-" + userData.getWikiID()) + "|");

src/file/preprocessing/CoreFiltering.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public BookmarkReader filterOrphansIterative(int userLevel, int resLevel, int ta
9292
}
9393

9494
System.out.println("Kept lines: " + keepData.size());
95-
this.reader.setUserLines(keepData);
95+
this.reader.setTestLines(keepData);
9696
return this.reader;
9797
}
9898
}

0 commit comments

Comments
 (0)