Skip to content

Commit 45535e8

Browse files
committed
added methods for music recommendations
1 parent 6940873 commit 45535e8

14 files changed

Lines changed: 521 additions & 63 deletions

src/common/MapUtil.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,16 @@ public static void normalizeMap(Map<Integer, Double> map) {
3333
e.setValue(Math.exp(e.getValue()) / denom);
3434
}
3535
}
36+
37+
public static void normalizeMap(Map<Integer, Double> map, double beta) {
38+
double denom = 0.0;
39+
for (Map.Entry<Integer, Double> e : map.entrySet()) {
40+
denom += Math.exp(e.getValue());
41+
denom += e.getValue();
42+
}
43+
for (Map.Entry<Integer, Double> e : map.entrySet()) {
44+
e.setValue(Math.exp(e.getValue()) / denom * beta);
45+
e.setValue(e.getValue() / denom * beta);
46+
}
47+
}
3648
}

src/common/PredictionData.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,13 @@ License, or (at your option) any later version.
2121
package common;
2222

2323
import java.util.ArrayList;
24+
import java.util.Collections;
2425
import java.util.LinkedHashMap;
2526
import java.util.List;
2627
import java.util.Map;
2728

29+
import file.BookmarkReader;
30+
2831
public class PredictionData {
2932

3033
private int userID;
@@ -244,6 +247,32 @@ public double getTagSerendipity(Map<Integer, Integer> tagFrequencyMap, boolean c
244247
return serendipity;
245248
}
246249

250+
public double getTagNovelty(Map<Integer, Integer> popMap, BookmarkReader reader) {
251+
double novelty = 0.0;
252+
if (this.predictionData.size() == 0 || popMap == null) {
253+
System.out.println("Error while calculating novelty");
254+
return 1.0;
255+
}
256+
// normalization constant
257+
double popMax = Collections.max(popMap.values()) + 1.0;
258+
popMax = Math.log(popMax) / Math.log(2);
259+
260+
for (String tag : this.predictionData) {
261+
Integer tagID = reader.getTagMap().get(tag);
262+
if (tagID == null) {
263+
System.out.println("Novelty: Tag not found");
264+
}
265+
double pop = 1.0;
266+
if (popMap.containsKey(tagID)) {
267+
pop += popMap.get(tagID);
268+
}
269+
//pop = pop / popMax * (-1);
270+
pop = Math.log(pop) / Math.log(2);
271+
novelty += (pop / popMax);
272+
}
273+
return 1.0 - novelty / this.predictionData.size();
274+
}
275+
247276
/**
248277
* Compute the normalized discounted cumulative gain (NDCG) of a list of ranked items.
249278
*

src/common/Utilities.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ License, or (at your option) any later version.
4545

4646
public class Utilities {
4747

48+
public static int REC_LIMIT = 20;
49+
4850
private final static String REV_START = "<rev xml:space=\"preserve\">";
4951
private final static String REV_END = "</rev>";
5052

@@ -70,6 +72,14 @@ public static boolean isEntityEvaluated(BookmarkReader reader, int id, Integer m
7072
return false;
7173
}
7274

75+
public static Map<Integer, Integer> getPopMap(BookmarkReader reader) {
76+
Map<Integer, Integer> countMap = new LinkedHashMap<Integer, Integer>();
77+
for (int i = 0; i < reader.getTagCounts().size(); i++) {
78+
countMap.put(i, reader.getTagCounts().get(i));
79+
}
80+
return countMap;
81+
}
82+
7383
public static List<Map<Integer, Integer>> getUserMaps(List<Bookmark> userLines) {
7484
List<Map<Integer, Integer>> userMaps = new ArrayList<Map<Integer, Integer>>();
7585
for (Bookmark data : userLines) {
@@ -82,6 +92,19 @@ public static List<Map<Integer, Integer>> getUserMaps(List<Bookmark> userLines)
8292
}
8393
return userMaps;
8494
}
95+
96+
public static List<Map<Integer, Double>> getFloatUserMaps(List<Bookmark> userLines) {
97+
List<Map<Integer, Double>> userMaps = new ArrayList<Map<Integer, Double>>();
98+
for (Bookmark data : userLines) {
99+
int userID = data.getUserID();
100+
if (userID >= userMaps.size()) {
101+
userMaps.add(Utilities.mergeFloatListWithMap(data.getTags(), new LinkedHashMap<Integer, Double>()));
102+
} else {
103+
Utilities.mergeFloatListWithMap(data.getTags(), userMaps.get(userID));
104+
}
105+
}
106+
return userMaps;
107+
}
85108

86109
public static List<Map<Integer, Integer>> getResMaps(List<Bookmark> userLines) {
87110
List<Map<Integer, Integer>> resMaps = new ArrayList<Map<Integer, Integer>>();
@@ -234,6 +257,14 @@ public static Map<Integer, Integer> mergeListWithMap(List<Integer> from, Map<Int
234257
}
235258
return to;
236259
}
260+
261+
public static Map<Integer, Double> mergeFloatListWithMap(List<Integer> from, Map<Integer, Double> to) {
262+
for (Integer value : from) {
263+
Double count = to.get(value);
264+
to.put(value, (count != null ? count + 1 : 1));
265+
}
266+
return to;
267+
}
237268

238269
public static Map<Integer, Double> mergeProbMaps(BookmarkReader reader, Map<Integer, Double> from,
239270
Map<Integer, Double> to, double lambda) {
@@ -484,15 +515,19 @@ public static double getJaccardFloatSim(Map<Integer, Double> targetMap, Map<Inte
484515
public static double getCosineSim(Map<Integer, Integer> targetMap, Map<Integer, Integer> nMap) {
485516
Set<Integer> both = new HashSet<Integer>(targetMap.keySet());
486517
both.retainAll(nMap.keySet());
518+
if (both.size() == 0) {
519+
return 0.0;
520+
}
487521
double scalar = 0.0, norm1 = 0.0, norm2 = 0.0;
488522
for (int k : both)
489523
scalar += (targetMap.get(k) * nMap.get(k));
490524
for (int k : targetMap.keySet())
491525
norm1 += (targetMap.get(k) * targetMap.get(k));
492526
for (int k : nMap.keySet())
493527
norm2 += (nMap.get(k) * nMap.get(k));
494-
if (Math.sqrt(norm1 * norm2) == 0.0)
528+
if (Math.sqrt(norm1 * norm2) == 0.0) {
495529
return 0.0;
530+
}
496531
return scalar / Math.sqrt(norm1 * norm2);
497532
}
498533

src/file/BookmarkReader.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,14 @@ private boolean doReadFile(String path, String filename) {
117117
processUserData(userID, userData, tags, categories, resID);
118118
//}
119119
// reset userdata
120-
userID = lineParts[0].replace("\"", "");
121-
resID = lineParts[1].replace("\"", "");
122-
timestamp = lineParts[2].replace("\"", "");
120+
userID = lineParts[0].replace("\"", "").trim();
121+
resID = lineParts[1].replace("\"", "").trim();
122+
timestamp = lineParts[2].replace("\"", "").trim();
123123
userData = new Bookmark(-1, -1, timestamp);
124124
categories.clear();
125125
tags.clear();
126126
for (String tag : lineParts[3].replace("\"", "").split(",")) {
127-
String stemmedTag = tag.toLowerCase();
127+
String stemmedTag = tag.trim().toLowerCase();
128128
if (!stemmedTag.isEmpty() && !tags.contains(stemmedTag)) {
129129
if (this.stemmer != null) {
130130
this.stemmer.setCurrent(stemmedTag);
@@ -140,21 +140,23 @@ private boolean doReadFile(String path, String filename) {
140140
//if (cat.contains("_")) {
141141
// categories.add(cat.substring(0, cat.indexOf("_")).toLowerCase());
142142
//} else {
143-
categories.add(cat.toLowerCase());
143+
categories.add(cat.trim().toLowerCase());
144144
//}
145145
}
146146
}
147147
}
148148

149-
//if (lineParts.length > 5) { // is there a rating?
150-
// try {
151-
// userData.setRating(Double.parseDouble(lineParts[5].replace("\"", "")));
152-
// } catch (Exception e) { /* do nothing */ }
153-
//}
149+
if (lineParts.length > 5) { // is there a rating?
150+
try {
151+
userData.setRating(Double.parseDouble(lineParts[5].replace("\"", "")));
152+
} catch (Exception e) {
153+
//System.out.println("Rating parse error");
154+
}
155+
}
154156

155157
// TODO ----------------------
156158
// extend common/Bookmark class with fields for title (= lineParts[6]) and description (= lineParts[7])
157-
//if (lineParts.length > 6) { // is there a rating?
159+
//if (lineParts.length > 6) { // is there a title or description?
158160
// try {
159161
// userData.setTitle(lineParts[6].replace("\"", ""));
160162
// } catch (Exception e) { /* do nothing */ }

src/file/PredictionFileReader.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ public boolean readFile(String filename, int k, BookmarkReader wikiReader, Integ
7474
if (parts.length > 1) {
7575
resID = Integer.parseInt(parts[1]);
7676
}
77-
if (!Utilities.isEntityEvaluated(wikiReader, userID, minBookmarks, maxBookmarks, false) || !Utilities.isEntityEvaluated(wikiReader, resID, minResBookmarks, maxResBookmarks, true)) {
78-
continue; // skip this user if it shoudln't be evaluated - # bookmarks case
79-
}
77+
//if (!Utilities.isEntityEvaluated(wikiReader, userID, minBookmarks, maxBookmarks, false) || !Utilities.isEntityEvaluated(wikiReader, resID, minResBookmarks, maxResBookmarks, true)) {
78+
// continue; // skip this user if it shoudln't be evaluated - # bookmarks case
79+
//}
8080
if (categorizer != null) {
8181
if (!categorizer.evaluate(userID)) {
8282
continue; // skip this user if it shoudln't be evaluated - categorizer case

src/file/PredictionFileWriter.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ License, or (at your option) any later version.
3434

3535
public class PredictionFileWriter {
3636

37-
private static final int OUTPUT_LIMIT = 10;
37+
private static final int OUTPUT_LIMIT = 30;
3838

3939
private BookmarkReader reader;
4040
private List<int[]> results;
@@ -63,11 +63,12 @@ public boolean writeFile(String filename) {
6363
}
6464
List<Integer> userTags = userData.getTags();
6565

66-
resultString += (userData.getUserID() + (userData.getResourceID() == -1 ? "" : "-" + userData.getResourceID()) + "|");
66+
resultString += (this.reader.getUsers().get(userData.getUserID()) + "|");
67+
//resultString += (userData.getUserID() + (userData.getResourceID() == -1 ? "" : "-" + userData.getResourceID()) + "|");
6768
for (int c : userTags) {
6869
//if (j++ < OUTPUT_LIMIT) {
69-
//resultString += (categories.get(c) + ", ");
70-
resultString += (c + ", ");
70+
//resultString += (c + ", ");
71+
resultString += (this.reader.getTags().get(c) + ", ");
7172
//} else {
7273
// break;
7374
//}
@@ -80,7 +81,8 @@ public boolean writeFile(String filename) {
8081
j = 0;
8182
for (int c : userResults) {
8283
if (j++ < OUTPUT_LIMIT) {
83-
resultString += (c + ", ");
84+
//resultString += (c + ", ");
85+
resultString += (this.reader.getTags().get(c) + ", ");
8486
} else {
8587
break;
8688
}

src/processing/BLLCalculator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ License, or (at your option) any later version.
4444

4545
public class BLLCalculator {
4646

47-
private final static int REC_LIMIT = 10;
47+
private final static int REC_LIMIT = Utilities.REC_LIMIT;
4848

4949
private BookmarkReader reader;
5050
private double dVal;

src/processing/MPCalculator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,9 @@ public static BookmarkReader predictPopularTags(String filename, int trainSize,
107107

108108
List<int[]> values = null;
109109
if (mp) {
110-
values = getPopularTags(reader, sampleSize, 10);
110+
values = getPopularTags(reader, sampleSize, Utilities.REC_LIMIT);
111111
} else {
112-
values = getPerfectTags(reader, sampleSize, 10);
112+
values = getPerfectTags(reader, sampleSize, Utilities.REC_LIMIT);
113113
}
114114

115115
reader.setTestLines(reader.getBookmarks().subList(trainSize, reader.getBookmarks().size()));

src/processing/MPurCalculator.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ License, or (at your option) any later version.
4040

4141
public class MPurCalculator {
4242

43-
private final static int REC_LIMIT = 10;
43+
private final static int REC_LIMIT = Utilities.REC_LIMIT;
4444

4545
private BookmarkReader reader;
4646
private double beta;
@@ -87,7 +87,8 @@ public Map<Integer, Double> getRankedTagList(int userID, int resID, boolean sort
8787
if (this.userBased && this.userMaps != null && userID < this.userMaps.size()) {
8888
Map<Integer, Integer> userMap = this.userMaps.get(userID);
8989
for (Map.Entry<Integer, Integer> entry : userMap.entrySet()) {
90-
double userVal = this.beta * (Math.exp(entry.getValue().doubleValue()) / this.userDenoms.get(userID));
90+
//double userVal = this.beta * (Math.exp(entry.getValue().doubleValue()) / this.userDenoms.get(userID));
91+
double userVal = entry.getValue().doubleValue();
9192
resultMap.put(entry.getKey(), userVal);
9293
}
9394
}

src/processing/MetricsCalculator.java

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,34 +66,37 @@ public class MetricsCalculator {
6666

6767
public MetricsCalculator(PredictionFileReader reader, String outputFile, int k, BookmarkReader bookmarkReader, boolean recommTags) {
6868
this.reader = reader;
69-
if (recommTags) { // TODO: check
70-
this.bookmarkReader = null;
71-
}
69+
this.bookmarkReader = bookmarkReader;
70+
//if (recommTags) { // TODO: check
71+
// this.bookmarkReader = null;
72+
//}
7273
BufferedWriter bw = null;
73-
//TODO: Enable if you need data for statistical tests
74-
75-
if ((recommTags && (k == 5 || k == 10)) || (!recommTags && k == 20)) {
74+
//TODO: Enable if you need data for statistical tests
75+
if ((recommTags && (k == 5 || k == 10 || k == 20)) || (!recommTags && k == 20)) {
7676
try {
7777
FileWriter writer = new FileWriter(new File(outputFile + "_" + k + ".txt"), true);
7878
bw = new BufferedWriter(writer);
7979
} catch (Exception e) {
8080
e.printStackTrace();
8181
}
8282
}
83-
84-
83+
8584
double count = this.reader.getPredictionCount(); // only user where there are recommendations
8685
//double count = this.reader.getPredictionData().size(); // all users
8786
double recall = 0.0, precision = 0.0, mrr = 0.0, fMeasure = 0.0, map = 0.0, nDCG = 0.0, diversity = 0.0, serendipity = 0.0;
8887

8988
List<Map<Integer, Double>> entityFeatures = null;
9089
List<Map<Integer, Integer>> tagCountMaps = null;
90+
Map<Integer, Integer> popMap = null;
9191
List<Bookmark> trainList = null;
9292
if (this.bookmarkReader != null) {
9393
trainList = this.bookmarkReader.getBookmarks().subList(0, this.bookmarkReader.getCountLimit());
9494
if (recommTags) {
95-
tagCountMaps = Utilities.getResMaps(trainList);
96-
entityFeatures = Utilities.getResourceMapsForTags(trainList);
95+
tagCountMaps = Utilities.getUserMaps(trainList);
96+
popMap = Utilities.getPopMap(this.bookmarkReader);
97+
// TODO: old version from RecSys
98+
//tagCountMaps = Utilities.getResMaps(trainList);
99+
//entityFeatures = Utilities.getResourceMapsForTags(trainList);
97100
} else {
98101
entityFeatures = Utilities.getUniqueTopicMaps(trainList, true); // TODO: check regarding unique!
99102
}
@@ -125,13 +128,18 @@ public MetricsCalculator(PredictionFileReader reader, String outputFile, int k,
125128
double cDiversity = 0.0, cSerendipity = 0.0;
126129
if (this.bookmarkReader != null) {
127130
if (recommTags) {
131+
cDiversity = data.getTagNovelty(popMap, bookmarkReader);
132+
int userID = this.bookmarkReader.getUserMap().get(Integer.toString(data.getUserID()));
133+
cSerendipity = data.getTagNovelty(tagCountMaps.get(userID), bookmarkReader);
134+
/* old version from recsys paper
128135
cDiversity = data.getTagDiversity(entityFeatures);
129136
if (data.getResID() < tagCountMaps.size() && data.getResID() != -1) {
130137
Map<Integer, Integer> tagCountMap = tagCountMaps.get(data.getResID());
131138
cSerendipity = data.getTagSerendipity(tagCountMap, false);
132139
} else {
133140
cSerendipity = 1.0;
134141
}
142+
*/
135143
} else {
136144
List<Integer> knownEntities = Bookmark.getResourcesFromUser(trainList, data.getUserID());
137145
cDiversity = data.getDiversity(entityFeatures, true);

0 commit comments

Comments
 (0)