-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathfeed.js
More file actions
355 lines (353 loc) · 12.4 KB
/
feed.js
File metadata and controls
355 lines (353 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
//feed.js
var xml2js = require('xml2js'),
_ = require('underscore'),
request = require('request'),
URL = require('url'),
Iconv = require('iconv').Iconv;
/**
 * Downloads a feed over HTTP(S) and hands the body to parseString.
 *
 * @param {string} feedURL - Absolute http:// or https:// address of the feed.
 * @param {Object|Function} [options] - Merged over the defaults below and passed
 *   both to `request` and on to parseString. May be omitted, in which case the
 *   second argument is taken as the callback. `rssEncoding` names the charset
 *   passed to Iconv as the source encoding (default 'UTF-8').
 * @param {Function} callback - callback(err, feed); `feed` is null on failure.
 *   RSS and ATOM results are structurally similar, but callers should still
 *   check the `type` property of the result.
 */
function parseURL(feedURL, options, callback) {
  if (typeof options === 'function' && !callback) {
    callback = options;
    options = {};
  }
  var defaults = {
    uri: feedURL,
    jar: false,
    proxy: false,
    followRedirect: true,
    timeout: 1000 * 30, // 30 second timeout
    // NOTE(review): presumably null makes `request` return the raw body so
    // iconv can decode it below - confirm against the request documentation.
    encoding: null,
    rssEncoding: 'UTF-8'
  };
  options = _.extend(defaults, options);

  // Only http and https are accepted. A non-string URL is reported the same
  // way instead of letting URL.parse throw.
  var protocol = typeof feedURL === 'string' ? URL.parse(feedURL).protocol : null;
  if (protocol !== 'http:' && protocol !== 'https:') {
    callback({
      error: "Only http or https protocols are accepted"
    }, null);
    return;
  }

  request(options, function(err, response, xml) {
    if (err) {
      callback(err, null);
      return;
    }
    if (xml == null) {
      callback('Failed to retrieve source!', null);
      return;
    }
    if (response == null || response.statusCode == null) {
      callback("Failed to retrieve source! No response code!!", null);
      return;
    }
    if (response.statusCode >= 400) {
      callback("Failed to retrieve source! Invalid response code (" + response.statusCode + ")!", null);
      return;
    }

    // Both constructing the converter (unknown charset name) and converting
    // (bytes invalid for that charset) can throw. Report that through the
    // callback rather than letting it escape the request callback.
    var decoded;
    try {
      decoded = new Iconv(options.rssEncoding, 'UTF-8').convert(xml).toString();
    } catch (conversionError) {
      callback(conversionError, null);
      return;
    }
    parseString(decoded, options, callback);
  });
}
module.exports.parseURL = parseURL;
/**
 * Parses feed XML text and reports the formatted feed through the callback.
 *
 * @param {string} xml - Feed document text.
 * @param {Object|Function} [options] - Passed to the formatters (they read
 *   `pipeOriginal`). May be omitted, in which case the second argument is
 *   taken as the callback, mirroring parseURL.
 * @param {Function} callback - callback(err, feed); `feed` is null on failure.
 */
function parseString(xml, options, callback) {
  if (typeof options === 'function' && !callback) {
    callback = options;
    options = {};
  }

  // Cheap sanity check before invoking the parser: input with fewer than two
  // '<' characters is rejected. Non-string input (null, undefined, ...) is
  // rejected here too instead of throwing on .split().
  if (typeof xml !== 'string' || xml.split('<').length < 3) {
    callback('malformed xml', null);
    return;
  }

  var parser = new xml2js.Parser({
    trim: false,
    normalize: true,
    mergeAttrs: true
  });

  parser.addListener('end', function(jsonDOM) {
    if (!jsonDOM) {
      callback("failed to parse xml", null);
      return;
    }
    var output;
    try {
      var feed = normalize(jsonDOM);
      // Anything that is not RSS is assumed to be ATOM.
      output = isRSS(feed) ? formatRSS(feed, options) : formatATOM(feed, options);
    } catch (formatError) {
      // A document with an unexpected shape must surface as a callback error,
      // not as an exception thrown from inside the parser's event emit.
      callback(formatError, null);
      return;
    }
    // Deliberately outside the try block so that an exception raised by the
    // caller's own callback is not mistaken for a formatting failure.
    callback(null, output);
  });

  parser.addListener("error", function(err) {
    callback(err, null);
  });

  parser.parseString(xml);
}
module.exports.parseString = parseString;
// Tells RSS apart from ATOM: a document counts as RSS when it has a `channel`
// member that is neither null nor undefined. Everything else is assumed ATOM.
function isRSS(json) {
  var channel = json.channel;
  return channel !== null && channel !== undefined;
}
// Unwraps the top-level `rss` member when it is present and truthy, otherwise
// returns the document untouched. Per the original note, this normalization is
// what makes FeedBurner feeds work.
function normalize(json) {
  return json.rss ? json.rss : json;
}
/**
 * Replaces every property whose value carries a truthy `#` member with that
 * member. Per the original note, xml2js returns commented material under a
 * `#` tag, and this puts the child text in its place.
 *
 * Meant to be run on a single feed item. The object is modified in place and
 * also returned.
 *
 * @param {Object} json - Feed item as produced by xml2js.
 * @returns {Object} The same object, with `#` wrappers flattened.
 */
function flattenComments(json) {
  // `key` must be declared locally: as an implicit global it throws a
  // ReferenceError in strict mode and leaks into the global scope otherwise.
  var key, value;
  for (key in json) {
    if (!Object.prototype.hasOwnProperty.call(json, key)) {
      continue;
    }
    value = json[key];
    // null/undefined members have no `#` to look up; skip them instead of throwing.
    if (value != null && value['#']) {
      json[key] = value['#'];
    }
  }
  return json;
}
/**
 * Converts an xml2js RSS document (also used for FeedBurner feeds) into the
 * module's output shape.
 *
 * @param {Object} json - Object holding the `channel` node (array or single object).
 * @param {Object} [options] - When `options.pipeOriginal` is truthy each item
 *   also carries the untouched xml2js item as `original`.
 * @returns {Object} `{ type: 'rss', items: [...] }` plus whichever of title,
 *   description, url, last_modified, update, ttl and img the channel provides.
 *   Each item may carry title, summary, url, categories (first category only),
 *   published_at, time_ago, author, guid and media.
 */
function formatRSS(json, options) {
  var output = {
    'type': 'rss',
    items: []
  };
  var pipeOriginal = (options || {}).pipeOriginal;

  // First element of an xml2js value list, or undefined when absent/empty.
  function first(list) {
    return list != undefined && list.length > 0 ? list[0] : void 0;
  }

  var channel = Array.isArray(json.channel) ? json.channel[0] : json.channel;
  // An empty channel list must yield an empty feed, not a TypeError.
  channel = channel || {};

  if (channel.title) {
    output.title = channel.title[0];
  }
  if (channel.description) {
    output.description = channel.description[0];
  }
  if (channel.link) {
    output.url = channel.link[0];
  }
  if (channel.lastBuildDate) {
    output.last_modified = channel.lastBuildDate[0];
  }
  if (channel.pubDate) {
    output.update = channel.pubDate[0];
  }
  if (channel.ttl) {
    output.ttl = channel.ttl[0];
  }
  if (channel['itunes:image']) {
    var image = channel['itunes:image'][0];
    if (image) {
      output.img = image.href;
    }
  }

  // Now the items themselves, if there are any.
  var items = channel.item;
  if (items) {
    if (!Array.isArray(items)) {
      items = [items];
    }
    items.forEach(function(rawItem) {
      var item = flattenComments(rawItem);
      var obj = {};
      //Tx PaulFreund
      if (pipeOriginal) {
        obj.original = item;
      }
      obj.title = first(item.title);
      obj.summary = first(item.description);
      obj.url = first(item.link);
      obj.categories = first(item.category);

      // Fall back to the comments value when there is no description.
      if (obj.summary == null || obj.summary === '') {
        obj.summary = (item.comments && item.comments[0]) || '';
      }

      // Only format the date when one exists.
      if (item.pubDate) {
        obj.published_at = Date.parse(item.pubDate[0]);
        obj.time_ago = DateHelper.time_ago_in_words(obj.published_at);
      }

      // `dc:creator` (noted as the WordPress author in the original) is used
      // unless a plain <author> is present, which takes precedence.
      if (item['dc:creator']) {
        obj.author = item['dc:creator'][0];
      }
      if (item.author) {
        obj.author = item.author[0];
      }

      if (item.guid) {
        // A guid may arrive as an object holding the text under `_` and
        // `isPermaLink` as a member, or as a bare string. The bare-string
        // form used to produce `link: undefined`.
        var guid = item.guid[0];
        var guidIsObject = guid !== null && typeof guid === 'object';
        obj.guid = {
          'link': guidIsObject ? guid._ : guid,
          isPermaLink: guidIsObject ? guid.isPermaLink : undefined
        };
      }

      // Reuse the media object across both branches so a thumbnail does not
      // wipe out content that was just stored.
      if (item['media:content']) {
        obj.media = obj.media || item.media || {};
        obj.media.content = item['media:content'];
      }
      if (item['media:thumbnail']) {
        obj.media = obj.media || item.media || {};
        obj.media.thumbnail = item['media:thumbnail'];
      }

      output.items.push(obj);
    });
  }
  return output;
}
/**
 * Converts an xml2js ATOM document into the module's output shape.
 *
 * @param {Object} json - Parsed document; the feed is read from `json.feed`
 *   when present, otherwise from `json` itself.
 * @param {Object} [options] - When `options.pipeOriginal` is truthy each item
 *   also carries the untouched xml2js entry as `original`.
 * @returns {Object} `{ type: 'atom', items: [...] }` plus whichever of title,
 *   desc, link, hub, id, last_modified and author the feed provides. Each item
 *   carries id, title, summary, category (array), link and, when available,
 *   published_at, time_ago and media.
 */
function formatATOM(json, options) {
  var output = {
    'type': 'atom',
    items: []
  };
  var pipeOriginal = (options || {}).pipeOriginal;

  // Returns the text of a node that may be a list or a single value. The
  // element is either a bare string or an object keeping its text under `_`.
  function firstText(value) {
    var node = Array.isArray(value) ? value[0] : value;
    return (node !== null && typeof node === 'object') ? node._ : node;
  }

  var channel = json.feed || json;

  if (channel.title) {
    output.title = firstText(channel.title);
  }
  if (channel.subtitle) {
    if (Array.isArray(channel.subtitle)) {
      var subtitle = firstText(channel.subtitle);
      if (subtitle) {
        output.desc = subtitle;
      }
    } else {
      output.desc = channel.subtitle;
    }
  }
  if (Array.isArray(channel.link)) {
    channel.link.forEach(function(feedLink) {
      if (!feedLink) {
        return;
      }
      // !== -1 rather than > 0 so a type that starts with "html" also matches.
      if (feedLink.type && feedLink.type.indexOf("html") !== -1) {
        output.link = feedLink.href;
      }
      if (feedLink.rel === "hub") {
        output.hub = feedLink.href;
      }
    });
  }
  if (channel.id) {
    output.id = channel.id[0];
  }
  if (channel.updated) {
    output.last_modified = new Date(channel.updated[0]).toString();
  }
  if (channel.author) {
    var feedAuthor = channel.author[0];
    if (feedAuthor && feedAuthor.name) {
      output.author = firstText(feedAuthor.name);
    }
  }

  var entries = channel.entry;
  if (entries) {
    if (!Array.isArray(entries)) {
      entries = [entries];
    }
    entries.forEach(function(rawEntry) {
      var entry = flattenComments(rawEntry);
      var obj = {};
      if (pipeOriginal) {
        obj.original = entry;
      }
      // An entry without an id yields undefined instead of throwing.
      obj.id = entry.id ? entry.id[0] : void 0;
      obj.title = firstText(entry.title);

      // Prefer <content>; fall back to <summary> for entries that ship only a
      // summary. (Previously this always came out undefined, and threw when
      // <content> was absent.)
      obj.summary = firstText(entry.content);
      if (obj.summary === undefined) {
        obj.summary = firstText(entry.summary);
      }

      // Just grab the category terms.
      var categories = [];
      if (entry.category) {
        if (Array.isArray(entry.category)) {
          entry.category.forEach(function(category) {
            categories.push(category == null ? undefined : category['term']);
          });
        } else {
          categories.push(entry.category);
        }
      }
      obj.category = categories;

      // A rel="self" link wins, as it always did. When there is none, fall
      // back to the first alternate link (a link without rel is treated as
      // alternate) instead of returning an empty string.
      var selfLink = '';
      var alternateLink = '';
      if (entry.link) {
        if (Array.isArray(entry.link)) {
          entry.link.forEach(function(entryLink) {
            if (!entryLink) {
              return;
            }
            if (entryLink.rel === 'self') {
              selfLink = entryLink.href;
            } else if (!alternateLink && (entryLink.rel == null || entryLink.rel === 'alternate')) {
              alternateLink = entryLink.href;
            }
          });
        } else {
          selfLink = entry.link.href;
        }
      }
      obj.link = selfLink || alternateLink;

      // Only format the date when one exists.
      if (entry.published) {
        obj.published_at = Date.parse(entry.published[0]);
        obj.time_ago = DateHelper.time_ago_in_words(obj.published_at);
      }

      // Reuse the media object across both branches so a thumbnail does not
      // wipe out content that was just stored.
      if (entry['media:content']) {
        obj.media = obj.media || entry.media || {};
        obj.media.content = entry['media:content'];
      }
      if (entry['media:thumbnail']) {
        obj.media = obj.media || entry.media || {};
        obj.media.thumbnail = entry['media:thumbnail'];
      }

      output.items.push(obj);
    });
  }
  return output;
}
// Helpers that turn a point in time into a phrase such as "5 minutes ago" or
// "about a year from now".
var DateHelper = {
  // Takes the format of "Jan 15, 2007 15:45:00 GMT" and converts it to a relative time
  // Ruby strftime: %b %d, %Y %H:%M:%S GMT
  time_ago_in_words_with_parsing: function(from) {
    var date = new Date();
    date.setTime(Date.parse(from));
    return this.time_ago_in_words(date);
  },
  // Takes a timestamp (or Date) and converts it to a time relative to now
  // DateHelper.time_ago_in_words(1331079503000)
  time_ago_in_words: function(from) {
    return this.distance_of_time_in_words(new Date(), from);
  },
  // Describes the span between `from` and `to` (Dates or millisecond
  // timestamps). The phrase ends in " ago" when `from` is not after `to`,
  // and in " from now" otherwise.
  distance_of_time_in_words: function(to, from) {
    var MINUTES_PER_YEAR = 525960; // 365.25 days
    var distance_in_seconds = ((to - from) / 1000);
    var tense = distance_in_seconds < 0 ? " from now" : " ago";
    // Take the magnitude before flooring: flooring a negative value rounds
    // away from zero, which made 30 seconds in the future read as "a minute".
    var distance_in_minutes = Math.floor(Math.abs(distance_in_seconds) / 60);
    if (distance_in_minutes === 0) {
      return 'less than a minute' + tense;
    }
    if (distance_in_minutes === 1) {
      return 'a minute' + tense;
    }
    if (distance_in_minutes < 45) {
      return distance_in_minutes + ' minutes' + tense;
    }
    if (distance_in_minutes < 90) {
      return 'about an hour' + tense;
    }
    if (distance_in_minutes < 1440) {
      // Rounded rather than floored so 90-119 minutes reads "about 2 hours"
      // instead of the ungrammatical "about 1 hours".
      return 'about ' + Math.round(distance_in_minutes / 60) + ' hours' + tense;
    }
    if (distance_in_minutes < 2880) {
      return 'a day' + tense;
    }
    if (distance_in_minutes < 43200) {
      return Math.floor(distance_in_minutes / 1440) + ' days' + tense;
    }
    if (distance_in_minutes < 86400) {
      return 'about a month' + tense;
    }
    if (distance_in_minutes < MINUTES_PER_YEAR) {
      return Math.floor(distance_in_minutes / 43200) + ' months' + tense;
    }
    // The bound is exactly two years so the branch below never reports
    // "over 1 years".
    if (distance_in_minutes < 2 * MINUTES_PER_YEAR) {
      return 'about a year' + tense;
    }
    // This branch used to drop the tense suffix.
    return 'over ' + Math.floor(distance_in_minutes / MINUTES_PER_YEAR) + ' years' + tense;
  }
};