|
1 | 1 | #! /usr/bin/env node |
2 | | - |
3 | | -// here we will convert the search.sh script into JS |
| 2 | +import _ from 'lodash'; |
| 3 | +import fs from 'fs'; |
4 | 4 | import { format, sub } from 'date-fns'; |
5 | | -import fs from 'fs/promises'; |
6 | | -import { download, search, sendOutput } from './cli.js'; |
7 | | - |
8 | | -const CATEGORY_ID = 'alzheimers-disease'; |
9 | | -const DATA_DIRECTORY = 'example-data'; |
10 | | - |
11 | | -const MEDRXIV_SOURCE = 'medrxiv'; |
12 | | -const BIORXIV_SOURCE = 'biorxiv'; |
13 | | - |
14 | | -const now = new Date(); |
15 | | -const startOffset = { days: 1 }; |
16 | | -const START_DATE = format(sub(now, startOffset), 'yyyy-MM-dd'); |
17 | | -const END_DATE = format(now, 'yyyy-MM-dd'); |
18 | | - |
19 | | -const BIORXIV_FILE = `${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json`; |
20 | | -const MEDRXIV_FILE = `${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json`; |
21 | | -const COMBINED_FILE = `${DATA_DIRECTORY}/${END_DATE}.json`; |
22 | | -const OUTPUT_FILE = `${DATA_DIRECTORY}/${CATEGORY_ID}.json`; |
23 | | - |
24 | | -// Getting all latest articles from BiorXiv |
25 | | -console.log(`Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); |
26 | | -fs.open(BIORXIV_FILE, 'w'); |
27 | | -const bioOptions = { |
28 | | - source: BIORXIV_SOURCE, |
29 | | - output: BIORXIV_FILE |
30 | | -}; |
31 | | -const bioData = await download(START_DATE, END_DATE, bioOptions); |
32 | | - |
33 | | -// Getting all latest articles from MedrXiv |
34 | | -console.log(`Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}`); |
35 | | -fs.open(MEDRXIV_FILE, 'w'); |
36 | | -const medOptions = { |
37 | | - source: MEDRXIV_SOURCE, |
38 | | - output: MEDRXIV_FILE |
39 | | -}; |
40 | | -const medData = await download(START_DATE, END_DATE, medOptions); |
41 | | - |
42 | | -// Creating a JSON with all the results, both sources combined |
43 | | -console.log('Combining results...'); |
44 | | -fs.open(COMBINED_FILE, 'w'); |
45 | | -const combinedData = bioData.concat(medData); |
46 | | -const combinedOptions = { |
47 | | - output: COMBINED_FILE |
48 | | -}; |
49 | | -await sendOutput(combinedData, combinedOptions); |
50 | | - |
51 | | -// Search for the QUERY keyword in all the downloaded articles & compile the related articles |
52 | | -const QUERY = 'alzheimer'; |
53 | | -fs.open(OUTPUT_FILE, 'w'); |
54 | | -const outputOptions = { |
55 | | - input: COMBINED_FILE, |
56 | | - output: OUTPUT_FILE |
57 | | -}; |
58 | | -console.log(`Searching for ${QUERY}`); |
59 | | -const searchHits = await search(QUERY, outputOptions); |
60 | | -const numSearchHits = searchHits.length; |
61 | | -console.log(`Found ${numSearchHits} hits`); |
| 5 | +import { download } from './download.js'; |
| 6 | +import { Search } from './search.js'; |
| 7 | +import { writeFormattedJSON } from './cli.js'; |
| 8 | + |
/**
 * Download preprint data from the BiorXiv and MedrXiv servers and search the
 * combined set of preprints for each configured topic.
 *
 * Reads topic definitions (each carrying a `keywords` list) from
 * `example-data/data-config.json`, downloads the past month of articles from
 * both servers in parallel, runs an AND-combined keyword search per topic,
 * and writes the annotated topic list to `example-data/data.json`.
 *
 * @returns {Promise<Array>} the topic objects from the config, each extended
 *   with a `papers` array of matching search results.
 */
export async function getData () {
  // Date window: the past month, formatted as the download API expects.
  const now = new Date();
  const startOffset = { months: 1 };
  const start = format(sub(now, startOffset), 'yyyy-MM-dd');
  const end = format(now, 'yyyy-MM-dd');

  // Reading config file for list of topics (explicit encoding -> string, not Buffer)
  const config = JSON.parse(fs.readFileSync('example-data/data-config.json', 'utf8'));

  // Download all recent papers from both servers in parallel & combine the arrays
  const data = await Promise.all([
    download('biorxiv', start, end),
    download('medrxiv', start, end)
  ]);

  const articles = data.flat();

  // Index the articles once, then run one search per topic from the config.
  const searcher = new Search();

  await searcher.articles(articles);
  // `topic` (not `config`) avoids shadowing the outer config array.
  const doSearches = async (topic) => {
    const { keywords } = topic;
    const papers = await searcher.search(keywords, {
      combineWith: 'AND'
    });
    return { ...topic, papers };
  };
  const collection = await Promise.all(config.map(doSearches));

  // Output all search result papers into data.json, then return them
  // so the function honours its documented @returns contract.
  await writeFormattedJSON(collection, 'example-data/data.json');
  return collection;
}
| 47 | + |
// Top-level entry point: run the pipeline and handle failure explicitly
// instead of leaving a floating promise (which would surface as an
// unhandled rejection and, in modern Node, crash without context).
getData().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
0 commit comments