Skip to content

Commit eec7706

Browse files
authored
Merge pull request #28 from PathwayCommons/debug-scripts
Updated data scripts
2 parents a455c9d + 01b66b0 commit eec7706

2 files changed

Lines changed: 15 additions & 17 deletions

File tree

scripts/recommendations.sh

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,14 @@ PAPER_RECOMMENDATIONS_PATH=papers/forpaper/
1818
PAPER_RECOMMENDATIONS_LIMIT=500
1919
PAPER_RECOMMENDATIONS_FIELDS=title,year,externalIds,venue,authors,abstract
2020

21+
printf "START: recommendations \n\n"
22+
2123
for i in ${!CATEGORY_IDS[@]}; do
2224
CATEGORY_ID=${CATEGORY_IDS[$i]}
2325
PAPER_ID=${PAPER_IDS[$i]}
2426
RECOMMENDATION_URL="${API_BASE_URL}${PAPER_RECOMMENDATIONS_API_PATH}${API_VERSION_PATH}${PAPER_RECOMMENDATIONS_PATH}${PAPER_ID}?limit=${PAPER_RECOMMENDATIONS_LIMIT}&fields=${PAPER_RECOMMENDATIONS_FIELDS}"
2527

26-
echo "Fetching from Semantic Scholar ${RECOMMENDATIONS_API_PATH}"
28+
echo "Fetching from Semantic Scholar ${RECOMMENDATION_URL}"
2729
echo "Paper: ${PAPER_ID}"
2830
echo "CATEGORY_ID: ${CATEGORY_ID}"
2931

@@ -35,7 +37,7 @@ for i in ${!CATEGORY_IDS[@]}; do
3537

3638
selectedPapers=$(
3739
echo ${recommendedPapers} | jq '[ .[] |
38-
select((.venue == "bioRxiv" or (.venue == "medRxiv")) and (.year == 2022)) |
40+
select((.venue == "bioRxiv" or (.venue == "medRxiv")) and (.year >= 2022)) |
3941
.paperId
4042
] |
4143
.[0:'${MAX_PAPERS}']'
@@ -84,3 +86,4 @@ for i in ${!CATEGORY_IDS[@]}; do
8486
echo ']' >> ${DATA_DIRECTORY}/${CATEGORY_ID}.json
8587
done
8688

89+
printf "END: recommendations \n\n"

scripts/search.sh

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
2+
set -e
23

34
CATEGORY_ID="alzheimers-disease"
45
DATA_DIRECTORY="../example-data"
@@ -9,28 +10,21 @@ BIORXIV_SOURCE="biorxiv"
910
START_DATE=$(node date.js start)
1011
END_DATE=$(node date.js end)
1112

13+
printf "START: search \n\n"
14+
1215
echo "Fetching from ${BIORXIV_SOURCE} between ${START_DATE} and ${END_DATE}"
13-
biorxivPapers=$(node ../src/cli.js download --output="${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json" --source=${BIORXIV_SOURCE} ${START_DATE} ${END_DATE})
14-
numPapersRawBiorxiv=$(echo ${biorxivPapers} | jq 'length')
15-
echo "numPapersRawBiorxiv: ${numPapersRawBiorxiv}"
16+
node ../src/cli.js download --output="${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json" --source=${BIORXIV_SOURCE} ${START_DATE} ${END_DATE}
1617

1718
echo "Fetching from ${MEDRXIV_SOURCE} between ${START_DATE} and ${END_DATE}"
18-
medrxivPapers=$(node ../src/cli.js download --output="${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json" --source=${MEDRXIV_SOURCE} ${START_DATE} ${END_DATE})
19-
numPapersRawMedrxiv=$(echo ${medrxivPapers} | jq 'length')
20-
echo "numPapersRawMedrxiv: ${numPapersRawMedrxiv}"
19+
node ../src/cli.js download --output="${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json" --source=${MEDRXIV_SOURCE} ${START_DATE} ${END_DATE}
2120

2221
echo "Combining results..."
23-
combined=$(jq --slurp '[.[][]]' ${DATA_DIRECTORY}/${END_DATE}_*.json)
24-
# rm "${DATA_DIRECTORY}/${END_DATE}_${BIORXIV_SOURCE}.json"
25-
# rm "${DATA_DIRECTORY}/${END_DATE}_${MEDRXIV_SOURCE}.json"
26-
2722
DATA_FILE="${DATA_DIRECTORY}/${END_DATE}.json"
28-
echo ${combined} | jq > ${DATA_FILE}
29-
23+
jq --slurp '[.[][]]' ${DATA_DIRECTORY}/${END_DATE}_*.json > ${DATA_FILE}
3024

3125
QUERY="alzheimer"
3226
OUTPUT_FILE="${DATA_DIRECTORY}/${CATEGORY_ID}.json"
33-
echo "Searching for ${CATEGORY_ID}"
27+
echo "Searching for ${QUERY}"
3428
searchHits=$(node ../src/cli.js search --strict --input=${DATA_FILE} ${QUERY})
3529
numSearchHits=$(echo ${searchHits} | jq 'length')
3630
echo "Found ${numSearchHits} hits"
@@ -48,5 +42,6 @@ collection=$(
4842
}]'
4943
)
5044

51-
echo ${collection} | jq > ${OUTPUT_FILE}
52-
# rm "${DATA_FILE}"
45+
echo ${collection} | jq '.' > ${OUTPUT_FILE}
46+
47+
printf "END: search \n\n"

0 commit comments

Comments
 (0)