#!/usr/bin/env bash
#
# neo4j_load.sh -- bulk-imports node and edge CSV files into a new Neo4j
# database, makes that database the active one, restarts Neo4j and then
# runs indexing and graph metrics through cypher-shell.
#
# Arguments:
#   $1 - nodes CSV file
#   $2 - edges CSV file
#   $3 - password fed to `sudo --stdin`
# Exit status:
#   1 - help was requested or fewer than 3 arguments were given
#   2 - Neo4j did not answer cypher-shell within the time limit
#   other non-zero - a command failed (`set -e` / `pipefail`)
if [[ $1 == "-h" || $# -lt 3 ]]; then
  cat <<'HEREDOC'
NAME
  neo4j_load.sh -- loads CSV in bulk to Neo4j and calculate metrics

SYNOPSIS
  neo4j_load.sh nodes_file edges_file ernie_admin_password
  neo4j_load.sh -h: display this help

DESCRIPTION
  # Generates a new DB name
  # Bulk imports to a new DB
  # Updates Neo4j config file
  # Restarts Neo4j
  # Calculates metrics via cypher-shell
HEREDOC
  exit 1
fi

set -xe
set -o pipefail

# Get a script directory, same as by $(dirname $0)
# script_dir=${0%/*}
# absolute_script_dir=$(cd "${script_dir}" && pwd)
# work_dir=${1:-${absolute_script_dir}/build} # $1 with the default
# if [[ ! -d "${work_dir}" ]]; then
#   mkdir "${work_dir}"
#   chmod g+w "${work_dir}"
# fi
# cd "${work_dir}"
echo -e "\n## Running under ${USER}@${HOSTNAME} at ${PWD} ##\n"

# if ! which cypher-shell >/dev/null; then
#   echo "Please install Neo4j"
#   exit 1
# fi

nodes_file="$1"
edges_file="$2"

# region Generate a unique db_name
# Base name of the nodes file; a trailing dot is appended when there is no
# extension so that the `%.*` strip below always has something to remove.
name_with_ext=${nodes_file##*/}
if [[ "${name_with_ext}" != *.* ]]; then
  name_with_ext=${name_with_ext}.
fi

name=${name_with_ext%.*}
# `date -r FILE` prints the file's last modification time. The format sorts
# lexicographically, so the string comparison below picks the newer file.
file_date1=$(date -r "${nodes_file}" +%F-%H-%M-%S)
file_date2=$(date -r "${edges_file}" +%F-%H-%M-%S)
if [[ ${file_date1} > ${file_date2} ]]; then
  db_ver="${file_date1}"
else
  db_ver="${file_date2}"
fi
# Name prefix up to the first underscore + version stamp of the newer input.
db_name="${name%%_*}-v${db_ver}.db"
# endregion

# Hide password from the output
set +x
# The current directory must be writeable for the neo4j user. Otherwise, it'd fail with the
# `java.io.FileNotFoundException: import.report (Permission denied)` error
#
# File names and the DB name are handed to the inner shell as positional
# parameters ($1, $2, $3 of the inner script) instead of being spliced into
# the command string, so quotes or spaces in them cannot alter the command.
# printf is used rather than echo so a password such as `-n` is sent verbatim.
printf '%s\n' "$3" | sudo --stdin -u neo4j bash -c '
  set -xe
  echo "Loading data into $3 ..."
  neo4j-admin import --nodes:Publication "$1" --relationships:CITES "$2" --database="$3"
  sed --in-place --expression="s/dbms.active_database=.*/dbms.active_database=$3/" /etc/neo4j/neo4j.conf
' neo4j_load "${nodes_file}" "${edges_file}" "${db_name}"

echo "Restarting Neo4j with a new active database ..."
printf '%s\n' "$3" | sudo --stdin systemctl restart neo4j
set -x

echo "Waiting for the service to become active ..."
declare -i time_limit_s=30
# Ping Neo4j. Even if a service is active it might not be responding yet.
# Gives up once the counter is exhausted; each retry is followed by a 1 s sleep.
while ! cypher-shell "CALL dbms.components()" 2>/dev/null; do
  if (( time_limit_s-- == 0 )); then
    echo "ERROR: Neo4j failed to start." >&2
    exit 2
  fi
  sleep 1
done

echo "Calculating metrics and indexing ..."
cypher-shell <<'HEREDOC'
CREATE INDEX ON :Publication(endpoint);
CREATE INDEX ON :Publication(nida_support);
CREATE INDEX ON :Publication(other_hhs_support);

// Calculate and store PageRank
CALL algo.pageRank()
YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty;

// Calculate and store Betweenness Centrality
CALL algo.betweenness(null, null, {writeProperty: 'betweenness'})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;

// Calculate and store Closeness Centrality
CALL algo.closeness(null, null, {writeProperty: 'closeness'})
YIELD nodes, loadMillis, computeMillis, writeMillis;

// PageRank statistics
MATCH (n)
RETURN apoc.agg.statistics(n.pagerank);
HEREDOC

# TODO Parallelize
# parallel --halt soon,fail=1 --verbose --line-buffer --pipe cypher-shell ::: "// Calculate and store PageRank
# CALL algo.pageRank()
# YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty;" \
# "// Calculate and store Betweenness Centrality
# CALL algo.betweenness(null, null, {writeProperty: 'betweenness'})
# YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;" \
# "// Calculate and store Closeness Centrality
# CALL algo.closeness(null, null, {writeProperty: 'closeness'})
# YIELD nodes, loadMillis, computeMillis, writeMillis;" \
# "CREATE INDEX ON :Publication(endpoint);" \
# "CREATE INDEX ON :Publication(nida_support);" \
# "CREATE INDEX ON :Publication(other_hhs_support);"