Skip to content
This repository was archived by the owner on Jun 18, 2022. It is now read-only.

Commit 774fd10

Browse files
committed
Merge branch 'master' of github.com:NETESOLUTIONS/ERNIE
2 parents bdb6f58 + 6559c1d commit 774fd10

1 file changed

Lines changed: 119 additions & 0 deletions

File tree

Neo4j/neo4j_load.sh

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env bash
#
# neo4j_load.sh -- bulk-loads node/edge CSV files into a new Neo4j database,
# makes it the active database and calculates graph metrics.
#
# Positional arguments:
#   $1  nodes CSV file
#   $2  edges CSV file
#   $3  password piped to `sudo --stdin`

#######################################
# Prints the help text.
# Outputs: help text to stdout (callers redirect it when reporting an error)
#######################################
usage() {
  cat <<'HEREDOC'
NAME
  neo4j_load.sh -- loads CSV in bulk to Neo4j and calculate metrics

SYNOPSIS
  neo4j_load.sh nodes_file edges_file ernie_admin_password
  neo4j_load.sh -h: display this help

DESCRIPTION
  # Generates a new DB name
  # Bulk imports to a new DB
  # Updates Neo4j config file
  # Restarts Neo4j
  # Calculates metrics via cypher-shell
HEREDOC
}

# An explicit help request is not an error: print to stdout and succeed.
if [[ "${1:-}" == "-h" ]]; then
  usage
  exit 0
fi

# Too few arguments is a usage error: report on stderr and fail.
if (($# < 3)); then
  usage >&2
  exit 1
fi
# Trace every command (-x) and abort on the first failing one (-e).
set -xe
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Get a script directory, same as by $(dirname $0)
#script_dir=${0%/*}
#absolute_script_dir=$(cd "${script_dir}" && pwd)
#work_dir=${1:-${absolute_script_dir}/build} # $1 with the default
#if [[ ! -d "${work_dir}" ]]; then
#  mkdir "${work_dir}"
#  chmod g+w "${work_dir}"
#fi
#cd "${work_dir}"

# printf keeps backslashes in ${PWD} literal, unlike `echo -e`.
printf '\n## Running under %s@%s at %s ##\n\n' "${USER}" "${HOSTNAME}" "${PWD}"

#if ! which cypher-shell >/dev/null; then
#  echo "Please install Neo4j"
#  exit 1
#fi
#######################################
# Builds a database name from the nodes file name and the newer of the two
# input files' modification times.
# Arguments:
#   $1 - path to the nodes file
#   $2 - path to the edges file
# Outputs:
#   "<prefix>-v<YYYY-MM-DD-HH-MM-SS>.db" on stdout, where <prefix> is the
#   nodes file base name without its last extension, cut at the first "_".
# Returns:
#   non-zero when the modification time of either file cannot be read
#######################################
generate_db_name() {
  local nodes_path=$1
  local edges_path=$2
  local base_name stem nodes_mtime edges_mtime version

  base_name=${nodes_path##*/}
  # Strips the last extension; a name without a dot is left unchanged.
  stem=${base_name%.*}

  # `date -r FILE` prints the file's last modification time.
  nodes_mtime=$(date -r "${nodes_path}" +%F-%H-%M-%S) || return
  edges_mtime=$(date -r "${edges_path}" +%F-%H-%M-%S) || return

  # The timestamp format sorts chronologically as a plain string.
  if [[ "${nodes_mtime}" > "${edges_mtime}" ]]; then
    version=${nodes_mtime}
  else
    version=${edges_mtime}
  fi

  printf '%s\n' "${stem%%_*}-v${version}.db"
}

nodes_file="$1"
edges_file="$2"

# region Generate a unique db_name
db_name=$(generate_db_name "${nodes_file}" "${edges_file}")
# endregion
# Hide password from the output
set +x
# The current directory must be writeable for the neo4j user. Otherwise, it'd fail with the
# `java.io.FileNotFoundException: import.report (Permission denied)` error
#
# The inner script is single-quoted and receives its values as positional
# arguments, so file or database names containing spaces or quotes can neither
# split into extra words nor be interpreted as shell code.
# The password goes to sudo on stdin; printf (unlike echo) never treats a
# leading "-n"/"-e" in the password as an option.
# NOTE: ${db_name} is still interpolated into the sed replacement, so "/" or
# "&" in it would change the expression.
printf '%s\n' "$3" | sudo --stdin -u neo4j bash -c 'set -xe
  db_name=$1
  nodes_file=$2
  edges_file=$3
  echo "Loading data into ${db_name} ..."
  neo4j-admin import --nodes:Publication "${nodes_file}" --relationships:CITES "${edges_file}" --database="${db_name}"
  sed --in-place --expression="s/dbms.active_database=.*/dbms.active_database=${db_name}/" /etc/neo4j/neo4j.conf' \
  neo4j_load "${db_name}" "${nodes_file}" "${edges_file}"

echo "Restarting Neo4j with a new active database ..."
printf '%s\n' "$3" | sudo --stdin systemctl restart neo4j
# The password is no longer on any command line: resume tracing.
set -x
echo "Waiting for the service to become active ..."
# Fail fast when the client is missing. The loop below discards stderr, so a
# missing binary would otherwise be reported as "Neo4j failed to start".
if ! command -v cypher-shell >/dev/null; then
  echo "ERROR: cypher-shell is not installed or not in PATH." >&2
  exit 2
fi

# Number of retries after the first failed ping, with 1 s sleep between pings.
declare -i time_limit_s=30
# Ping Neo4j. Even if a service is active it might not be responding yet.
# stderr is silenced on purpose: failed pings are expected during startup.
while ! cypher-shell "CALL dbms.components()" 2>/dev/null; do
  if ((time_limit_s-- == 0)); then
    echo "ERROR: Neo4j failed to start." >&2
    exit 2
  fi
  sleep 1
done
echo "Calculating metrics and indexing ..."
# The quoted delimiter keeps the Cypher text literal (no shell expansion).
# Statements are sent to cypher-shell on stdin and run in the order written.
cypher-shell <<'HEREDOC'
CREATE INDEX ON :Publication(endpoint);
CREATE INDEX ON :Publication(nida_support);
CREATE INDEX ON :Publication(other_hhs_support);

// Calculate and store PageRank
CALL algo.pageRank()
YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty;

// Calculate and store Betweenness Centrality
CALL algo.betweenness(null, null, {writeProperty: 'betweenness'})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;

// Calculate and store Closeness Centrality
CALL algo.closeness(null, null, {writeProperty: 'closeness'})
YIELD nodes, loadMillis, computeMillis, writeMillis;

// PageRank statistics
MATCH (n)
RETURN apoc.agg.statistics(n.pagerank);
HEREDOC

# TODO Parallelize
#parallel --halt soon,fail=1 --verbose --line-buffer --pipe cypher-shell ::: "// Calculate and store PageRank
#  CALL algo.pageRank()
#  YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty;" \
#  "// Calculate and store Betweenness Centrality
#  CALL algo.betweenness(null, null, {writeProperty: 'betweenness'})
#  YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;" \
#  "// Calculate and store Closeness Centrality
#  CALL algo.closeness(null, null, {writeProperty: 'closeness'})
#  YIELD nodes, loadMillis, computeMillis, writeMillis;" \
#  "CREATE INDEX ON :Publication(endpoint);" \
#  "CREATE INDEX ON :Publication(nida_support);" \
#  "CREATE INDEX ON :Publication(other_hhs_support);"

0 commit comments

Comments
 (0)