forked from ClickHouse/ClickBench
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark.sh
More file actions
executable file
·250 lines (225 loc) · 6.18 KB
/
benchmark.sh
File metadata and controls
executable file
·250 lines (225 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/bin/bash
# ClickBench benchmark driver for Apache Doris.
# Usage: ./benchmark.sh [doris-release-tarball-url]
# This benchmark should run on Ubuntu 20.04
set -e

ROOT=$(pwd)

# First argument overrides the Doris release tarball URL.
if [[ -n "$1" ]]; then
    url="$1"
else
    url='https://apache-doris-releases.oss-accelerate.aliyuncs.com/apache-doris-4.1.0-rc01-bin-x64.tar.gz'
fi

# Download the tarball unless it already exists locally.
file_name="$(basename -- "${url}")"
if [[ "$url" == "http"* ]]; then
    if [[ ! -f "$file_name" ]]; then
        wget --continue --progress=dot:giga "${url}"
    else
        echo "$file_name already exists, no need to download."
    fi
fi

# Strip the ".tar.gz" suffix, anchored at the end of the name
# (the previous unanchored substring replace would also match mid-name).
dir_name="${file_name%.tar.gz}"

# Try to stop Doris and remove it first if execute this script multiple times
set +e
"$dir_name"/"$dir_name"/fe/bin/stop_fe.sh
"$dir_name"/"$dir_name"/be/bin/stop_be.sh
rm -rf -- "$dir_name"
set -e

# Uncompress; the tarball contains a top-level directory named like itself,
# hence the doubled "$dir_name/$dir_name" in DORIS_HOME.
mkdir "$dir_name"
tar zxf "$file_name" -C "$dir_name"
DORIS_HOME="$ROOT/$dir_name/$dir_name"
export DORIS_HOME
# Install dependencies: JDK 17 for Doris, mysql client for the 9030 FE port.
sudo apt-get update -y
sudo apt-get install -y openjdk-17-jdk mysql-client
export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)/"
# Quoted so an existing PATH containing spaces cannot break the assignment.
export PATH="$JAVA_HOME/bin:$PATH"
# Keep unattended-upgrades from grabbing the dpkg lock or competing for
# resources mid-benchmark.
sudo systemctl disable unattended-upgrades
sudo systemctl stop unattended-upgrades
# Start Frontend
"$DORIS_HOME"/fe/bin/start_fe.sh --daemon
# Start Backend; Doris BE needs a large vm.max_map_count and many open files.
sudo sysctl -w vm.max_map_count=2000000
ulimit -n 65535
"$DORIS_HOME"/be/bin/start_be.sh --daemon
# Wait for Frontend ready (up to ~10 minutes). Unlike the original loop,
# exhausting the retries now aborts the script instead of silently running
# the benchmark against an unready cluster.
fe_ready=0
for _ in {1..300}
do
    # The pipeline's status is sed's, so a still-booting FE (mysql failing)
    # does not trip `set -e`; column 16 of 'show frontends' is the version.
    fe_version=$(mysql -h127.0.0.1 -P9030 -uroot -e 'show frontends' | cut -f16 | sed -n '2,$p')
    if [[ -n "${fe_version}" ]] && [[ "${fe_version}" != "NULL" ]]; then
        echo "Frontend version: ${fe_version}"
        fe_ready=1
        break
    else
        echo 'Wait for Frontend ready ...'
        sleep 2
    fi
done
if [[ "$fe_ready" -ne 1 ]]; then
    echo 'Frontend did not become ready in time, aborting.' >&2
    exit 1
fi

# Setup cluster, add Backend to cluster
mysql -h 127.0.0.1 -P9030 -uroot -e "ALTER SYSTEM ADD BACKEND '127.0.0.1:9050' "

# Wait for Backend ready (up to ~10 minutes); column 22 is the BE version.
be_ready=0
for _ in {1..300}
do
    be_version=$(mysql -h127.0.0.1 -P9030 -uroot -e 'show backends' | cut -f22 | sed -n '2,$p')
    if [[ -n "${be_version}" ]]; then
        echo "Backend version: ${be_version}"
        be_ready=1
        break
    else
        echo 'Wait for Backend ready ...'
        sleep 2
    fi
done
if [[ "$be_ready" -ne 1 ]]; then
    echo 'Backend did not become ready in time, aborting.' >&2
    exit 1
fi
# Drop the OS page cache so the data load measures cold I/O.
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null

# Create Database and table
mysql -h 127.0.0.1 -P9030 -uroot -e "CREATE DATABASE hits"
sleep 5
mysql -h 127.0.0.1 -P9030 -uroot hits <"$ROOT"/create.sql

# Download the 100 partitioned parquet files in parallel, then move them into
# the BE's secure user-files directory where the local() TVF can read them.
BE_DATA_DIR="$DORIS_HOME/be/"
mkdir -p "$BE_DATA_DIR/user_files_secure"
# xargs -I{} substitutes directly into wget's URL argument; no per-file
# `bash -c` shell is needed (and embedding {} in a shell string is fragile).
seq 0 99 | xargs -P100 -I{} wget --continue --progress=dot:giga "https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet"
mv -- *.parquet "$BE_DATA_DIR/user_files_secure"

# First column of 'show backends' is the backend id, required by local().
BE_ID=$(mysql -h127.0.0.1 -P9030 -uroot -N -e 'show backends' | awk '{print $1}' | head -1)

# Load with a quarter of the CPU cores, clamped to a minimum of 1.
CORES=$(nproc)
PARALLEL_NUM=$((CORES / 4))
if [ "$PARALLEL_NUM" -lt 1 ]; then
    echo "Computed parallel_pipeline_task_num ($PARALLEL_NUM) is less than 1 based on $CORES cores; clamping to 1."
    PARALLEL_NUM=1
fi
echo "Setting parallel_pipeline_task_num to $PARALLEL_NUM (cpu cores: $CORES, computed as CORES/4 with min 1)"
echo "start loading hits.parquet using TVF, estimated to take about 3 minutes ..."
START=$(date +%s)
# Load the dataset via INSERT ... SELECT over the local() table-valued
# function, reading the parquet files staged under user_files_secure on
# backend $BE_ID. The epoch-seconds columns (EventTime, ClientEventTime,
# LocalEventTime) are converted to DATETIME, and EventDate — stored as a
# day offset — is rebased onto 1970-01-01. Column order must match the
# table definition in create.sql.
# NOTE(review): the whole SQL text is one double-quoted shell string (the
# trailing backslash on the SET line is a line continuation inside it), so
# $PARALLEL_NUM and $BE_ID are expanded by the shell before mysql sees it.
mysql -h 127.0.0.1 -P9030 -uroot hits -e "SET parallel_pipeline_task_num = $PARALLEL_NUM;\
INSERT INTO hits SELECT
CounterID,
DATE_ADD('1970-01-01', INTERVAL EventDate DAY) AS EventDate,
UserID,
FROM_UNIXTIME(EventTime) AS EventTime,
WatchID,
JavaEnable,
Title,
GoodEvent,
ClientIP,
RegionID,
CounterClass,
OS,
UserAgent,
URL,
Referer,
IsRefresh,
RefererCategoryID,
RefererRegionID,
URLCategoryID,
URLRegionID,
ResolutionWidth,
ResolutionHeight,
ResolutionDepth,
FlashMajor,
FlashMinor,
FlashMinor2,
NetMajor,
NetMinor,
UserAgentMajor,
UserAgentMinor,
CookieEnable,
JavascriptEnable,
IsMobile,
MobilePhone,
MobilePhoneModel,
Params,
IPNetworkID,
TraficSourceID,
SearchEngineID,
SearchPhrase,
AdvEngineID,
IsArtifical,
WindowClientWidth,
WindowClientHeight,
ClientTimeZone,
FROM_UNIXTIME(ClientEventTime) AS ClientEventTime,
SilverlightVersion1,
SilverlightVersion2,
SilverlightVersion3,
SilverlightVersion4,
PageCharset,
CodeVersion,
IsLink,
IsDownload,
IsNotBounce,
FUniqID,
OriginalURL,
HID,
IsOldCounter,
IsEvent,
IsParameter,
DontCountHits,
WithHash,
HitColor,
FROM_UNIXTIME(LocalEventTime) AS LocalEventTime,
Age,
Sex,
Income,
Interests,
Robotness,
RemoteIP,
WindowName,
OpenerName,
HistoryLength,
BrowserLanguage,
BrowserCountry,
SocialNetwork,
SocialAction,
HTTPError,
SendTiming,
DNSTiming,
ConnectTiming,
ResponseStartTiming,
ResponseEndTiming,
FetchTiming,
SocialSourceNetworkID,
SocialSourcePage,
ParamPrice,
ParamOrderID,
ParamCurrency,
ParamCurrencyID,
OpenstatServiceName,
OpenstatCampaignID,
OpenstatAdID,
OpenstatSourceID,
UTMSource,
UTMMedium,
UTMCampaign,
UTMContent,
UTMTerm,
FromTag,
HasGCLID,
RefererHash,
URLHash,
CLID
FROM local(
\"file_path\" = \"user_files_secure/hits_*.parquet\",
\"backend_id\" = \"$BE_ID\",
\"format\" = \"parquet\"
)
"
END=$(date +%s)
# Plain integer subtraction; no need for bc, which this script never installs.
LOADTIME=$((END - START))
echo "Load time: $LOADTIME"
echo "$LOADTIME" > loadtime

# Record the on-disk size of the loaded table (bytes).
du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size
echo "Data size: $(cat storage_size)"

# Disable the SQL result cache so every benchmark query actually executes.
mysql -h 127.0.0.1 -P9030 -uroot hits -e "set global enable_sql_cache = false"
# Dataset contains 99997497 rows, storage size is about 13319588503 bytes
mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits"
# Run queries
TRIES=3
# Start from a clean log: tee -a would otherwise append to a previous run's
# log.txt and corrupt the timing extraction below.
rm -f log.txt
while read -r query; do
    # Flush dirty pages and drop the page cache so try 1 is a cold run.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
    for _ in $(seq 1 "$TRIES"); do
        mysql -vvv -h127.0.0.1 -P9030 -uroot hits -e "${query}" 2>&1 | tee -a log.txt
    done
done <queries.sql

# Extract per-query timings from the mysql -vvv output: map errors to null,
# convert "(X min Y sec)" to seconds, then group each query's timings into a
# bracketed row (the awk modulus 3 must match TRIES above).
grep -P 'rows? in set|Empty set|^ERROR' log.txt |
    sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) min )?([0-9.]+) sec\).*?$/\2 \3/' |
    awk '{ if ($2 != "") { print $1 * 60 + $2 } else { print $1 } }' |
    awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'