|
1 | | -# Two data sets(text and numeric) are available, app argument indicates to use which |
2 | | -#app=micro-sketch #use text dataset, avg record size: 60 bytes |
3 | | -#app=micro-statistics #use numeric dataset, avg record size: 200 bytes |
4 | | -hibench.streamingbench.app micro-sketch |
| 1 | +######################################################### |
| 2 | +# General Stream Config |
| 3 | +######################################################### |
5 | 4 |
|
6 | | -# Text dataset can be scaled in terms of record size |
7 | | -hibench.streamingbench.prepare.textdataset_recordsize_factor |
| 5 | +# Note to ensure benchName to be consistent with datagen type. Numeric data for statistics and text data for others |
| 6 | +# (available benchname: identity, repartition) TBD: sample project grep wordcount distinctcount statistics
| 7 | +hibench.streambench.testCase identity |
8 | 8 |
|
9 | | -# Two modes of generator: push,periodic |
10 | | -# Push means to send data to kafka cluster as fast as it could |
11 | | -# Periodic means sending data according to sending rate specification |
12 | | -#hibench.streamingbench.prepare.mode push |
13 | | -hibench.streamingbench.prepare.mode periodic |
| 9 | +# zookeeper address for Kafka service (default: HOSTNAME:HOSTPORT)
| 10 | +hibench.streambench.zkHost HOSTNAME:HOSTPORT |
14 | 11 |
|
15 | | -# Under push mode: number of total records that will be generated |
16 | | -hibench.streamingbench.prepare.push.records 900000000 |
| 12 | +# Probability used in sample test case |
| 13 | +hibench.streambench.sampleProbability 0.1 |
17 | 14 |
|
18 | | -# Following three params are under periodic mode |
19 | | -# Bytes to push per interval |
20 | | -hibench.streamingbench.prepare.periodic.recordPerInterval 600000 |
| 15 | +# Indicate whether in debug mode for correctness verification (default: false)
| 16 | +hibench.streambench.debugMode false |
21 | 17 |
|
22 | | -# Interval time (in ms) |
23 | | -hibench.streamingbench.prepare.periodic.intervalSpan 5000 |
| 18 | +# JARS |
| 19 | +hibench.streambench.datagen.jar ${hibench.home}/src/streambench/datagen/target/streaming-bench-datagen-5.0-SNAPSHOT-jar-with-dependencies.jar |
| 20 | +hibench.streambench.sparkbench.jar ${hibench.home}/src/streambench/sparkbench/target/streaming-bench-spark-5.0-SNAPSHOT-${hibench.spark.version}-jar-with-dependencies.jar |
| 21 | +hibench.streambench.stormbench.jar ${hibench.home}/src/streambench/stormbench/target/streaming-bench-storm-5.0-SNAPSHOT.jar |
| 22 | +hibench.streambench.gearpump.jar ${hibench.home}/src/streambench/gearpumpbench/target/streaming-bench-gearpump-5.0-SNAPSHOT-jar-with-dependencies.jar |
| 23 | +hibench.streambench.flinkbench.jar ${hibench.home}/src/streambench/flinkbench/target/streaming-bench-flink-5.0-SNAPSHOT-jar-with-dependencies.jar |
24 | 24 |
|
25 | | -# Total round count of data send |
26 | | -hibench.streamingbench.prepare.periodic.totalRound 100 |
| 25 | +######################################################### |
| 26 | +# Kafka Config |
| 27 | +######################################################### |
27 | 28 |
|
28 | | -# zookeeper host:port of kafka cluster |
| 29 | +# Kafka home |
| 30 | +hibench.streambench.kafka.home /PATH/TO/KAFKA/HOME |
29 | 31 |
|
30 | | -#example: hostname:9092 |
31 | | -hibench.streamingbench.zookeeper.host HOSTNAME:HOSTPORT |
| 32 | +# the topic from which Spark will receive input data (default: ${hibench.streambench.testCase})
| 33 | +hibench.streambench.kafka.topic ${hibench.streambench.testCase} |
32 | 34 |
|
33 | | -#Parallel config |
34 | | -# number of nodes that will receive kafka input |
35 | | -hibench.streamingbench.receiver_nodes 4 |
| 35 | +# number of partitions of generated topic (default 20) |
| 36 | +hibench.streambench.kafka.topicPartitions 20 |
36 | 37 |
|
37 | | -############### |
38 | | -#Benchmark args |
39 | | -#Note to ensure benchName to be consistent with datagen type. Numeric data for statistics and text data for others |
40 | | -# available benchname: identity sample project grep wordcount distinctcount statistics |
| 38 | +# consumer group of the consumer for kafka (default: HiBench) |
| 39 | +hibench.streambench.kafka.consumerGroup HiBench |
41 | 40 |
|
42 | | -hibench.streamingbench.benchname identity |
| 41 | +# Kafka broker lists, written in mode "host:port,host:port,..." (default: HOSTNAME:HOSTPORT) |
| 42 | +hibench.streambench.kafka.brokerList HOSTNAME:HOSTPORT |
43 | 43 |
|
44 | | -#common args |
45 | | -# the topic that spark will receive input data |
46 | | -hibench.streamingbench.topic_name ${hibench.streamingbench.benchname} |
| 44 | +# Set the starting offset of kafkaConsumer (default: largest) |
| 45 | +hibench.streambench.kafka.offsetReset largest |
| 46 | +######################################################### |
| 47 | +# Data Generator Config |
| 48 | +######################################################### |
47 | 49 |
|
48 | | -# Spark stream batch interval (in seconds) |
49 | | -hibench.streamingbench.batch_interval 10 |
| 50 | +# Interval span in milliseconds (default: 50)
| 51 | +hibench.streambench.datagen.intervalSpan 50 |
50 | 52 |
|
51 | | -# consumer group of the spark consumer for kafka |
52 | | -hibench.streamingbench.consumer_group HiBench |
| 53 | +# Number of records to generate per interval span (default: 5) |
| 54 | +hibench.streambench.datagen.recordsPerInterval 5 |
53 | 55 |
|
54 | | -# expected number of records to be processed |
55 | | -hibench.streamingbench.record_count 900000000 |
| 56 | +# Number of total records that will be generated (default: -1 means infinity) |
| 57 | +hibench.streambench.datagen.totalRecords -1 |
56 | 58 |
|
57 | | -#sketch/distinctcount/statistics arg |
58 | | -# the field index of the record that will be extracted |
59 | | -hibench.streamingbench.field_index 1 |
| 59 | +# Total round count of data send (default: -1 means infinity) |
| 60 | +hibench.streambench.datagen.totalRounds -1 |
60 | 61 |
|
61 | | -#sketch/wordcount/distinctcount/statistics arg |
62 | | -# the seperator between fields of a single record |
63 | | -hibench.streamingbench.separator \\s+ |
| 62 | +# default path to store seed files (default: ${hibench.hdfs.data.dir}/Streaming) |
| 63 | +hibench.streambench.datagen.dir ${hibench.hdfs.data.dir}/Streaming |
64 | 64 |
|
65 | | -#sample arg |
66 | | -# probability that a record will be taken as a sample |
67 | | -hibench.streamingbench.prob 0.1 |
| 65 | +# fixed length of record (default: 200) |
| 66 | +hibench.streambench.datagen.recordLength 200 |
68 | 67 |
|
69 | | -#grep arg |
70 | | -# the substring that will be checked to see if contained in a record |
71 | | -hibench.streamingbench.pattern the |
| 68 | +# Number of KafkaProducer running on different thread (default: 1) |
| 69 | +# The limitation of a single KafkaProducer is about 100Mb/s |
| 70 | +hibench.streambench.datagen.producerNumber 1 |
72 | 71 |
|
73 | | -#common arg |
74 | | -# indicate RDD storage level. |
75 | | -# 1 for memory only 1 copy. Others for default mem_disk_ser 2 copies |
76 | | -hibench.streamingbench.copies 2 |
| 72 | +hibench.streambench.fixWindowDuration 30000 |
77 | 73 |
|
78 | | -# indicate whether to test the write ahead log new feature |
79 | | -# set true to test WAL feature |
80 | | -hibench.streamingbench.testWAL false |
| 74 | +hibench.streambench.fixWindowSlideStep 30000 |
| 75 | +######################################################### |
| 76 | +# Spark Streaming Config |
| 77 | +######################################################### |
81 | 78 |
|
82 | | -# if testWAL is true, this path to store stream context in hdfs shall be specified. If false, it can be empty |
83 | | -hibench.streamingbench.checkpoint_path |
| 79 | +# Number of nodes that will receive kafka input (default: 4) |
| 80 | +hibench.streambench.spark.receiverNumber 4 |
84 | 81 |
|
85 | | -#common arg |
86 | | -# indicate whether in debug mode for correctness verfication |
87 | | -hibench.streamingbench.debug false |
| 82 | +# Spark streaming batch interval in milliseconds (default: 100)
| 83 | +hibench.streambench.spark.batchInterval 100 |
88 | 84 |
|
89 | | -# whether to use direct approach or not ( sparkstreaming only ) |
90 | | -hibench.streamingbench.direct_mode true |
| 85 | +# Indicate RDD storage level. (default: 2) |
| 86 | +# 0 = StorageLevel.MEMORY_ONLY |
| 87 | +# 1 = StorageLevel.MEMORY_AND_DISK_SER |
| 88 | +# other = StorageLevel.MEMORY_AND_DISK_SER_2 |
| 89 | +hibench.streambench.spark.storageLevel 2 |
91 | 90 |
|
92 | | -# Kafka broker lists, used for direct mode, written in mode "host:port,host:port,..." |
| 91 | +# indicate whether to test the write ahead log new feature (default: false) |
| 92 | +hibench.streambench.spark.enableWAL false |
93 | 93 |
|
94 | | -# example: hostname:9092 |
95 | | -hibench.streamingbench.brokerList HOSTNAME:HOSTPORT |
| 94 | +# if enableWAL is true, this path to store stream context in hdfs shall be specified. If false, it can be empty (default: /var/tmp)
| 95 | +hibench.streambench.spark.checkpointPath /var/tmp |
96 | 96 |
|
97 | | -hibench.streamingbench.broker_list_with_quote "${hibench.streamingbench.brokerList}" |
| 97 | +# whether to use direct approach or not (default: true)
| 98 | +hibench.streambench.spark.useDirectMode true |
98 | 99 |
|
99 | | -# storm bench conf |
| 100 | +######################################################### |
| 101 | +# Flink Config |
| 102 | +######################################################### |
| 103 | +hibench.streambench.flink.home /PATH/TO/FLINK/HOME |
100 | 104 |
|
101 | | -# STORM_BIN_HOME |
102 | | -hibench.streamingbench.storm.home /PATH/TO/STORM/HOME |
| 105 | +# default parallelism of flink job |
| 106 | +hibench.streambench.flink.parallelism 20 |
103 | 107 |
|
104 | | -# Kafka home |
105 | | -hibench.streamingbench.kafka.home /PATH/TO/KAFKA/HOME |
| 108 | +hibench.streambench.flink.bufferTimeout 5 |
106 | 109 |
|
| 110 | +hibench.streambench.flink.checkpointDuration 1000 |
107 | 111 |
|
108 | | -#Cluster config |
109 | | -# nimbus of storm cluster |
110 | | -hibench.streamingbench.storm.nimbus HOSTNAME_OF_STORM |
111 | | -hibench.streamingbench.storm.nimbusAPIPort 6627 |
| 112 | +######################################################### |
| 113 | +# Storm Config |
| 114 | +######################################################### |
112 | 115 |
|
113 | | -# time interval to contact nimbus to judge if finished |
114 | | -hibench.streamingbench.storm.nimbusContactInterval 10 |
| 116 | +# STORM_BIN_HOME |
| 117 | +hibench.streambench.storm.home /PATH/TO/STORM/HOME |
115 | 118 |
|
| 119 | +# nimbus of storm cluster |
| 120 | +hibench.streambench.storm.nimbus HOSTNAME_OF_STORM_NIMBUS |
| 121 | +hibench.streambench.storm.nimbusAPIPort 6627 |
116 | 122 |
|
117 | | -#Parallel config |
| 123 | +# time interval to contact nimbus to judge if finished |
| 124 | +hibench.streambench.storm.nimbusContactInterval 10 |
118 | 125 |
|
119 | 126 | # number of workers of Storm. Number of most bolt threads is also equal to this param. |
120 | | -hibench.streamingbench.storm.worker_count 12 |
| 127 | +hibench.streambench.storm.worker_count 12 |
121 | 128 |
|
122 | 129 | # number of kafka spout threads of Storm |
123 | | -hibench.streamingbench.storm.spout_threads 12 |
| 130 | +hibench.streambench.storm.spout_threads 12 |
124 | 131 |
|
125 | 132 | # number of bolt threads altogether |
126 | | -hibench.streamingbench.storm.bolt_threads 12 |
| 133 | +hibench.streambench.storm.bolt_threads 12 |
127 | 134 |
|
128 | 135 | # kafka arg indicating whether to read data from kafka from the start or go on to read from last position |
129 | | -hibench.streamingbench.storm.read_from_start true |
| 136 | +hibench.streambench.storm.read_from_start true |
130 | 137 |
|
131 | 138 | # whether to turn on ack |
132 | | -hibench.streamingbench.storm.ackon true |
| 139 | +hibench.streambench.storm.ackon true |
| 140 | + |
| 141 | +######################################################### |
| 142 | +# Gearpump Config |
| 143 | +######################################################### |
| 144 | + |
| 145 | +hibench.streambench.gearpump.home /PATH/TO/GEARPUMP/HOME |
| 146 | + |
| 147 | +hibench.streambench.gearpump.executors 1 |
| 148 | + |
| 149 | +hibench.streambench.gearpump.parallelism 1 |
133 | 150 |
|
134 | | -# Added for default rules: |
135 | | -hibench.streamingbench.jars ${hibench.streamingbench.sparkbench.jar} |
136 | | -hibench.streamingbench.sparkbench.jar ${hibench.home}/src/streambench/sparkbench/target/streaming-bench-spark_0.1-5.0-SNAPSHOT-${hibench.spark.version}-jar-with-dependencies.jar |
137 | | -hibench.streamingbench.stormbench.jar ${hibench.home}/src/streambench/stormbench/target/streaming-bench-storm-0.1-SNAPSHOT-jar-with-dependencies.jar |
138 | | -hibench.streamingbench.datagen.jar ${hibench.home}/src/streambench/datagen/target/datagen-0.0.1-jar-with-dependencies.jar |
139 | | -hibench.streamingbench.storm.bin ${hibench.streamingbench.storm.home}/bin |
140 | | -hibench.streamingbench.zkhelper.jar ${hibench.home}/src/streambench/zkHelper/target/streaming-bench-zkhelper-0.1-SNAPSHOT-jar-with-dependencies.jar |
141 | 151 |
|
142 | | -# default path setting for store of data1 & data2 |
143 | | -hibench.streamingbench.datagen.dir ${hibench.hdfs.data.dir}/Streaming |
144 | 152 |
|
145 | | -# partition size settings |
146 | | -hibench.streamingbench.partitions 1 |
|
0 commit comments