Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit e7e5675

Browse files
committed
Add streamingbench conf templates
1 parent 4c025b8 commit e7e5675

5 files changed

Lines changed: 116 additions & 10 deletions

File tree

conf/flink.conf.template

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
hibench.streambench.flink.home /PATH/TO/YOUR/FLINK/HOME
2+
3+
hibench.flink.master HOSTNAME:PORT
4+
5+
# Default parallelism of flink job
6+
hibench.streambench.flink.parallelism 20
7+
hibench.streambench.flink.bufferTimeout 10
8+
hibench.streambench.flink.checkpointDuration 1000

conf/gearpump.conf.template

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
hibench.streambench.gearpump.home /PATH/TO/YOUR/GEARPUMP/HOME
2+
3+
hibench.streambench.gearpump.parallelism 12
4+
5+
hibench.streambench.gearpump.executors 12

conf/hibench.conf

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
1-
21
hibench.scale.profile small
32

43
hibench.default.map.parallelism 12
54
hibench.default.shuffle.parallelism 12
65

7-
8-
96
#======================================================
107
# Report files
118
#======================================================
12-
139
# default report formats
1410
hibench.report.formats "%-12s %-10s %-8s %-20s %-20s %-20s %-20s\n"
1511

@@ -39,7 +35,6 @@ hibench.streambench.flinkbench.jar ${hibench.home}/flinkbench/streaming/tar
3935
#======================================================
4036
# workload home/input/output path
4137
#======================================================
42-
4338
hibench.hive.home ${hibench.home}/hadoopbench/sql/target/${hibench.hive.release}
4439
hibench.hive.release hive-0.12.0-bin
4540
hibench.hivebench.template.dir ${hibench.home}/hadoopbench/sql/hive_template
@@ -52,6 +47,10 @@ hibench.mahout.release.cdh5 mahout-0.9-cdh5.1.0
5247
hibench.mahout.release ${hibench.mahout.release.${hibench.hadoop.release}}
5348
hibench.mahout.home ${hibench.home}/hadoopbench/mahout/target/${hibench.mahout.release}
5449

50+
hibench.workload.input
51+
hibench.workload.output
52+
hibench.workload.dir.name.input Input
53+
hibench.workload.dir.name.output Output
5554

5655
hibench.nutch.dir.name.input ${hibench.workload.dir.name.input}
5756
hibench.nutch.dir.name.output ${hibench.workload.dir.name.output}
@@ -62,9 +61,53 @@ hibench.nutch.home ${hibench.home}/hadoopbench/nutchindexing/target/${hibench.n
6261
hibench.dfsioe.dir.name.input ${hibench.workload.dir.name.input}
6362
hibench.dfsioe.dir.name.output ${hibench.workload.dir.name.output}
6463

65-
hibench.workload.dir.name.input Input
66-
hibench.workload.dir.name.output Output
6764

65+
#======================================================
66+
# Streaming General
67+
#======================================================
68+
# Indicate whether to run in debug mode for correctness verification (default: false)
69+
hibench.streambench.debugMode false
70+
hibench.streambench.sampleProbability 0.1
71+
hibench.streambench.fixWindowDuration 10000
72+
hibench.streambench.fixWindowSlideStep 10000
6873

69-
hibench.workload.input
70-
hibench.workload.output
74+
75+
#======================================================
76+
# Kafka for streaming benchmarks
77+
#======================================================
78+
hibench.streambench.kafka.home /PATH/TO/YOUR/KAFKA/HOME
79+
# zookeeper host:port of kafka cluster, host1:port1,host2:port2...
80+
hibench.streambench.zkHost
81+
# Kafka broker lists, written in mode host:port,host:port,..
82+
hibench.streambench.kafka.brokerList
83+
hibench.streambench.kafka.consumerGroup HiBench
84+
# number of partitions of generated topic (default 20)
85+
hibench.streambench.kafka.topicPartitions 20
86+
# consumer group of the consumer for kafka (default: HiBench)
87+
hibench.streambench.kafka.consumerGroup HiBench
88+
# Set the starting offset of kafkaConsumer (default: largest)
89+
hibench.streambench.kafka.offsetReset largest
90+
91+
92+
#======================================================
93+
# Data generator for streaming benchmarks
94+
#======================================================
95+
# Interval span in millisecond (default: 50)
96+
hibench.streambench.datagen.intervalSpan 50
97+
# Number of records to generate per interval span (default: 5)
98+
hibench.streambench.datagen.recordsPerInterval 5
99+
# fixed length of record (default: 200)
100+
hibench.streambench.datagen.recordLength 200
101+
# Number of KafkaProducer running on different thread (default: 1)
102+
hibench.streambench.datagen.producerNumber 1
103+
# Total round count of data send (default: -1 means infinity)
104+
hibench.streambench.datagen.totalRounds -1
105+
# Number of total records that will be generated (default: -1 means infinity)
106+
hibench.streambench.datagen.totalRecords -1
107+
# default path to store seed files (default: ${hibench.hdfs.data.dir}/Streaming)
108+
hibench.streambench.datagen.dir ${hibench.hdfs.data.dir}/Streaming
109+
# default path settings for generated data1 & data2
110+
hibench.streambench.datagen.data1.name Seed
111+
hibench.streambench.datagen.data1.dir ${hibench.streambench.datagen.dir}/${hibench.streambench.datagen.data1.name}
112+
hibench.streambench.datagen.data2_cluster.dir ${hibench.streambench.datagen.dir}/Kmeans/Cluster
113+
hibench.streambench.datagen.data2_samples.dir ${hibench.streambench.datagen.dir}/Kmeans/Samples

conf/spark.conf.template

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Spark home
2-
hibench.spark.home /PATH/TO/YOUR/SPARK/ROOT
2+
hibench.spark.home /PATH/TO/YOUR/SPARK/HOME
33

44
# Spark version. Supported value: spark1.6, spark2.0
55
hibench.spark.version spark1.6
@@ -23,3 +23,28 @@ spark.default.parallelism ${hibench.default.map.parallelism}
2323

2424
# set spark sql's default shuffle partitions according to hibench's parallelism value
2525
spark.sql.shuffle.partitions ${hibench.default.map.parallelism}
26+
27+
28+
#======================================================
29+
# Spark Streaming
30+
#======================================================
31+
# Spark Streaming batch interval in milliseconds (default: 100)
32+
hibench.streambench.spark.batchInterval 100
33+
34+
# Number of nodes that will receive kafka input (default: 4)
35+
hibench.streambench.spark.receiverNumber 4
36+
37+
# Indicate RDD storage level. (default: 2)
38+
# 0 = StorageLevel.MEMORY_ONLY
39+
# 1 = StorageLevel.MEMORY_AND_DISK_SER
40+
# other = StorageLevel.MEMORY_AND_DISK_SER_2
41+
hibench.streambench.spark.storageLevel 2
42+
43+
# indicate whether to enable the write-ahead log (WAL) feature (default: false)
44+
hibench.streambench.spark.enableWAL false
45+
46+
# if enableWAL is true, the HDFS path for storing the streaming context checkpoint must be specified. If false, it can be empty (default: /var/tmp)
47+
hibench.streambench.spark.checkpointPath /var/tmp
48+
49+
# whether to use the direct approach or not (default: true)
50+
hibench.streambench.spark.useDirectMode true

conf/storm.conf.template

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# nimbus of storm cluster
2+
hibench.streambench.storm.nimbus HOSTNAME_OF_STORM_NIMBUS
3+
hibench.streambench.storm.nimbusAPIPort 6627
4+
5+
hibench.streambench.storm.home /PATH/TO/YOUR/STORM/HOME
6+
7+
# number of Storm workers. The number of threads for most bolts is also equal to this parameter.
8+
hibench.streambench.storm.worker_count 12
9+
10+
# number of kafka spout threads of Storm
11+
hibench.streambench.storm.spout_threads 12
12+
13+
# number of bolt threads altogether
14+
hibench.streambench.storm.bolt_threads 12
15+
16+
hibench.streambench.storm.localshuffle true
17+
18+
# time interval to contact nimbus to judge if finished
19+
hibench.streambench.storm.nimbusContactInterval 10
20+
21+
# kafka argument indicating whether to read data from Kafka from the beginning or to resume from the last position
22+
hibench.streambench.storm.read_from_start true
23+
24+
# whether to turn on ack
25+
hibench.streambench.storm.ackon true

0 commit comments

Comments
 (0)