HiBench/conf/hibench.conf at 205a4e27ed2d443db5d92e3aaecaab56830e0665 · Intel-bigdata/HiBench · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Data scale profile. Available values are tiny, small, large, huge, gigantic and bigdata.
# The definitions of these profiles can be found in each workload's conf file, e.g. conf/workloads/micro/wordcount.conf
hibench.scale.profile                large
# Mapper number in hadoop, partition number in Spark
hibench.default.map.parallelism         8

# Reducer number in hadoop, shuffle partition number in Spark
hibench.default.shuffle.parallelism     8


#======================================================
# Report files
#======================================================
# default report formats
hibench.report.formats		"%-12s %-10s %-8s %-20s %-20s %-20s %-20s\n"

# default report dir path
hibench.report.dir		${hibench.home}/report

# default report file name
hibench.report.name		hibench.report

# input/output format settings. Available formats: Text, Sequence, Null.
sparkbench.inputformat         Sequence
sparkbench.outputformat        Sequence

# hibench config folder
hibench.configure.dir		${hibench.home}/conf

# default hibench HDFS root
hibench.hdfs.data.dir		${hibench.hdfs.master}/HiBench

# path of hibench jars
hibench.hibench.datatool.dir	          ${hibench.home}/autogen/target/autogen-8.0-SNAPSHOT-jar-with-dependencies.jar
hibench.common.jar                      ${hibench.home}/common/target/hibench-common-8.0-SNAPSHOT-jar-with-dependencies.jar
hibench.sparkbench.jar                  ${hibench.home}/sparkbench/assembly/target/sparkbench-assembly-8.0-SNAPSHOT-dist.jar
hibench.streambench.stormbench.jar      ${hibench.home}/stormbench/streaming/target/stormbench-streaming-8.0-SNAPSHOT.jar
hibench.streambench.gearpump.jar        ${hibench.home}/gearpumpbench/streaming/target/gearpumpbench-streaming-8.0-SNAPSHOT-jar-with-dependencies.jar
hibench.streambench.flinkbench.jar      ${hibench.home}/flinkbench/streaming/target/flinkbench-streaming-8.0-SNAPSHOT-jar-with-dependencies.jar

#======================================================
# workload home/input/output path
#======================================================
hibench.hive.home		${hibench.home}/hadoopbench/sql/target/${hibench.hive.release}
hibench.hive.release		apache-hive-0.14.0-bin
hibench.hivebench.template.dir	${hibench.home}/hadoopbench/sql/hive_template
hibench.bayes.dir.name.input	${hibench.workload.dir.name.input}
hibench.bayes.dir.name.output	${hibench.workload.dir.name.output}

hibench.mahout.release.apache   apache-mahout-distribution-0.11.0
hibench.mahout.release.hdp      apache-mahout-distribution-0.11.0
hibench.mahout.release.cdh5	    mahout-0.9-cdh5.1.0
hibench.mahout.release		      ${hibench.mahout.release.${hibench.hadoop.release}}
hibench.mahout.home		          ${hibench.home}/hadoopbench/mahout/target/${hibench.mahout.release}

hibench.masters.hostnames
hibench.slaves.hostnames

hibench.workload.input
hibench.workload.output
hibench.workload.dir.name.input         Input
hibench.workload.dir.name.output        Output

hibench.nutch.dir.name.input	${hibench.workload.dir.name.input}
hibench.nutch.dir.name.output	${hibench.workload.dir.name.output}
hibench.nutch.nutchindexing.dir	${hibench.home}/hadoopbench/nutchindexing/
hibench.nutch.release		nutch-1.2
hibench.nutch.home		${hibench.home}/hadoopbench/nutchindexing/target/${hibench.nutch.release}

hibench.dfsioe.dir.name.input	${hibench.workload.dir.name.input}
hibench.dfsioe.dir.name.output	${hibench.workload.dir.name.output}


#======================================================
# Streaming General
#======================================================
# Indicates whether to run in debug mode for correctness verification (default: false)
hibench.streambench.debugMode false
hibench.streambench.sampleProbability 0.1
hibench.streambench.fixWindowDuration            10000
hibench.streambench.fixWindowSlideStep           10000


#======================================================
# Kafka for streaming benchmarks
#======================================================
hibench.streambench.kafka.home                  /PATH/TO/YOUR/KAFKA/HOME
# zookeeper host:port of kafka cluster, host1:port1,host2:port2...
hibench.streambench.zkHost
# Kafka broker list, in the form host:port,host:port,...
hibench.streambench.kafka.brokerList
hibench.streambench.kafka.consumerGroup          HiBench
# number of partitions of generated topic (default 20)
hibench.streambench.kafka.topicPartitions       20
# consumer group of the consumer for kafka (default: HiBench)
# NOTE: this key is also set a few lines above with the same value; the two entries are redundant.
hibench.streambench.kafka.consumerGroup HiBench
# Set the starting offset of kafkaConsumer (default: largest)
hibench.streambench.kafka.offsetReset largest


#======================================================
# Data generator for streaming benchmarks
#======================================================
# Interval span in millisecond (default: 50)
hibench.streambench.datagen.intervalSpan         50
# Number of records to generate per interval span (default: 5)
hibench.streambench.datagen.recordsPerInterval   5
# fixed length of record (default: 200)
hibench.streambench.datagen.recordLength         200
# Number of KafkaProducers running on different threads (default: 1)
hibench.streambench.datagen.producerNumber       1
# Total number of rounds of data to send (default: -1 means infinity)
hibench.streambench.datagen.totalRounds          -1
# Number of total records that will be generated (default: -1 means infinity)
hibench.streambench.datagen.totalRecords        -1
# default path to store seed files (default: ${hibench.hdfs.data.dir}/Streaming)
hibench.streambench.datagen.dir                         ${hibench.hdfs.data.dir}/Streaming
# default path settings for generated data1 & data2
hibench.streambench.datagen.data1.name                  Seed
hibench.streambench.datagen.data1.dir                   ${hibench.streambench.datagen.dir}/${hibench.streambench.datagen.data1.name}
hibench.streambench.datagen.data2_cluster.dir           ${hibench.streambench.datagen.dir}/Kmeans/Cluster
hibench.streambench.datagen.data2_samples.dir           ${hibench.streambench.datagen.dir}/Kmeans/Samples

#======================================================
# MetricsReader for streaming benchmarks
#======================================================
# Number of sample records for `MetricsReader` (default: 5000000)
hibench.streambench.metricsReader.sampleNum      5000000
# Number of threads for `MetricsReader` (default: 20)
hibench.streambench.metricsReader.threadNum      20
# The directory where the benchmark reports are stored (default: ${hibench.home}/report)
hibench.streambench.metricsReader.outputDir      ${hibench.home}/report