Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit d695c64

Browse files
committed
Merge branch 'master5.0' into mergeStreamingWithMaster
2 parents e3ee0f1 + 83992e1 commit d695c64

49 files changed

Lines changed: 884 additions & 121 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ target/
1111
derby.log
1212
metastore_db/
1313
report/
14+
.classpath
15+
.project
16+
.settings/

.travis.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
language: java
2+
jdk:
3+
- openjdk7
4+
- oraclejdk8
5+
install:
6+
- cd src
7+
before_script:
8+
- "echo $JAVA_OPTS"
9+
- "export JAVA_OPTS=-Xmx512m"
10+
env:
11+
- SPARK_VERSION=1.5
12+
- SPARK_VERSION=1.6
13+
script:
14+
- mvn clean package -q -Dmaven.javadoc.skip=true -Dspark${SPARK_VERSION} -DMR2

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# HiBench Suite #
1+
# HiBench Suite [![Build Status](https://travis-ci.org/intel-hadoop/HiBench.svg?branch=master)](https://travis-ci.org/intel-hadoop/HiBench)
22
## The bigdata micro benchmark suite ##
33

44

@@ -121,4 +121,3 @@ Note:
121121

122122
### [Advanced Configurations](https://github.com/intel-hadoop/HiBench/wiki/Advanced-Configurations) ###
123123
### [Possible issues](https://github.com/intel-hadoop/HiBench/wiki/Possible-issues) ###
124-

bin/functions/hibench_prop_env_mapping.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,15 @@
112112
WT_FILE_SIZE="hibench.dfsioe.write.file_size",
113113
MAP_JAVA_OPTS="hibench.dfsioe.map.java_opts",
114114
RED_JAVA_OPTS="hibench.dfsioe.red.java_opts",
115+
# For NWeight
116+
MODEL_INPUT="hibench.nweight.model_path",
117+
EDGES="hibench.workload.edges",
118+
DEGREE="hibench.nweight.degree",
119+
MAX_OUT_EDGES="hibench.nweight.max_out_edges",
120+
NUM_PARTITION="hibench.nweight.partitions",
121+
STORAGE_LEVEL="hibench.nweight.storage_level",
122+
DISABLE_KRYO="hibench.nweight.disable_kryo",
123+
MODEL="hibench.nweight.model",
115124

116125
# For streaming bench
117126
STREAMING_TESTCASE="hibench.streambench.testCase",

bin/functions/workload-functions.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -408,8 +408,9 @@ function ensure-nutchindexing-release () {
408408
mkdir $NUTCH_HOME_WORKLOAD/temp
409409
unzip -q $NUTCH_HOME_WORKLOAD/nutch-1.2.job -d $NUTCH_HOME_WORKLOAD/temp
410410
rm -f $NUTCH_HOME_WORKLOAD/temp/lib/jcl-over-slf4j-*.jar
411+
rm -f $NUTCH_HOME_WORKLOAD/temp/lib/slf4j-log4j*.jar
411412
cp ${NUTCH_DIR}/target/dependency/jcl-over-slf4j-*.jar $NUTCH_HOME_WORKLOAD/temp/lib
412-
rm -f $NUTCH_ROOT/nutch-1.2.job
413+
rm -f $NUTCH_HOME_WORKLOAD/nutch-1.2.job
413414
cd $NUTCH_HOME_WORKLOAD/temp
414415
zip -qr $NUTCH_HOME_WORKLOAD/nutch-1.2.job *
415416
rm -rf $NUTCH_HOME_WORKLOAD/temp
@@ -431,8 +432,9 @@ set ${MAP_CONFIG_NAME}=$NUM_MAPS;
431432
set ${REDUCER_CONFIG_NAME}=$NUM_REDS;
432433
set hive.stats.autogather=false;
433434
${HIVE_SQL_COMPRESS_OPTS}
434-
435+
DROP TABLE IF EXISTS uservisits;
435436
CREATE EXTERNAL TABLE uservisits (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
437+
DROP TABLE IF EXISTS uservisits_aggre;
436438
CREATE EXTERNAL TABLE uservisits_aggre ( sourceIP STRING, sumAdRevenue DOUBLE) STORED AS SEQUENCEFILE LOCATION '$OUTPUT_HDFS/uservisits_aggre';
437439
INSERT OVERWRITE TABLE uservisits_aggre SELECT sourceIP, SUM(adRevenue) FROM uservisits GROUP BY sourceIP;
438440
EOF
@@ -453,8 +455,11 @@ set hive.stats.autogather=false;
453455
454456
${HIVE_SQL_COMPRESS_OPTS}
455457
458+
DROP TABLE IF EXISTS rankings;
456459
CREATE EXTERNAL TABLE rankings (pageURL STRING, pageRank INT, avgDuration INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/rankings';
460+
DROP TABLE IF EXISTS uservisits_copy;
457461
CREATE EXTERNAL TABLE uservisits_copy (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
462+
DROP TABLE IF EXISTS rankings_uservisits_join;
458463
CREATE EXTERNAL TABLE rankings_uservisits_join ( sourceIP STRING, avgPageRank DOUBLE, totalRevenue DOUBLE) STORED AS SEQUENCEFILE LOCATION '$OUTPUT_HDFS/rankings_uservisits_join';
459464
INSERT OVERWRITE TABLE rankings_uservisits_join SELECT sourceIP, avg(pageRank), sum(adRevenue) as totalRevenue FROM rankings R JOIN (SELECT sourceIP, destURL, adRevenue FROM uservisits_copy UV WHERE (datediff(UV.visitDate, '1999-01-01')>=0 AND datediff(UV.visitDate, '2000-01-01')<=0)) NUV ON (R.pageURL = NUV.destURL) group by sourceIP order by totalRevenue DESC;
460465
EOF
@@ -475,7 +480,9 @@ set hive.stats.autogather=false;
475480
476481
${HIVE_SQL_COMPRESS_OPTS}
477482
483+
DROP TABLE IF EXISTS uservisits;
478484
CREATE EXTERNAL TABLE uservisits (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$INPUT_HDFS/uservisits';
485+
DROP TABLE IF EXISTS uservisits_copy;
479486
CREATE EXTERNAL TABLE uservisits_copy (sourceIP STRING,destURL STRING,visitDate STRING,adRevenue DOUBLE,userAgent STRING,countryCode STRING,languageCode STRING,searchWord STRING,duration INT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS SEQUENCEFILE LOCATION '$OUTPUT_HDFS/uservisits_copy';
480487
INSERT OVERWRITE TABLE uservisits_copy SELECT * FROM uservisits;
481488
EOF

conf/00-default-properties.conf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ hibench.randomtextwriter.bytestotal.hadoop1.name test.randomtextwrite.total_by
150150
hibench.randomtextwriter.bytestotal.hadoop2.name mapreduce.randomtextwriter.totalbytes
151151
hibench.randomtextwriter.bytestotal.name ${hibench.randomtextwriter.bytestotal.${hibench.hadoop.version}.name}
152152

153+
hibench.nweight.model_path ${hibench.dependency.dir}/sparkbench/src/main/scala/com/intel/sparkbench/nweight/model/user-features
154+
153155
# Workload Input/Output name setting for compress/uncompress mode
154156
hibench.workload.dir.name.compress_disable.input Input
155157
hibench.workload.dir.name.compress_disable.output Output
@@ -260,4 +262,4 @@ spark.sql.shuffle.partitions ${hibench.default.map.parallelism}
260262
#=======================================================
261263
# Flink
262264
#=======================================================
263-
hibench.flink.master FLINK_JM_HOST:PORT
265+
hibench.flink.master FLINK_JM_HOST:PORT

conf/10-data-scale-profile.conf

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,4 +212,24 @@ hibench.dfsioe.gigantic.write.file_size 400
212212
hibench.dfsioe.bigdata.read.number_of_files 2048
213213
hibench.dfsioe.bigdata.read.file_size 1000
214214
hibench.dfsioe.bigdata.write.number_of_files 2048
215-
hibench.dfsioe.bigdata.write.file_size 1000
215+
hibench.dfsioe.bigdata.write.file_size 1000
216+
217+
#NWeight
218+
hibench.nweight.tiny.edges 100000
219+
hibench.nweight.tiny.degree 3
220+
hibench.nweight.tiny.max_out_edges 30
221+
hibench.nweight.small.edges 1000000
222+
hibench.nweight.small.degree 3
223+
hibench.nweight.small.max_out_edges 30
224+
hibench.nweight.large.edges 10000000
225+
hibench.nweight.large.degree 3
226+
hibench.nweight.large.max_out_edges 30
227+
hibench.nweight.huge.edges 100000000
228+
hibench.nweight.huge.degree 3
229+
hibench.nweight.huge.max_out_edges 30
230+
hibench.nweight.gigantic.edges 425000000
231+
hibench.nweight.gigantic.degree 3
232+
hibench.nweight.gigantic.max_out_edges 30
233+
hibench.nweight.bigdata.edges 4250000000
234+
hibench.nweight.bigdata.degree 3
235+
hibench.nweight.bigdata.max_out_edges 30

docker/base/base-core

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
USER root
33

44
## add proxy config inside FIREWALL
5-
65
#==============================
76
# System Basic Tools Installation
87
#==============================
@@ -54,15 +53,12 @@ RUN apt-get install -y python-numpy python-matplotlib
5453

5554
# Install Java
5655
RUN \
57-
echo oracle-java${JDK_VERSION}-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
58-
add-apt-repository -y ppa:webupd8team/java && \
56+
add-apt-repository -y ppa:openjdk-r/ppa && \
5957
apt-get update && \
60-
apt-get install -y oracle-java${JDK_VERSION}-installer && \
61-
rm -rf /var/lib/apt/lists/* && \
62-
rm -rf /var/cache/oracle-jdk${JDK_VERSION}-installer
58+
apt-get install -y openjdk-${JDK_VERSION}-jdk
6359

6460
# Define commonly used JAVA_HOME variable
65-
ENV JAVA_HOME /usr/lib/jvm/java-${JDK_VERSION}-oracle
61+
ENV JAVA_HOME /usr/lib/jvm/java-${JDK_VERSION}-openjdk-amd64
6662
ENV PATH $PATH:$JAVA_HOME/bin
6763

6864

@@ -102,6 +98,9 @@ RUN mv apache-maven-* /usr/local/apache-maven
10298
# define environment variables for maven
10399
ENV M2_HOME /usr/local/apache-maven
104100
ENV PATH $PATH:/usr/local/apache-maven/bin
101+
# copy local maven repository to docker image
102+
#RUN rm -rf /root/.m2
103+
#ADD .m2 /root/.m2
105104

106105

107106
#==============================
@@ -119,5 +118,9 @@ RUN mv /root/HiBench* ${HIBENCH_HOME}
119118
RUN rm -f HiBench-${HIBENCH_VERSION}.zip
120119
COPY conf/99-user_defined_properties.conf ${HIBENCH_HOME}/conf/
121120
# start building HiBench
121+
RUN apt-get update && apt-get install -y thrift-compiler
122+
#RUN cd ${HIBENCH_HOME}/src && \
123+
#mvn clean package -D spark1.5 -D MR2
124+
# /bin/build-all.sh can be used to build HiBench for all known Spark and MR versions
122125
RUN ${HIBENCH_HOME}/bin/build-all.sh
123126

docker/cdh-docker/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,6 @@ COPY scripts/hadoop-env.sh /etc/hadoop/conf/hadoop-env.sh
6060
#Format HDFS
6161
COPY scripts/restart-hadoop-spark.sh /usr/bin/restart-hadoop-spark.sh
6262
RUN chmod +x /usr/bin/restart-hadoop-spark.sh
63-
64-
# start HADOOP/SPARK
65-
CMD bash -C '/usr/bin/restart-hadoop-spark.sh'; 'bash'
63+
#Copy RunExample File
64+
COPY scripts/runexample.sh /root/runexample.sh
65+
RUN chmod +x /root/runexample.sh

docker/cdh-docker/conf/core-site.xml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
<!--
12
# Licensed to the Apache Software Foundation (ASF) under one or more
23
# contributor license agreements. See the NOTICE file distributed with
34
# this work for additional information regarding copyright ownership.
@@ -12,11 +13,9 @@
1213
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1314
# See the License for the specific language governing permissions and
1415
# limitations under the License.
16+
-->
1517

1618

17-
<?xml version="1.0"?>
18-
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
19-
2019
<configuration>
2120

2221
<!--

0 commit comments

Comments
 (0)