This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit ea92553

Refactor Travis CI to support multiple Java Builds
* Moved all xml files into new folders containing the artifacts for either Hadoop 2.6 or 3.2; the right set is picked up in travis.yml depending on the testing needs.
* Moved the spark-env file into new folders containing the artifacts for either Spark 1.6 or 2.4; the right one is picked up in travis.yml depending on the testing needs.
* Removed hardcoded values from hadoop.conf and spark.conf; these are now filled in depending on the testing needs.
* Added an `install_hadoop_spark` script that downloads the Hadoop and Spark binaries depending on the testing needs.
* Added a `config_hadoop_spark` script that sets up Hadoop, Spark, and HiBench depending on the testing needs.
* Added a `jdk_ver` script that detects the Java version currently installed in the Travis CI environment.
* Modified the `restart_hadoop_spark` script to be agnostic to the binaries required for testing.
* travis/config_hadoop_spark.sh: for Java 8 and 11, the `sql` tests are skipped, since Hive is no longer used to perform queries; newer Spark versions run queries through `SparkSession` rather than `import org.apache.spark.sql`.
* .travis.yml:
  * Added `dist: trusty` to keep using this distro; Travis defaults to xenial when no dist is specified, and Ubuntu releases newer than trusty on Travis do not support openjdk7.
  * Refactored the CI flow to download, set up, run, and test Hadoop and Spark depending on the required JDK (version 7, 8, or 11).
  * HiBench is configured depending on the required JDK (7, 8, or 11).
  * HiBench is built depending on the required JDK (7, 8, or 11).
  * Benchmarks are run for every JDK version in the build matrix.

Signed-off-by: Luis Ponce <luis.f.ponce.navarro@linux.intel.com>
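The `jdk_ver` helper is referenced by the commit but not shown in this excerpt. As a hedged sketch, a script like it could map the version string reported by `java -version` to a major version number; the function name and parsing below are illustrative assumptions, not the actual travis/jdk_ver.sh:

```shell
#!/usr/bin/env bash
# Hypothetical sketch: extract the major Java version from a version string
# such as "1.7.0_95", "1.8.0_212", or "11.0.2". The real travis/jdk_ver.sh
# may obtain and parse `java -version 2>&1` differently.
jdk_major_from_string() {
  local ver="$1"
  case "$ver" in
    1.*) ver="${ver#1.}" ;;   # pre-Java-9 scheme: 1.8.0_212 -> 8.0_212
  esac
  echo "${ver%%.*}"           # drop everything after the first dot
}

jdk_major_from_string "11.0.2"     # prints 11
jdk_major_from_string "1.8.0_212"  # prints 8
```

In the refactored .travis.yml, this detected value is what selects the Hadoop/Spark versions to download and the Maven profiles to build.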
1 parent d113389 commit ea92553

17 files changed

Lines changed: 448 additions & 35 deletions

.travis.yml

Lines changed: 62 additions & 23 deletions
@@ -1,6 +1,9 @@
+dist: trusty
 sudo: required
 language: java
 jdk:
+- openjdk11
+- openjdk8
 - openjdk7
 before_install:
 - cat /etc/hosts # optionally check the content *before*
@@ -10,32 +13,68 @@ before_install:
 - cat /proc/cpuinfo | grep cores | wc -l
 - free -h
 install:
-- hibench=$(pwd)
-- cd /opt/
-- wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.0-bin-hadoop2.6.tgz
-- tar -xzf spark-1.6.0-bin-hadoop2.6.tgz
-- wget https://archive.apache.org/dist/hadoop/core/hadoop-2.6.5/hadoop-2.6.5.tar.gz
-- tar -xzf hadoop-2.6.5.tar.gz
-- cd ${hibench}
-- cp ./travis/spark-env.sh /opt/spark-1.6.0-bin-hadoop2.6/conf/
-- cp ./travis/core-site.xml /opt/hadoop-2.6.5/etc/hadoop/
-- cp ./travis/hdfs-site.xml /opt/hadoop-2.6.5/etc/hadoop/
-- cp ./travis/mapred-site.xml /opt/hadoop-2.6.5/etc/hadoop/
-- cp ./travis/yarn-site.xml /opt/hadoop-2.6.5/etc/hadoop/
-- cp ./travis/hibench.conf ./conf/
-- cp ./travis/benchmarks.lst ./conf/
+- |
+  export java_ver=$(./travis/jdk_ver.sh)
+  if [[ "$java_ver" == 11 ]]; then
+    export HADOOP_VER=3.2.0
+    export SPARK_VER=2.4.3
+    export SPARK_PACKAGE_TYPE=without-hadoop-scala-2.12
+  elif [[ "$java_ver" == 8 ]]; then
+    export HADOOP_VER=3.2.0
+    export SPARK_VER=2.4.3
+    export SPARK_PACKAGE_TYPE=without-hadoop
+  elif [[ "$java_ver" == 7 ]]; then
+    export HADOOP_VER=2.6.5
+    export SPARK_VER=1.6.0
+    export SPARK_PACKAGE_TYPE=hadoop2.6
+  else
+    exit 1
+  fi
+
+  # Folders where the Spark and Hadoop binaries are stored, depending on the required version
+  export SPARK_BINARIES_FOLDER=spark-$SPARK_VER-bin-$SPARK_PACKAGE_TYPE
+  export HADOOP_BINARIES_FOLDER=hadoop-$HADOOP_VER
+  export HADOOP_CONF_DIR=/opt/$HADOOP_BINARIES_FOLDER/etc/hadoop/
+
+  if [[ "$HADOOP_VER" =~ "3.2" ]]; then
+    export HADOOP_IDENT_STRING=root
+    export HDFS_NAMENODE_USER=root
+    export HDFS_DATANODE_USER=root
+    export HDFS_SECONDARYNAMENODE_USER=root
+    export YARN_RESOURCEMANAGER_USER=root
+    export YARN_NODEMANAGER_USER=root
+    export SPARK_CONF_DIR=/opt/$SPARK_BINARIES_FOLDER/conf/
+    export HADOOP_HOME=/opt/$HADOOP_BINARIES_FOLDER
+    export SPARK_HOME=/opt/$SPARK_BINARIES_FOLDER
+    export HADOOP_MAPRED_HOME=$HADOOP_HOME
+    export HADOOP_COMMON_HOME=$HADOOP_HOME
+    export HADOOP_HDFS_HOME=$HADOOP_HOME
+    export YARN_HOME=$HADOOP_HOME
+    export HADOOP_INSTALL=$HADOOP_HOME
+    export SPARK_DIST_CLASSPATH=$(/opt/$HADOOP_BINARIES_FOLDER/bin/hadoop classpath)
+  fi
+
+  sudo -E ./travis/install_hadoop_spark.sh
+  sudo -E ./travis/config_hadoop_spark.sh
 before_script:
 - "export JAVA_OPTS=-Xmx512m"
 cache:
   directories:
   - $HOME/.m2
 script:
-- mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.2 -Dscala=2.11
-- mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.0 -Dscala=2.11
-- mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=1.6 -Dscala=2.10
-- sudo -E ./travis/configssh.sh
-- sudo -E ./travis/restart_hadoop_spark.sh
-- cp ./travis/hadoop.conf ./conf/
-- cp ./travis/spark.conf ./conf/
-- /opt/hadoop-2.6.5/bin/yarn node -list 2
-- sudo -E ./bin/run_all.sh
+- |
+  if [[ "$java_ver" == 11 ]]; then
+    mvn clean package -q -Psparkbench -Phadoopbench -Dmaven.javadoc.skip=true -Dhadoop=3.2 -Dspark=2.4 -Dscala=2.12 -Dexclude-streaming
+  elif [[ "$java_ver" == 8 ]]; then
+    mvn clean package -q -Dmaven.javadoc.skip=true -Dhadoop=3.2 -Dspark=2.4 -Dscala=2.11
+  elif [[ "$java_ver" == 7 ]]; then
+    mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.2 -Dscala=2.11
+    mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=2.0 -Dscala=2.11
+    mvn clean package -q -Dmaven.javadoc.skip=true -Dspark=1.6 -Dscala=2.10
+  else
+    exit 1
+  fi
+
+  sudo -E ./travis/configssh.sh
+  sudo -E ./travis/restart_hadoop_spark.sh
+  sudo -E ./bin/run_all.sh
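The travis/install_hadoop_spark.sh script itself is not part of this hunk. As a hedged sketch of the kind of download step it performs, assuming the Apache archive layout used elsewhere in this diff (the dry-run structure and URL patterns below are illustrative only, not the real script):

```shell
#!/usr/bin/env bash
# Illustrative dry run only: compute the tarball names and archive URLs from
# the variables exported by the `install:` block above. The real script
# presumably also fetches and unpacks them (wget + tar); here we just echo
# what would be downloaded.
set -eu

HADOOP_VER=${HADOOP_VER:-3.2.0}
SPARK_VER=${SPARK_VER:-2.4.3}
SPARK_PACKAGE_TYPE=${SPARK_PACKAGE_TYPE:-without-hadoop}

hadoop_tarball="hadoop-${HADOOP_VER}.tar.gz"
spark_tarball="spark-${SPARK_VER}-bin-${SPARK_PACKAGE_TYPE}.tgz"

echo "https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VER}/${hadoop_tarball}"
echo "https://archive.apache.org/dist/spark/spark-${SPARK_VER}/${spark_tarball}"
```

Deriving the names from HADOOP_VER/SPARK_VER/SPARK_PACKAGE_TYPE is what lets one script serve all three JDK configurations.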
core-site.xml

Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+  <property>
+    <name>fs.default.name</name>
+    <value>hdfs://localhost:9000</value>
+  </property>
+
+</configuration>
hdfs-site.xml

Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>dfs.replication</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>/usr/local/hdfs/namenode</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>/usr/local/hdfs/datanode</value>
+  </property>
+  <property>
+    <name>dfs.client.use.datanode.hostname</name>
+    <value>true</value>
+  </property>
+</configuration>
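CI scripts sometimes need to read values back out of site files like the one above. A small illustrative helper (not part of this commit) that extracts a property's value with plain grep/sed, assuming the one-property-per-line layout shown here:

```shell
#!/usr/bin/env bash
# Illustrative helper, not from this commit: pull the <value> for a given
# <name> out of a Hadoop *-site.xml file. Relies on name and value sitting
# on adjacent lines; a real script should prefer an XML-aware tool.
extract_property() {  # usage: extract_property <property-name> <file>
  grep -A1 "<name>$1</name>" "$2" | sed -n 's:.*<value>\(.*\)</value>.*:\1:p'
}

# Hypothetical sample file mirroring the hdfs-site.xml layout above.
cat > /tmp/hdfs-site-sample.xml <<'EOF'
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
EOF

extract_property dfs.replication /tmp/hdfs-site-sample.xml  # prints 1
```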
mapred-site.xml

Lines changed: 53 additions & 0 deletions

@@ -0,0 +1,53 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>mapreduce.framework.name</name>
+    <value>yarn</value>
+  </property>
+
+  <property>
+    <name>mapreduce.application.classpath</name>
+    <value>$HADOOP_HOME/share/hadoop/common/*,
+      $HADOOP_HOME/share/hadoop/common/lib/*,
+      $HADOOP_HOME/share/hadoop/hdfs/*,
+      $HADOOP_HOME/share/hadoop/hdfs/lib/*,
+      $HADOOP_HOME/share/hadoop/yarn/*,
+      $HADOOP_HOME/share/hadoop/yarn/lib/*,
+      $HADOOP_HOME/share/hadoop/mapreduce/*,
+      $HADOOP_HOME/share/hadoop/mapreduce/lib/*
+    </value>
+  </property>
+
+  <property>
+    <name>yarn.app.mapreduce.am.env</name>
+    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
+  </property>
+
+  <property>
+    <name>mapreduce.map.env</name>
+    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
+  </property>
+
+  <property>
+    <name>mapreduce.reduce.env</name>
+    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
+  </property>
+
+</configuration>
spark-env.sh

Lines changed: 71 additions & 0 deletions

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is sourced when running various Spark programs.
+# Copy it as spark-env.sh and edit that to configure Spark for your site.
+
+# Options read when launching programs locally with
+# ./bin/run-example or ./bin/spark-submit
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
+# - SPARK_CLASSPATH, default classpath entries to append
+
+# Options read by executors and drivers running inside the cluster
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
+# - SPARK_CLASSPATH, default classpath entries to append
+# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
+# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
+
+# Options read in YARN client mode
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)
+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
+# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: 'default')
+# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
+# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job.
+
+# Options for the daemons used in the standalone deploy mode
+export SPARK_MASTER_IP=localhost
+export SPARK_DIST_CLASSPATH=$(/opt/$HADOOP_BINARIES_FOLDER/bin/hadoop classpath)
+# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
+# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
+# - SPARK_WORKER_DIR, to set the working directory of worker processes
+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
+# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
+
+# Generic options for the daemons used in the standalone deploy mode
+# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - SPARK_LOG_DIR       Where log files are stored. (Default: ${SPARK_HOME}/logs)
+# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
+# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
+# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
