Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit aada472

Browse files
committed
clean up some confs
1 parent b26bb83 commit aada472

5 files changed

Lines changed: 93 additions & 176 deletions

File tree

bin/build-all.sh

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,5 @@ DIR=`dirname "$0"`
1919
DIR=`cd "${DIR}/.."; pwd`
2020

2121
CURDIR=`pwd`
22-
cd $DIR/src
23-
mvn clean package && \
24-
cd $DIR/src/sparkbench && \
25-
( mkdir jars | true )
26-
27-
for mr in MR1 MR2; do
28-
for spark_version in 1.2 1.3 1.4 1.5 1.6; do
29-
cp target/*-jar-with-dependencies.jar jars
30-
mvn clean package -D spark$spark_version -D $mr
31-
if [ $? -ne 0 ]; then
32-
echo "Build failed for spark$spark_version and $mr, please check!"
33-
exit 1
34-
fi
35-
done
36-
done
37-
cp jars/*.jar target/ && \
38-
rm -rf jars
39-
40-
cd $DIR/src/streambench/sparkbench && \
41-
( mkdir jars | true )
42-
for spark_version in 1.3 1.4 1.5 1.6; do
43-
cp target/*-jar-with-dependencies.jar jars
44-
mvn clean package -D spark$spark_version
45-
if [ $? -ne 0 ]; then
46-
echo "Build failed for spark$spark_version, please check!"
47-
exit 1
48-
fi
49-
done
50-
cp jars/*.jar target/ && \
51-
rm -rf jars
52-
53-
result=$?
54-
cd $CURDIR
55-
56-
if [ $result -ne 0 ]; then
57-
echo "Build failed, please check!"
58-
else
59-
echo "Build all done!"
60-
fi
22+
cd $DIR
23+
mvn clean package

bin/functions/hibench_prop_env_mapping.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,11 @@
2020
HiBenchEnvPropMappingMandatory=dict(
2121
JAVA_BIN="java.bin",
2222
HADOOP_HOME="hibench.hadoop.home",
23-
SPARK_HOME="hibench.spark.home",
2423
HDFS_MASTER="hibench.hdfs.master",
25-
SPARK_MASTER="hibench.spark.master",
26-
HADOOP_VERSION="hibench.hadoop.version",
2724
HADOOP_RELEASE="hibench.hadoop.release",
2825
HADOOP_EXAMPLES_JAR="hibench.hadoop.examples.jar",
2926
HADOOP_EXECUTABLE="hibench.hadoop.executable",
3027
HADOOP_CONF_DIR="hibench.hadoop.configure.dir",
31-
SPARK_VERSION="hibench.spark.version",
3228
HIBENCH_HOME="hibench.home",
3329
HIBENCH_CONF="hibench.configure.dir",
3430

@@ -47,6 +43,8 @@
4743
)
4844

4945
HiBenchEnvPropMapping=dict(
46+
SPARK_HOME="hibench.spark.home",
47+
SPARK_MASTER="hibench.spark.master",
5048
SPARK_EXAMPLES_JAR="hibench.spark.examples.jar",
5149

5250
HIVE_HOME="hibench.hive.home",

bin/functions/load_config.py

Lines changed: 78 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -379,31 +379,6 @@ def probe_hadoop_release():
379379
"hibench.hadoop.release"] != "cdh4", "Hadoop release CDH4 is not supported in HiBench6.0, please upgrade to CDH5 or use Apache Hadoop/HDP"
380380

381381

382-
def probe_spark_version():
383-
# probe spark version
384-
if not HibenchConf.get("hibench.spark.version", ""):
385-
spark_home = HibenchConf.get("hibench.spark.home", "")
386-
assert spark_home, "`hibench.spark.home` undefined, please fix it and retry"
387-
try:
388-
release_file = join(spark_home, "RELEASE")
389-
with open(release_file) as f:
390-
spark_version_raw = f.readlines()[0]
391-
# spark_version_raw="Spark 1.2.2-SNAPSHOT (git revision
392-
# f9d8c5e) built for Hadoop 1.0.4\n" # version sample
393-
spark_version = spark_version_raw.split()[1].strip()
394-
HibenchConfRef["hibench.spark.version"] = "Probed from file %s, parsed by value:%s" % (
395-
release_file, spark_version_raw)
396-
except IOError as e: # no release file, fall back to hard way
397-
log("Probing spark verison, may last long at first time...")
398-
shell_cmd = '( cd %s; mvn help:evaluate -Dexpression=project.version 2> /dev/null | grep -v "INFO" | tail -n 1)' % spark_home
399-
spark_version = shell(shell_cmd, timeout=600).strip()
400-
HibenchConfRef["hibench.spark.version"] = "Probed by shell command: %s, value: %s" % (
401-
shell_cmd, spark_version)
402-
403-
assert spark_version, "Spark version probe failed, please override `hibench.spark.version` to explicitly define this property"
404-
HibenchConf["hibench.spark.version"] = "spark" + spark_version[:3]
405-
406-
407382
def probe_hadoop_examples_jars():
408383
# probe hadoop example jars
409384
if not HibenchConf.get("hibench.hadoop.examples.jar", ""):
@@ -511,6 +486,47 @@ def probe_spark_master_webui_port():
511486
def probe_spark_worker_webui_port():
512487
return probe_spark_conf_value("SPARK_WORKER_WEBUI_PORT", "8081")
513488

489+
def probe_masters_slaves_by_Yarn():
490+
yarn_executable = os.path.join(os.path.dirname(
491+
HibenchConf['hibench.hadoop.executable']), "yarn")
492+
cmd = "( " + yarn_executable + \
493+
" node -list 2> /dev/null | grep RUNNING )"
494+
try:
495+
worker_hostnames = [
496+
line.split(":")[0] for line in shell(cmd).split("\n")]
497+
HibenchConf['hibench.slaves.hostnames'] = " ".join(
498+
worker_hostnames)
499+
HibenchConfRef[
500+
'hibench.slaves.hostnames'] = "Probed by parsing results from: " + cmd
501+
502+
# parse yarn resource manager from hadoop conf
503+
yarn_site_file = os.path.join(
504+
HibenchConf["hibench.hadoop.configure.dir"], "yarn-site.xml")
505+
with open(yarn_site_file) as f:
506+
file_content = f.read()
507+
match_address = re.findall(
508+
"\<property\>\s*\<name\>\s*yarn.resourcemanager.address[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
509+
file_content)
510+
match_hostname = re.findall(
511+
"\<property\>\s*\<name\>\s*yarn.resourcemanager.hostname[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
512+
file_content)
513+
if match_address:
514+
resourcemanager_hostname = match_address[0][0]
515+
HibenchConf[
516+
'hibench.masters.hostnames'] = resourcemanager_hostname
517+
HibenchConfRef[
518+
'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
519+
elif match_hostname:
520+
resourcemanager_hostname = match_hostname[0][0]
521+
HibenchConf[
522+
'hibench.masters.hostnames'] = resourcemanager_hostname
523+
HibenchConfRef[
524+
'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
525+
else:
526+
assert 0, "Unknown resourcemanager, please check `hibench.hadoop.configure.dir` and \"yarn-site.xml\" file"
527+
except Exception as e:
528+
assert 0, "Get workers from yarn-site.xml page failed, reason:%s\nplease set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually" % e
529+
514530

515531
def probe_masters_slaves_hostnames():
516532

@@ -522,80 +538,44 @@ def probe_masters_slaves_hostnames():
522538
"") and HibenchConf.get(
523539
"hibench.slaves.hostnames",
524540
"")): # no pre-defined hostnames, let's probe
525-
spark_master = HibenchConf['hibench.spark.master']
526-
# local mode
527-
if spark_master.startswith("local"):
528-
HibenchConf['hibench.masters.hostnames'] = '' # no master
529-
# localhost as slaves
530-
HibenchConf['hibench.slaves.hostnames'] = 'localhost'
531-
HibenchConfRef['hibench.masters.hostnames'] = HibenchConfRef[
532-
'hibench.slaves.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
533-
# spark standalone mode
534-
elif spark_master.startswith("spark"):
535-
HibenchConf['hibench.masters.hostnames'] = spark_master[8:].split(":")[
536-
0]
537-
HibenchConfRef[
538-
'hibench.masters.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
539-
try:
540-
log(spark_master, HibenchConf['hibench.masters.hostnames'])
541-
master_port = probe_spark_master_webui_port()
542-
worker_port = probe_spark_worker_webui_port()
543-
# Make the assumption that the master is in internal network, and force not to use any proxies
544-
with closing(urllib.urlopen('http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port), proxies={})) as page:
545-
worker_hostnames = [
546-
re.findall(
547-
"http:\/\/([a-zA-Z\-\._0-9]+):%s" %
548-
worker_port,
549-
x)[0] for x in page.readlines() if "%s" %
550-
worker_port in x and "worker" in x]
551-
HibenchConf['hibench.slaves.hostnames'] = " ".join(
552-
worker_hostnames)
553-
HibenchConfRef['hibench.slaves.hostnames'] = "Probed by parsing " + \
554-
'http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port)
555-
except Exception as e:
556-
assert 0, "Get workers from spark master's web UI page failed, \nPlease check your configurations, network settings, proxy settings, or set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually, master_port: %s, slave_port:%s" % (master_port, worker_port)
557-
# yarn mode
558-
elif spark_master.startswith("yarn"):
559-
yarn_executable = os.path.join(os.path.dirname(
560-
HibenchConf['hibench.hadoop.executable']), "yarn")
561-
cmd = "( " + yarn_executable + \
562-
" node -list 2> /dev/null | grep RUNNING )"
563-
try:
564-
worker_hostnames = [
565-
line.split(":")[0] for line in shell(cmd).split("\n")]
566-
HibenchConf['hibench.slaves.hostnames'] = " ".join(
567-
worker_hostnames)
541+
if not (HibenchConf.get("hibench.spark.master", "")):
542+
probe_masters_slaves_by_Yarn()
543+
else:
544+
spark_master = HibenchConf['hibench.spark.master']
545+
# local mode
546+
if spark_master.startswith("local"):
547+
HibenchConf['hibench.masters.hostnames'] = '' # no master
548+
# localhost as slaves
549+
HibenchConf['hibench.slaves.hostnames'] = 'localhost'
550+
HibenchConfRef['hibench.masters.hostnames'] = HibenchConfRef[
551+
'hibench.slaves.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
552+
# spark standalone mode
553+
elif spark_master.startswith("spark"):
554+
HibenchConf['hibench.masters.hostnames'] = spark_master[8:].split(":")[
555+
0]
568556
HibenchConfRef[
569-
'hibench.slaves.hostnames'] = "Probed by parsing results from: " + cmd
570-
571-
# parse yarn resource manager from hadoop conf
572-
yarn_site_file = os.path.join(
573-
HibenchConf["hibench.hadoop.configure.dir"], "yarn-site.xml")
574-
with open(yarn_site_file) as f:
575-
file_content = f.read()
576-
match_address = re.findall(
577-
"\<property\>\s*\<name\>\s*yarn.resourcemanager.address[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
578-
file_content)
579-
match_hostname = re.findall(
580-
"\<property\>\s*\<name\>\s*yarn.resourcemanager.hostname[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
581-
file_content)
582-
if match_address:
583-
resourcemanager_hostname = match_address[0][0]
584-
HibenchConf[
585-
'hibench.masters.hostnames'] = resourcemanager_hostname
586-
HibenchConfRef[
587-
'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
588-
elif match_hostname:
589-
resourcemanager_hostname = match_hostname[0][0]
590-
HibenchConf[
591-
'hibench.masters.hostnames'] = resourcemanager_hostname
592-
HibenchConfRef[
593-
'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
594-
else:
595-
assert 0, "Unknown resourcemanager, please check `hibench.hadoop.configure.dir` and \"yarn-site.xml\" file"
596-
except Exception as e:
597-
assert 0, "Get workers from yarn-site.xml page failed, reason:%s\nplease set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually" % e
598-
557+
'hibench.masters.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
558+
try:
559+
log(spark_master, HibenchConf['hibench.masters.hostnames'])
560+
master_port = probe_spark_master_webui_port()
561+
worker_port = probe_spark_worker_webui_port()
562+
# Make the assumption that the master is in internal network, and force not to use any proxies
563+
with closing(urllib.urlopen('http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port), proxies={})) as page:
564+
worker_hostnames = [
565+
re.findall(
566+
"http:\/\/([a-zA-Z\-\._0-9]+):%s" %
567+
worker_port,
568+
x)[0] for x in page.readlines() if "%s" %
569+
worker_port in x and "worker" in x]
570+
HibenchConf['hibench.slaves.hostnames'] = " ".join(
571+
worker_hostnames)
572+
HibenchConfRef['hibench.slaves.hostnames'] = "Probed by parsing " + \
573+
'http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port)
574+
except Exception as e:
575+
assert 0, "Get workers from spark master's web UI page failed, \nPlease check your configurations, network settings, proxy settings, or set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually, master_port: %s, slave_port:%s" % (master_port, worker_port)
576+
# yarn mode
577+
elif spark_master.startswith("yarn"):
578+
probe_masters_slaves_by_Yarn()
599579
# reset hostnames according to gethostbyaddr
600580
names = set(
601581
HibenchConf['hibench.masters.hostnames'].split() +
@@ -658,7 +638,6 @@ def generate_optional_value():
658638

659639
probe_java_bin()
660640
probe_hadoop_release()
661-
probe_spark_version()
662641
probe_hadoop_examples_jars()
663642
probe_hadoop_examples_test_jars()
664643
probe_sleep_job_jar()
@@ -676,7 +655,6 @@ def export_config(workload_name, framework_name):
676655

677656
spark_conf_dir = join(conf_dir, "sparkbench")
678657
spark_prop_conf_filename = join(spark_conf_dir, "spark.conf")
679-
samza_prop_conf_filename = join(spark_conf_dir, "samza.conf")
680658
sparkbench_prop_conf_filename = join(spark_conf_dir, "sparkbench.conf")
681659

682660
if not os.path.exists(spark_conf_dir):
@@ -702,7 +680,6 @@ def export_config(workload_name, framework_name):
702680
"SPARKBENCH_PROPERTIES_FILES=%s\n" %
703681
sparkbench_prop_conf_filename)
704682
f.write("SPARK_PROP_CONF=%s\n" % spark_prop_conf_filename)
705-
f.write("SAMZA_PROP_CONF=%s\n" % samza_prop_conf_filename)
706683
f.write("WORKLOAD_RESULT_FOLDER=%s\n" % join(conf_dir, ".."))
707684
f.write("HIBENCH_WORKLOAD_CONF=%s\n" % conf_filename)
708685
f.write("export HADOOP_EXECUTABLE\n")
@@ -721,14 +698,6 @@ def export_config(workload_name, framework_name):
721698
f.write("# Source: %s\n" % source)
722699
f.write("\n".join(sorted(items)))
723700
f.write("\n\n")
724-
# generate configure for samza
725-
with open(samza_prop_conf_filename, 'w') as f:
726-
for source in sorted(sources.keys()):
727-
items = [x for x in sources[source] if x.startswith("samza.")]
728-
if items:
729-
f.write("# Source: %s\n" % source)
730-
f.write("\n".join(sorted(items)))
731-
f.write("\n\n")
732701
# generate configure for spark
733702
with open(sparkbench_prop_conf_filename, 'w') as f:
734703
for source in sorted(sources.keys()):

bin/functions/workload-functions.sh

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,7 @@ function gen_report() { # dump the result to report file
110110

111111
function rmr-hdfs(){ # rm -r for hdfs
112112
assert $1 "dir parameter missing"
113-
if [ $HADOOP_VERSION == "hadoop1" ] && [ "$HADOOP_RELEASE" == "apache" ]; then
114-
RMDIR_CMD="fs -rmr -skipTrash"
115-
else
116-
RMDIR_CMD="fs -rm -r -skipTrash"
117-
fi
113+
RMDIR_CMD="fs -rm -r -skipTrash"
118114
local CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR $RMDIR_CMD $1"
119115
echo -e "${BCyan}hdfs rm -r: ${Cyan}${CMD}${Color_Off}" > /dev/stderr
120116
execute_withlog ${CMD}
@@ -151,11 +147,7 @@ function upload-to-hdfs(){
151147

152148
function dus-hdfs(){ # du -s for hdfs
153149
assert $1 "dir parameter missing"
154-
if [ $HADOOP_VERSION == "hadoop1" ] && [ "$HADOOP_RELEASE" == "apache" ]; then
155-
DUS_CMD="fs -dus"
156-
else
157-
DUS_CMD="fs -du -s"
158-
fi
150+
DUS_CMD="fs -du -s"
159151
local CMD="$HADOOP_EXECUTABLE --config $HADOOP_CONF_DIR $DUS_CMD $1"
160152
echo -e "${BPurple}hdfs du -s: ${Purple}${CMD}${Color_Off}" > /dev/stderr
161153
execute_withlog ${CMD}
@@ -376,17 +368,15 @@ function ensure-nutchindexing-release () {
376368
cp $NUTCH_ROOT/nutch/bin/nutch $NUTCH_HOME_WORKLOAD/bin
377369

378370
# Patching jcl-over-slf4j version against cdh or hadoop2
379-
if [ $HADOOP_VERSION == "hadoop2" ] || [ ${HADOOP_RELEASE:0:3} == "cdh" ]; then
380-
mkdir $NUTCH_HOME_WORKLOAD/temp
381-
unzip -q $NUTCH_HOME_WORKLOAD/nutch-1.2.job -d $NUTCH_HOME_WORKLOAD/temp
382-
rm -f $NUTCH_HOME_WORKLOAD/temp/lib/jcl-over-slf4j-*.jar
383-
rm -f $NUTCH_HOME_WORKLOAD/temp/lib/slf4j-log4j*.jar
384-
cp ${NUTCH_DIR}/target/dependency/jcl-over-slf4j-*.jar $NUTCH_HOME_WORKLOAD/temp/lib
385-
rm -f $NUTCH_HOME_WORKLOAD/nutch-1.2.job
386-
cd $NUTCH_HOME_WORKLOAD/temp
387-
zip -qr $NUTCH_HOME_WORKLOAD/nutch-1.2.job *
388-
rm -rf $NUTCH_HOME_WORKLOAD/temp
389-
fi
371+
mkdir $NUTCH_HOME_WORKLOAD/temp
372+
unzip -q $NUTCH_HOME_WORKLOAD/nutch-1.2.job -d $NUTCH_HOME_WORKLOAD/temp
373+
rm -f $NUTCH_HOME_WORKLOAD/temp/lib/jcl-over-slf4j-*.jar
374+
rm -f $NUTCH_HOME_WORKLOAD/temp/lib/slf4j-log4j*.jar
375+
cp ${NUTCH_DIR}/target/dependency/jcl-over-slf4j-*.jar $NUTCH_HOME_WORKLOAD/temp/lib
376+
rm -f $NUTCH_HOME_WORKLOAD/nutch-1.2.job
377+
cd $NUTCH_HOME_WORKLOAD/temp
378+
zip -qr $NUTCH_HOME_WORKLOAD/nutch-1.2.job *
379+
rm -rf $NUTCH_HOME_WORKLOAD/temp
390380

391381
echo $NUTCH_HOME_WORKLOAD
392382
}

conf/spark.conf.template

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
# Spark home
22
hibench.spark.home /PATH/TO/YOUR/SPARK/HOME
33

4-
# Spark version. Supported value: spark1.6, spark2.0
5-
hibench.spark.version spark1.6
6-
74
# Spark master
85
# standalone mode: `spark://xxx:7077`
96
# YARN mode: `yarn-client`

0 commit comments

Comments (0)