@@ -379,31 +379,6 @@ def probe_hadoop_release():
         "hibench.hadoop.release"] != "cdh4", "Hadoop release CDH4 is not supported in HiBench6.0, please upgrade to CDH5 or use Apache Hadoop/HDP"


-def probe_spark_version():
-    # probe spark version
-    if not HibenchConf.get("hibench.spark.version", ""):
-        spark_home = HibenchConf.get("hibench.spark.home", "")
-        assert spark_home, "`hibench.spark.home` undefined, please fix it and retry"
-        try:
-            release_file = join(spark_home, "RELEASE")
-            with open(release_file) as f:
-                spark_version_raw = f.readlines()[0]
-                # spark_version_raw="Spark 1.2.2-SNAPSHOT (git revision
-                # f9d8c5e) built for Hadoop 1.0.4\n"  # version sample
-                spark_version = spark_version_raw.split()[1].strip()
-                HibenchConfRef["hibench.spark.version"] = "Probed from file %s, parsed by value:%s" % (
-                    release_file, spark_version_raw)
-        except IOError as e:  # no release file, fall back to the hard way
-            log("Probing spark version, may take a while at first time...")
-            shell_cmd = '( cd %s; mvn help:evaluate -Dexpression=project.version 2> /dev/null | grep -v "INFO" | tail -n 1)' % spark_home
-            spark_version = shell(shell_cmd, timeout=600).strip()
-            HibenchConfRef["hibench.spark.version"] = "Probed by shell command: %s, value: %s" % (
-                shell_cmd, spark_version)
-
-        assert spark_version, "Spark version probe failed, please override `hibench.spark.version` to explicitly define this property"
-        HibenchConf["hibench.spark.version"] = "spark" + spark_version[:3]
-
-
 def probe_hadoop_examples_jars():
     # probe hadoop example jars
     if not HibenchConf.get("hibench.hadoop.examples.jar", ""):
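The deleted probe read the first line of Spark's RELEASE file and took the second whitespace-separated token as the version, storing it under a "spark" prefix truncated to major.minor. A minimal sketch of that parse, reusing the sample line from the code comment above:

    # Sketch of the removed RELEASE-file parse; the sample line comes from
    # the code comment above. Only the second token is kept.
    line = "Spark 1.2.2-SNAPSHOT (git revision f9d8c5e) built for Hadoop 1.0.4"
    spark_version = line.split()[1].strip()   # -> "1.2.2-SNAPSHOT"
    key = "spark" + spark_version[:3]         # -> "spark1.2", as stored in HibenchConf
    print(spark_version, key)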
@@ -511,6 +486,47 @@ def probe_spark_master_webui_port():
 def probe_spark_worker_webui_port():
     return probe_spark_conf_value("SPARK_WORKER_WEBUI_PORT", "8081")

+def probe_masters_slaves_by_Yarn():
+    yarn_executable = os.path.join(os.path.dirname(
+        HibenchConf['hibench.hadoop.executable']), "yarn")
+    cmd = "( " + yarn_executable + \
+        " node -list 2> /dev/null | grep RUNNING )"
+    try:
+        worker_hostnames = [
+            line.split(":")[0] for line in shell(cmd).split("\n")]
+        HibenchConf['hibench.slaves.hostnames'] = " ".join(
+            worker_hostnames)
+        HibenchConfRef[
+            'hibench.slaves.hostnames'] = "Probed by parsing results from: " + cmd
+
+        # parse yarn resource manager from hadoop conf
+        yarn_site_file = os.path.join(
+            HibenchConf["hibench.hadoop.configure.dir"], "yarn-site.xml")
+        with open(yarn_site_file) as f:
+            file_content = f.read()
+            match_address = re.findall(
+                "\<property\>\s*\<name\>\s*yarn.resourcemanager.address[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
+                file_content)
+            match_hostname = re.findall(
+                "\<property\>\s*\<name\>\s*yarn.resourcemanager.hostname[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
+                file_content)
+        if match_address:
+            resourcemanager_hostname = match_address[0][0]
+            HibenchConf[
+                'hibench.masters.hostnames'] = resourcemanager_hostname
+            HibenchConfRef[
+                'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
+        elif match_hostname:
+            resourcemanager_hostname = match_hostname[0][0]
+            HibenchConf[
+                'hibench.masters.hostnames'] = resourcemanager_hostname
+            HibenchConfRef[
+                'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
+        else:
+            assert 0, "Unknown resourcemanager, please check `hibench.hadoop.configure.dir` and \"yarn-site.xml\" file"
+    except Exception as e:
+        assert 0, "Get workers from yarn-site.xml failed, reason:%s\nplease set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually" % e
+

 def probe_masters_slaves_hostnames():

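The new probe_masters_slaves_by_Yarn() takes slave hostnames from `yarn node -list` (each RUNNING line begins with `hostname:port`) and the master hostname from yarn-site.xml. Below is a self-contained sketch of the yarn-site.xml match, with the same pattern written as a raw string and run against a made-up fragment; the hostname and port are illustrative only:

    import re

    # Illustrative yarn-site.xml fragment; host and port are made up.
    sample = """
    <property>
      <name>yarn.resourcemanager.address</name>
      <value>rm-host.example.com:8032</value>
    </property>
    """
    pattern = (r"\<property\>\s*\<name\>\s*yarn.resourcemanager.address[.\w\s]*"
               r"\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>")
    match_address = re.findall(pattern, sample)
    # Two capture groups -> list of (host, ":port") tuples; the port group is dropped.
    print(match_address[0][0])   # -> "rm-host.example.com"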
@@ -522,80 +538,44 @@ def probe_masters_slaves_hostnames():
             "") and HibenchConf.get(
                 "hibench.slaves.hostnames",
                 "")):  # no pre-defined hostnames, let's probe
-        spark_master = HibenchConf['hibench.spark.master']
-        # local mode
-        if spark_master.startswith("local"):
-            HibenchConf['hibench.masters.hostnames'] = ''  # no master
-            # localhost as slaves
-            HibenchConf['hibench.slaves.hostnames'] = 'localhost'
-            HibenchConfRef['hibench.masters.hostnames'] = HibenchConfRef[
-                'hibench.slaves.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
-        # spark standalone mode
-        elif spark_master.startswith("spark"):
-            HibenchConf['hibench.masters.hostnames'] = spark_master[8:].split(":")[
-                0]
-            HibenchConfRef[
-                'hibench.masters.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
-            try:
-                log(spark_master, HibenchConf['hibench.masters.hostnames'])
-                master_port = probe_spark_master_webui_port()
-                worker_port = probe_spark_worker_webui_port()
-                # Make the assumption that the master is in internal network, and force not to use any proxies
-                with closing(urllib.urlopen('http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port), proxies={})) as page:
-                    worker_hostnames = [
-                        re.findall(
-                            "http:\/\/([a-zA-Z\-\._0-9]+):%s" %
-                            worker_port,
-                            x)[0] for x in page.readlines() if "%s" %
-                        worker_port in x and "worker" in x]
-                HibenchConf['hibench.slaves.hostnames'] = " ".join(
-                    worker_hostnames)
-                HibenchConfRef['hibench.slaves.hostnames'] = "Probed by parsing " + \
-                    'http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port)
-            except Exception as e:
-                assert 0, "Get workers from spark master's web UI page failed, \nPlease check your configurations, network settings, proxy settings, or set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually, master_port: %s, slave_port:%s" % (master_port, worker_port)
-        # yarn mode
-        elif spark_master.startswith("yarn"):
-            yarn_executable = os.path.join(os.path.dirname(
-                HibenchConf['hibench.hadoop.executable']), "yarn")
-            cmd = "( " + yarn_executable + \
-                " node -list 2> /dev/null | grep RUNNING )"
-            try:
-                worker_hostnames = [
-                    line.split(":")[0] for line in shell(cmd).split("\n")]
-                HibenchConf['hibench.slaves.hostnames'] = " ".join(
-                    worker_hostnames)
+        if not (HibenchConf.get("hibench.spark.master", "")):
+            probe_masters_slaves_by_Yarn()
+        else:
+            spark_master = HibenchConf['hibench.spark.master']
+            # local mode
+            if spark_master.startswith("local"):
+                HibenchConf['hibench.masters.hostnames'] = ''  # no master
+                # localhost as slaves
+                HibenchConf['hibench.slaves.hostnames'] = 'localhost'
+                HibenchConfRef['hibench.masters.hostnames'] = HibenchConfRef[
+                    'hibench.slaves.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
+            # spark standalone mode
+            elif spark_master.startswith("spark"):
+                HibenchConf['hibench.masters.hostnames'] = spark_master[8:].split(":")[
+                    0]
                 HibenchConfRef[
-                    'hibench.slaves.hostnames'] = "Probed by parsing results from: " + cmd
-
-                # parse yarn resource manager from hadoop conf
-                yarn_site_file = os.path.join(
-                    HibenchConf["hibench.hadoop.configure.dir"], "yarn-site.xml")
-                with open(yarn_site_file) as f:
-                    file_content = f.read()
-                    match_address = re.findall(
-                        "\<property\>\s*\<name\>\s*yarn.resourcemanager.address[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
-                        file_content)
-                    match_hostname = re.findall(
-                        "\<property\>\s*\<name\>\s*yarn.resourcemanager.hostname[.\w\s]*\<\/name\>\s*\<value\>([a-zA-Z\-\._0-9]+)(:\d+)?\<\/value\>",
-                        file_content)
-                if match_address:
-                    resourcemanager_hostname = match_address[0][0]
-                    HibenchConf[
-                        'hibench.masters.hostnames'] = resourcemanager_hostname
-                    HibenchConfRef[
-                        'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
-                elif match_hostname:
-                    resourcemanager_hostname = match_hostname[0][0]
-                    HibenchConf[
-                        'hibench.masters.hostnames'] = resourcemanager_hostname
-                    HibenchConfRef[
-                        'hibench.masters.hostnames'] = "Parsed from " + yarn_site_file
-                else:
-                    assert 0, "Unknown resourcemanager, please check `hibench.hadoop.configure.dir` and \"yarn-site.xml\" file"
-            except Exception as e:
-                assert 0, "Get workers from yarn-site.xml failed, reason:%s\nplease set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually" % e
-
+                    'hibench.masters.hostnames'] = "Probed by the evidence of 'hibench.spark.master=%s'" % spark_master
+                try:
+                    log(spark_master, HibenchConf['hibench.masters.hostnames'])
+                    master_port = probe_spark_master_webui_port()
+                    worker_port = probe_spark_worker_webui_port()
+                    # Make the assumption that the master is in internal network, and force not to use any proxies
+                    with closing(urllib.urlopen('http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port), proxies={})) as page:
+                        worker_hostnames = [
+                            re.findall(
+                                "http:\/\/([a-zA-Z\-\._0-9]+):%s" %
+                                worker_port,
+                                x)[0] for x in page.readlines() if "%s" %
+                            worker_port in x and "worker" in x]
+                    HibenchConf['hibench.slaves.hostnames'] = " ".join(
+                        worker_hostnames)
+                    HibenchConfRef['hibench.slaves.hostnames'] = "Probed by parsing " + \
+                        'http://%s:%s' % (HibenchConf['hibench.masters.hostnames'], master_port)
+                except Exception as e:
+                    assert 0, "Get workers from spark master's web UI page failed, \nPlease check your configurations, network settings, proxy settings, or set `hibench.masters.hostnames` and `hibench.slaves.hostnames` manually, master_port: %s, slave_port:%s" % (master_port, worker_port)
+            # yarn mode
+            elif spark_master.startswith("yarn"):
+                probe_masters_slaves_by_Yarn()
     # reset hostnames according to gethostbyaddr
     names = set(
         HibenchConf['hibench.masters.hostnames'].split() +
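In the standalone branch above, the master hostname is sliced straight out of the master URL: `"spark://"` is eight characters, so `[8:]` drops the scheme and `split(":")[0]` drops the port. A tiny demonstration with a made-up URL:

    # Made-up standalone master URL; mirrors spark_master[8:].split(":")[0] above.
    spark_master = "spark://master-node.example.com:7077"
    host = spark_master[8:].split(":")[0]
    print(host)   # -> "master-node.example.com"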
@@ -658,7 +638,6 @@ def generate_optional_value():
 
     probe_java_bin()
     probe_hadoop_release()
-    probe_spark_version()
     probe_hadoop_examples_jars()
     probe_hadoop_examples_test_jars()
     probe_sleep_job_jar()
@@ -676,7 +655,6 @@ def export_config(workload_name, framework_name):
 
     spark_conf_dir = join(conf_dir, "sparkbench")
     spark_prop_conf_filename = join(spark_conf_dir, "spark.conf")
-    samza_prop_conf_filename = join(spark_conf_dir, "samza.conf")
     sparkbench_prop_conf_filename = join(spark_conf_dir, "sparkbench.conf")
 
     if not os.path.exists(spark_conf_dir):
@@ -702,7 +680,6 @@ def export_config(workload_name, framework_name):
             "SPARKBENCH_PROPERTIES_FILES=%s\n" %
             sparkbench_prop_conf_filename)
         f.write("SPARK_PROP_CONF=%s\n" % spark_prop_conf_filename)
-        f.write("SAMZA_PROP_CONF=%s\n" % samza_prop_conf_filename)
         f.write("WORKLOAD_RESULT_FOLDER=%s\n" % join(conf_dir, ".."))
         f.write("HIBENCH_WORKLOAD_CONF=%s\n" % conf_filename)
         f.write("export HADOOP_EXECUTABLE\n")
@@ -721,14 +698,6 @@ def export_config(workload_name, framework_name):
                 f.write("# Source: %s\n" % source)
                 f.write("\n".join(sorted(items)))
                 f.write("\n\n")
-    # generate configure for samza
-    with open(samza_prop_conf_filename, 'w') as f:
-        for source in sorted(sources.keys()):
-            items = [x for x in sources[source] if x.startswith("samza.")]
-            if items:
-                f.write("# Source: %s\n" % source)
-                f.write("\n".join(sorted(items)))
-                f.write("\n\n")
     # generate configure for spark
     with open(sparkbench_prop_conf_filename, 'w') as f:
         for source in sorted(sources.keys()):
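Each generated conf file comes from the same prefix filter over `sources`, a mapping from source conf file to the `key=value` lines it contributed; removing the samza block simply drops the `samza.` pass. A sketch of the filter with made-up data:

    # `sources` maps source file -> contributed "key=value" lines (made-up data).
    sources = {
        "conf/spark.conf": ["spark.executor.memory=4g", "hibench.scale.profile=tiny"],
    }
    for source in sorted(sources.keys()):
        items = [x for x in sources[source] if x.startswith("spark.")]
        if items:
            print("# Source: %s" % source)
            print("\n".join(sorted(items)))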