Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit feeca9a

Browse files
author
Meng, Peng
authored
Merge pull request #518 from jtengyp/linear
Change the Linear Regression arguments to be parsed with OptionParser
2 parents 0155d9b + 6544741 commit feeca9a

4 files changed

Lines changed: 53 additions & 24 deletions

File tree

bin/functions/hibench_prop_env_mapping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,10 @@
106106
NUM_ITERATIONS_ALS="hibench.als.num_iterations",
107107
LAMBDA_ALS="hibench.als.Lambda",
108108
KYRO_ALS="hibench.als.kyro",
109-
110109
# For PCA
111110
NUM_EXAMPLES_PCA="hibench.pca.examples",
112111
NUM_FEATURES_PCA="hibench.pca.features",
113112
MAX_RESULT_SIZE_PCA ="hibench.pca.maxresultsize",
114-
115113
# For Gradient Boosting Tree
116114
NUM_EXAMPLES_GBT="hibench.gbt.examples",
117115
NUM_FEATURES_GBT="hibench.gbt.features",
@@ -136,6 +134,8 @@
136134
# For Linear Regression
137135
NUM_EXAMPLES_LINEAR="hibench.linear.examples",
138136
NUM_FEATURES_LINEAR="hibench.linear.features",
137+
NUM_ITERATIONS_LINEAR="hibench.linear.numIterations",
138+
STEPSIZE_LINEAR="hibench.linear.stepSize",
139139
# For LDA
140140
NUM_DOCUMENTS_LDA="hibench.lda.num_of_documents",
141141
NUM_VOCABULARY_LDA="hibench.lda.num_of_vocabulary",

bin/workloads/ml/linear/spark/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true
2626

2727
SIZE=`dir_size $INPUT_HDFS`
2828
START_TIME=`timestamp`
29-
run_spark_job com.intel.hibench.sparkbench.ml.LinearRegression ${INPUT_HDFS}
29+
run_spark_job com.intel.hibench.sparkbench.ml.LinearRegression --numIterations $NUM_ITERATIONS_LINEAR --stepSize $STEPSIZE_LINEAR $INPUT_HDFS
3030
END_TIME=`timestamp`
3131

3232
gen_report ${START_TIME} ${END_TIME} ${SIZE}

conf/workloads/ml/linear.conf

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
1-
hibench.linear.tiny.examples 50000
2-
hibench.linear.tiny.features 10000
3-
hibench.linear.small.examples 100000
4-
hibench.linear.small.features 20000
5-
hibench.linear.large.examples 200000
6-
hibench.linear.large.features 30000
7-
hibench.linear.huge.examples 300000
8-
hibench.linear.huge.features 50000
9-
hibench.linear.gigantic.examples 500000
10-
hibench.linear.gigantic.features 80000
11-
hibench.linear.bigdata.examples 1000000
12-
hibench.linear.bigdata.features 100000
1+
hibench.linear.tiny.examples 50000
2+
hibench.linear.tiny.features 10000
3+
hibench.linear.small.examples 100000
4+
hibench.linear.small.features 20000
5+
hibench.linear.large.examples 200000
6+
hibench.linear.large.features 30000
7+
hibench.linear.huge.examples 300000
8+
hibench.linear.huge.features 50000
9+
hibench.linear.gigantic.examples 500000
10+
hibench.linear.gigantic.features 80000
11+
hibench.linear.bigdata.examples 1000000
12+
hibench.linear.bigdata.features 100000
1313

1414
hibench.linear.examples ${hibench.linear.${hibench.scale.profile}.examples}
1515
hibench.linear.features ${hibench.linear.${hibench.scale.profile}.features}
1616
hibench.linear.partitions ${hibench.default.map.parallelism}
1717

18+
hibench.linear.numIterations 100
19+
hibench.linear.stepSize 0.00001
20+
1821
hibench.workload.input ${hibench.hdfs.data.dir}/Linear/Input
1922
hibench.workload.output ${hibench.hdfs.data.dir}/Linear/Output

sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/LinearRegression.scala

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,24 +24,50 @@ import org.apache.spark.mllib.regression.LinearRegressionModel
2424
import org.apache.spark.mllib.regression.LinearRegressionWithSGD
2525
import org.apache.spark.rdd.RDD
2626

27+
import scopt.OptionParser
28+
2729
object LinearRegression {
2830

31+
case class Params(
32+
dataPath: String = null,
33+
numIterations: Int = 100,
34+
stepSize: Double = 0.00000001
35+
)
36+
2937
def main(args: Array[String]): Unit = {
30-
var inputPath = ""
38+
val defaultParams = Params()
3139

32-
if (args.length == 1) {
33-
inputPath = args(0)
40+
val parser = new OptionParser[Params]("Linear"){
41+
head("Linear Regression: an example of linear regression with SGD optimizer")
42+
opt[Int]("numIterations")
43+
.text(s"numIterations, default: ${defaultParams.numIterations}")
44+
.action((x,c) => c.copy(numIterations = x))
45+
opt[Double]("stepSize")
46+
.text(s"stepSize, default: ${defaultParams.stepSize}")
47+
.action((x,c) => c.copy(stepSize = x))
48+
arg[String]("<dataPath>")
49+
.required()
50+
.text("Input path for data")
51+
.action((x,c) => c.copy(dataPath = x))
3452
}
35-
36-
val conf = new SparkConf().setAppName("LinearRegressionWithSGD")
53+
parser.parse(args, defaultParams) match {
54+
case Some(params) => run(params)
55+
case _ => sys.exit(1)
56+
}
57+
}
58+
59+
def run(params: Params): Unit = {
60+
val conf = new SparkConf().setAppName(s"LinearRegressionWithSGD with $params")
3761
val sc = new SparkContext(conf)
62+
63+
val dataPath = params.dataPath
64+
val numIterations = params.numIterations
65+
val stepSize = params.stepSize
3866

39-
// Load training data in LIBSVM format.
40-
val data: RDD[LabeledPoint] = sc.objectFile(inputPath)
67+
// Load training data in LabeledPoint format.
68+
val data: RDD[LabeledPoint] = sc.objectFile(dataPath)
4169

4270
// Building the model
43-
val numIterations = 100
44-
val stepSize = 0.00000001
4571
val model = LinearRegressionWithSGD.train(data, numIterations, stepSize)
4672

4773
// Evaluate model on training examples and compute training error

0 commit comments

Comments
 (0)