Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit d8ff0cd

Browse files
committed
Change SVM argument handling to use OptionParser
1 parent ff65611 commit d8ff0cd

5 files changed

Lines changed: 163 additions & 30 deletions

File tree

bin/functions/hibench_prop_env_mapping.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@
9696
# For SVM
9797
NUM_EXAMPLES_SVM="hibench.svm.examples",
9898
NUM_FEATURES_SVM="hibench.svm.features",
99+
NUM_ITERATIONS_SVM="hibench.svm.numIterations",
100+
STEPSIZE_SVM="hibench.svm.stepSize",
101+
REGPARAM_SVM="hibench.svm.regParam",
99102
# For ALS
100103
NUM_USERS_ALS="hibench.als.users",
101104
NUM_PRODUCTS_ALS="hibench.als.products",

bin/workloads/ml/svm/spark/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true
2626

2727
SIZE=`dir_size $INPUT_HDFS`
2828
START_TIME=`timestamp`
29-
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample ${INPUT_HDFS}
29+
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample --numIterations $NUM_ITERATIONS_SVM --stepSize $STEPSIZE_SVM --regParam $REGPARAM_SVM $INPUT_HDFS
3030
END_TIME=`timestamp`
3131

3232
gen_report ${START_TIME} ${END_TIME} ${SIZE}

conf/workloads/ml/svm.conf

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
1-
hibench.svm.tiny.examples 1000
2-
hibench.svm.tiny.features 1000
3-
hibench.svm.small.examples 10000
4-
hibench.svm.small.features 10000
5-
hibench.svm.large.examples 50000
6-
hibench.svm.large.features 100000
7-
hibench.svm.huge.examples 120000
8-
hibench.svm.huge.features 300000
9-
hibench.svm.gigantic.examples 140000
10-
hibench.svm.gigantic.features 300000
11-
hibench.svm.bigdata.examples 150000
12-
hibench.svm.bigdata.features 300000
1+
hibench.svm.tiny.examples 1000
2+
hibench.svm.tiny.features 1000
3+
hibench.svm.small.examples 10000
4+
hibench.svm.small.features 10000
5+
hibench.svm.large.examples 50000
6+
hibench.svm.large.features 100000
7+
hibench.svm.huge.examples 120000
8+
hibench.svm.huge.features 300000
9+
hibench.svm.gigantic.examples 140000
10+
hibench.svm.gigantic.features 300000
11+
hibench.svm.bigdata.examples 150000
12+
hibench.svm.bigdata.features 300000
1313

1414

1515
hibench.svm.examples ${hibench.svm.${hibench.scale.profile}.examples}
1616
hibench.svm.features ${hibench.svm.${hibench.scale.profile}.features}
1717
hibench.svm.partitions ${hibench.default.map.parallelism}
1818

19-
hibench.workload.input ${hibench.hdfs.data.dir}/SVM/Input
20-
hibench.workload.output ${hibench.hdfs.data.dir}/SVM/Output
19+
hibench.svm.numIterations 100
20+
hibench.svm.stepSize 1.0
21+
hibench.svm.regParam 0.01
22+
23+
hibench.workload.input ${hibench.hdfs.data.dir}/SVM/Input
24+
hibench.workload.output ${hibench.hdfs.data.dir}/SVM/Output
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package com.intel.hibench.sparkbench.ml
19+
20+
import org.apache.spark.{SparkConf, SparkContext}
21+
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
22+
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
23+
import org.apache.spark.rdd.RDD
24+
import org.apache.spark.mllib.regression.LabeledPoint
25+
26+
import scopt.OptionParser
27+
28+
object SVMWithSGD {

  /**
   * Command-line parameters for the SVM workload.
   *
   * @param numIterations number of SGD iterations (default 100)
   * @param stepSize      initial SGD step size (default 1.0)
   * @param regParam      L2 regularization parameter (default 0.01)
   * @param dataPath      HDFS path of the input data (required positional arg)
   */
  case class Params(
      numIterations: Int = 100,
      stepSize: Double = 1.0,
      regParam: Double = 0.01,
      // Empty rather than null: the argument is .required(), so the parser
      // always overwrites it before run() is reached.
      dataPath: String = "")

  def main(args: Array[String]): Unit = {
    val defaultParams = Params()

    val parser = new OptionParser[Params]("SVM") {
      head("SVM: an example of SVM for classification.")
      opt[Int]("numIterations")
        .text(s"numIterations, default: ${defaultParams.numIterations}")
        .action((x, c) => c.copy(numIterations = x))
      opt[Double]("stepSize")
        .text(s"stepSize, default: ${defaultParams.stepSize}")
        .action((x, c) => c.copy(stepSize = x))
      opt[Double]("regParam")
        .text(s"regParam, default: ${defaultParams.regParam}")
        .action((x, c) => c.copy(regParam = x))
      arg[String]("<dataPath>")
        .required()
        .text("data path of SVM")
        .action((x, c) => c.copy(dataPath = x))
    }
    parser.parse(args, defaultParams) match {
      case Some(params) => run(params)
      case _ => sys.exit(1)
    }
  }

  /** Trains an SVM on `params.dataPath`, evaluates AUC on a held-out split. */
  def run(params: Params): Unit = {

    val conf = new SparkConf().setAppName(s"SVM with $params")
    val sc = new SparkContext(conf)

    val dataPath = params.dataPath
    val numIterations = params.numIterations
    val stepSize = params.stepSize
    val regParam = params.regParam

    val data: RDD[LabeledPoint] = sc.objectFile(dataPath)

    // Split data into training (60%) and test (40%).
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
    val training = splits(0).cache()
    val test = splits(1)

    // Run training algorithm to build the model.
    // BUG FIX: the enclosing object is also named `SVMWithSGD`, so it shadows
    // the imported org.apache.spark.mllib.classification.SVMWithSGD inside
    // this scope — a bare `SVMWithSGD.train(...)` would resolve to this
    // object (which has no `train`) and fail to compile. Fully qualify the
    // MLlib trainer instead.
    val model = org.apache.spark.mllib.classification.SVMWithSGD.train(
      training, numIterations, stepSize, regParam)

    // Clear the default threshold so predict() returns raw scores.
    model.clearThreshold()

    // Compute raw scores on the test set.
    val scoreAndLabels = test.map { point =>
      val score = model.predict(point.features)
      (score, point.label)
    }

    // Get evaluation metrics.
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    val auROC = metrics.areaUnderROC()

    println("Area under ROC = " + auROC)

    sc.stop()
  }
}

sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/SVMWithSGDExample.scala

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,40 +15,69 @@
1515
* limitations under the License.
1616
*/
1717

18-
// scalastyle:off println
1918
package com.intel.hibench.sparkbench.ml
2019

2120
import org.apache.spark.{SparkConf, SparkContext}
22-
// $example on$
2321
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
2422
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
25-
import org.apache.spark.mllib.util.MLUtils
2623
import org.apache.spark.rdd.RDD
2724
import org.apache.spark.mllib.regression.LabeledPoint
28-
// $example off$
25+
26+
import scopt.OptionParser
2927

3028
object SVMWithSGDExample {
3129

30+
case class Params(
31+
numIterations: Int = 100,
32+
stepSize: Double = 1.0,
33+
regParam: Double = 0.01,
34+
dataPath: String = null
35+
)
36+
3237
def main(args: Array[String]): Unit = {
33-
var inputPath = ""
34-
if (args.length == 1) {
35-
inputPath = args(0)
36-
}
38+
val defaultParams = Params()
39+
40+
val parser = new OptionParser[Params]("SVM") {
41+
head("SVM: an example of SVM for classification.")
42+
opt[Int]("numIterations")
43+
.text(s"numIterations, default: ${defaultParams.numIterations}")
44+
.action((x,c) => c.copy(numIterations = x))
45+
opt[Double]("stepSize")
46+
.text(s"stepSize, default: ${defaultParams.stepSize}")
47+
.action((x,c) => c.copy(stepSize = x))
48+
opt[Double]("regParam")
49+
.text(s"regParam, default: ${defaultParams.regParam}")
50+
.action((x,c) => c.copy(regParam = x))
51+
arg[String]("<dataPath>")
52+
.required()
53+
.text("data path of SVM")
54+
.action((x, c) => c.copy(dataPath = x))
55+
}
56+
parser.parse(args, defaultParams) match {
57+
case Some(params) => run(params)
58+
case _ => sys.exit(1)
59+
}
60+
}
3761

38-
val conf = new SparkConf().setAppName("SVMWithSGDExample")
62+
def run(params: Params): Unit = {
63+
64+
val conf = new SparkConf().setAppName(s"SVM with $params")
3965
val sc = new SparkContext(conf)
4066

41-
// $example on$
42-
val data: RDD[LabeledPoint] = sc.objectFile(inputPath)
67+
val dataPath = params.dataPath
68+
val numIterations = params.numIterations
69+
val stepSize = params.stepSize
70+
val regParam = params.regParam
71+
72+
val data: RDD[LabeledPoint] = sc.objectFile(dataPath)
4373

4474
// Split data into training (60%) and test (40%).
4575
val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
4676
val training = splits(0).cache()
4777
val test = splits(1)
4878

4979
// Run training algorithm to build the model
50-
val numIterations = 100
51-
val model = SVMWithSGD.train(training, numIterations)
80+
val model = SVMWithSGD.train(training, numIterations, stepSize, regParam)
5281

5382
// Clear the default threshold.
5483
model.clearThreshold()
@@ -65,8 +94,6 @@ object SVMWithSGDExample {
6594

6695
println("Area under ROC = " + auROC)
6796

68-
// Save and load model
6997
sc.stop()
7098
}
7199
}
72-
// scalastyle:on println

0 commit comments

Comments
 (0)