Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.

Commit 25b298c

Browse files
committed
Change the SVD example's command-line arguments to use scopt's OptionParser
1 parent af6bd6c commit 25b298c

4 files changed

Lines changed: 76 additions & 33 deletions

File tree

bin/functions/hibench_prop_env_mapping.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@
132132
# For SVD
133133
NUM_EXAMPLES_SVD="hibench.svd.examples",
134134
NUM_FEATURES_SVD="hibench.svd.features",
135+
NUM_SINGULAR_VALUES_SVD="hibench.svd.singularvalues",
136+
COMPUTEU_SVD="hibench.svd.computeU",
135137
MAXRESULTSIZE_SVD="hibench.svd.maxresultsize",
136138
# For Linear Regression
137139
NUM_EXAMPLES_LINEAR="hibench.linear.examples",

bin/workloads/ml/svd/spark/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true
2626

2727
SIZE=`dir_size $INPUT_HDFS`
2828
START_TIME=`timestamp`
29-
run_spark_job com.intel.hibench.sparkbench.ml.SVDExample $INPUT_HDFS $NUM_FEATURES_SVD $MAXRESULTSIZE_SVD
29+
run_spark_job com.intel.hibench.sparkbench.ml.SVDExample --numFeatures $NUM_FEATURES_SVD --numSingularValues $NUM_SINGULAR_VALUES_SVD --computeU $COMPUTEU_SVD --maxResultSize $MAXRESULTSIZE_SVD $INPUT_HDFS
3030
END_TIME=`timestamp`
3131

3232
gen_report ${START_TIME} ${END_TIME} ${SIZE}

conf/workloads/ml/svd.conf

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,36 @@
1-
hibench.svd.tiny.examples 100
2-
hibench.svd.tiny.features 1000
3-
hibench.svd.tiny.maxresultsize "1g"
4-
hibench.svd.small.examples 1000
5-
hibench.svd.small.features 2000
6-
hibench.svd.small.maxresultsize "1g"
7-
hibench.svd.large.examples 2000
8-
hibench.svd.large.features 4000
9-
hibench.svd.large.maxresultsize "1g"
10-
hibench.svd.huge.examples 5000
11-
hibench.svd.huge.features 5000
12-
hibench.svd.huge.maxresultsize "4g"
13-
hibench.svd.gigantic.examples 6000
14-
hibench.svd.gigantic.features 6000
15-
hibench.svd.gigantic.maxresultsize "4g"
16-
hibench.svd.bigdata.examples 7000
17-
hibench.svd.bigdata.features 7000
18-
hibench.svd.bigdata.maxresultsize "4g"
1+
hibench.svd.tiny.examples 100
2+
hibench.svd.tiny.features 1000
3+
hibench.svd.tiny.singularvalues 800
4+
hibench.svd.tiny.maxresultsize "1g"
5+
hibench.svd.small.examples 1000
6+
hibench.svd.small.features 2000
7+
hibench.svd.small.singularvalues 1500
8+
hibench.svd.small.maxresultsize "1g"
9+
hibench.svd.large.examples 2000
10+
hibench.svd.large.features 4000
11+
hibench.svd.large.singularvalues 3000
12+
hibench.svd.large.maxresultsize "1g"
13+
hibench.svd.huge.examples 5000
14+
hibench.svd.huge.features 5000
15+
hibench.svd.huge.singularvalues 4000
16+
hibench.svd.huge.maxresultsize "4g"
17+
hibench.svd.gigantic.examples 6000
18+
hibench.svd.gigantic.features 6000
19+
hibench.svd.gigantic.singularvalues 5000
20+
hibench.svd.gigantic.maxresultsize "4g"
21+
hibench.svd.bigdata.examples 7000
22+
hibench.svd.bigdata.features 7000
23+
hibench.svd.bigdata.singularvalues 6000
24+
hibench.svd.bigdata.maxresultsize "4g"
1925

2026

2127
hibench.svd.examples ${hibench.svd.${hibench.scale.profile}.examples}
2228
hibench.svd.features ${hibench.svd.${hibench.scale.profile}.features}
29+
hibench.svd.singularvalues ${hibench.svd.${hibench.scale.profile}.singularvalues}
2330
hibench.svd.maxresultsize ${hibench.svd.${hibench.scale.profile}.maxresultsize}
2431
hibench.svd.partitions ${hibench.default.map.parallelism}
2532

33+
hibench.svd.computeU true
34+
2635
hibench.workload.input ${hibench.hdfs.data.dir}/SVD/Input
2736
hibench.workload.output ${hibench.hdfs.data.dir}/SVD/Output

sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/SVDExample.scala

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,64 @@ import org.apache.spark.SparkContext
2222
import org.apache.spark.mllib.linalg.Matrix
2323
import org.apache.spark.mllib.linalg.SingularValueDecomposition
2424
import org.apache.spark.mllib.linalg.Vector
25-
import org.apache.spark.mllib.linalg.Vectors
2625
import org.apache.spark.mllib.linalg.distributed.RowMatrix
2726
import org.apache.spark.rdd.RDD
2827

28+
import scopt.OptionParser
29+
2930
object SVDExample {
3031

32+
case class Params(
33+
numFeatures: Int = 0,
34+
numSingularValues: Int = 0,
35+
computeU: Boolean = true,
36+
maxResultSize: String = "1g",
37+
dataPath: String = null
38+
)
39+
3140
def main(args: Array[String]): Unit = {
32-
var inputPath = ""
33-
var numFeatures = 0
34-
var maxResultSize = "1g"
35-
36-
if (args.length == 3) {
37-
inputPath = args(0)
38-
numFeatures = args(1).toInt
39-
maxResultSize = args(2)
41+
val defaultParams = Params()
42+
val parser = new OptionParser[Params]("SVD") {
43+
head("SVD: an example of SVD for matrix decomposition.")
44+
opt[Int]("numFeatures")
45+
.text(s"numFeatures, default: ${defaultParams.numFeatures}")
46+
.action((x,c) => c.copy(numFeatures = x))
47+
opt[Int]("numSingularValues")
48+
.text(s"numSingularValues, default: ${defaultParams.numSingularValues}")
49+
.action((x,c) => c.copy(numSingularValues = x))
50+
opt[Boolean]("computeU")
51+
.text(s"computeU, default: ${defaultParams.computeU}")
52+
.action((x,c) => c.copy(computeU = x))
53+
opt[String]("maxResultSize")
54+
.text(s"maxResultSize, default: ${defaultParams.maxResultSize}")
55+
.action((x,c) => c.copy(maxResultSize = x))
56+
arg[String]("<dataPath>")
57+
.required()
58+
.text("data path of SVD")
59+
.action((x,c) => c.copy(dataPath = x))
4060
}
61+
parser.parse(args, defaultParams) match {
62+
case Some(params) => run(params)
63+
case _ => sys.exit(1)
64+
}
65+
}
66+
67+
def run(params: Params): Unit = {
4168

4269
val conf = new SparkConf()
43-
.setAppName("SVDExample")
44-
.set("spark.driver.maxResultSize",maxResultSize)
70+
.setAppName(s"SVD with $params")
71+
.set("spark.driver.maxResultSize", params.maxResultSize)
4572
val sc = new SparkContext(conf)
4673

47-
val dataRDD: RDD[Vector] = sc.objectFile(inputPath)
48-
val mat: RowMatrix = new RowMatrix(dataRDD)
74+
val dataPath = params.dataPath
75+
val numFeatures = params.numFeatures
76+
val numSingularValues = params.numSingularValues
77+
val computeU = params.computeU
78+
79+
val data: RDD[Vector] = sc.objectFile(dataPath)
80+
val mat: RowMatrix = new RowMatrix(data)
4981

50-
val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(numFeatures-1, computeU = true)
82+
val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(numSingularValues, computeU)
5183
val U: RowMatrix = svd.U // The U factor is a RowMatrix.
5284
val s: Vector = svd.s // The singular values are stored in a local dense vector.
5385
val V: Matrix = svd.V // The V factor is a local dense matrix.

0 commit comments

Comments
 (0)