@@ -22,32 +22,64 @@ import org.apache.spark.SparkContext
2222import org .apache .spark .mllib .linalg .Matrix
2323import org .apache .spark .mllib .linalg .SingularValueDecomposition
2424import org .apache .spark .mllib .linalg .Vector
25- import org .apache .spark .mllib .linalg .Vectors
2625import org .apache .spark .mllib .linalg .distributed .RowMatrix
2726import org .apache .spark .rdd .RDD
2827
28+ import scopt .OptionParser
29+
2930object SVDExample {
3031
/**
 * Command-line configuration for the SVD example.
 *
 * @param numFeatures       value of the `numFeatures` option (defaults to 0)
 * @param numSingularValues number of singular values passed to `computeSVD` (defaults to 0)
 * @param computeU          whether the U factor should be computed (defaults to true)
 * @param maxResultSize     value used for `spark.driver.maxResultSize` (defaults to "1g")
 * @param dataPath          location of the input data; `null` until the required
 *                          `<dataPath>` argument has been parsed
 */
case class Params(
    numFeatures: Int = 0,
    numSingularValues: Int = 0,
    computeU: Boolean = true,
    maxResultSize: String = "1g",
    dataPath: String = null
)
39+
/**
 * Command-line entry point: parses `args` into a [[Params]] and passes it to `run`.
 *
 * Recognised options are `numFeatures`, `numSingularValues`, `computeU` and
 * `maxResultSize`, plus the required positional `<dataPath>` argument.
 * When the arguments cannot be parsed or fail validation, the process exits
 * with status 1 without calling `run`.
 *
 * @param args raw command-line arguments
 */
def main(args: Array[String]): Unit = {
  val defaultParams = Params()

  val parser = new OptionParser[Params]("SVD") {
    head("SVD: an example of SVD for matrix decomposition.")
    opt[Int]("numFeatures")
      .text(s"numFeatures, default: ${defaultParams.numFeatures}")
      .action((x, c) => c.copy(numFeatures = x))
    opt[Int]("numSingularValues")
      .text(s"numSingularValues, default: ${defaultParams.numSingularValues}")
      .action((x, c) => c.copy(numSingularValues = x))
    opt[Boolean]("computeU")
      .text(s"computeU, default: ${defaultParams.computeU}")
      .action((x, c) => c.copy(computeU = x))
    opt[String]("maxResultSize")
      .text(s"maxResultSize, default: ${defaultParams.maxResultSize}")
      .action((x, c) => c.copy(maxResultSize = x))
    arg[String]("<dataPath>")
      .required()
      .text("data path of SVD")
      .action((x, c) => c.copy(dataPath = x))
    // The default of 0 singular values is not usable: computeSVD needs k > 0.
    // Reject it here so the failure is reported with the other argument errors
    // instead of surfacing later inside the Spark job.
    checkConfig { c =>
      if (c.numSingularValues <= 0) {
        failure(s"numSingularValues must be positive, but got ${c.numSingularValues}")
      } else {
        success
      }
    }
  }

  parser.parse(args, defaultParams) match {
    case Some(params) => run(params)
    case None => sys.exit(1)
  }
}
66+
67+ def run (params : Params ): Unit = {
4168
4269 val conf = new SparkConf ()
43- .setAppName(" SVDExample " )
44- .set(" spark.driver.maxResultSize" ,maxResultSize)
70+ .setAppName(s " SVD with $params " )
71+ .set(" spark.driver.maxResultSize" , params. maxResultSize)
4572 val sc = new SparkContext (conf)
4673
47- val dataRDD : RDD [Vector ] = sc.objectFile(inputPath)
48- val mat : RowMatrix = new RowMatrix (dataRDD)
74+ val dataPath = params.dataPath
75+ val numFeatures = params.numFeatures
76+ val numSingularValues = params.numSingularValues
77+ val computeU = params.computeU
78+
79+ val data : RDD [Vector ] = sc.objectFile(dataPath)
80+ val mat : RowMatrix = new RowMatrix (data)
4981
50- val svd : SingularValueDecomposition [RowMatrix , Matrix ] = mat.computeSVD(numFeatures - 1 , computeU = true )
82+ val svd : SingularValueDecomposition [RowMatrix , Matrix ] = mat.computeSVD(numSingularValues , computeU)
5183 val U : RowMatrix = svd.U // The U factor is a RowMatrix.
5284 val s : Vector = svd.s // The singular values are stored in a local dense vector.
5385 val V : Matrix = svd.V // The V factor is a local dense matrix.
0 commit comments