This repository was archived by the owner on Dec 15, 2025. It is now read-only.
File tree Expand file tree Collapse file tree
sparkbench/ml/src/main/scala/com/intel/sparkbench/ml Expand file tree Collapse file tree Original file line number Diff line number Diff line change 133133 NUM_TOPICS_LDA = "hibench.lda.num_of_topics" ,
134134 DOC_LEN_MIN_LDA = "hibench.lda.doc_len_min" ,
135135 DOC_LEN_MAX_LDA = "hibench.lda.doc_len_max" ,
136+ NUM_ITERATIONS_LDA = "hibench.lda.num_iterations" ,
137+ OPTIMIZER_LDA = "hibench.lda.optimizer" ,
136138 MAXRESULTSIZE_LDA = "hibench.lda.maxresultsize" ,
137139 # For Pagerank
138140 PAGERANK_BASE_HDFS = "hibench.pagerank.base.hdfs" ,
Original file line number Diff line number Diff line change @@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true
2626
2727SIZE=` dir_size $INPUT_HDFS `
2828START_TIME=` timestamp`
29- run_spark_job com.intel.hibench.sparkbench.ml.LDAExample $INPUT_HDFS $OUTPUT_HDFS $NUM_TOPICS_LDA $MAXRESULTSIZE_LDA
29+ run_spark_job com.intel.hibench.sparkbench.ml.LDAExample $INPUT_HDFS $OUTPUT_HDFS $NUM_TOPICS_LDA $NUM_ITERATIONS_LDA $OPTIMIZER_LDA $MAXRESULTSIZE_LDA
3030END_TIME=` timestamp`
3131
3232gen_report ${START_TIME} ${END_TIME} ${SIZE}
Original file line number Diff line number Diff line change @@ -44,5 +44,8 @@ hibench.lda.doc_len_max ${hibench.lda.${hibench.scale.
4444hibench.lda.maxresultsize ${hibench.lda.${hibench.scale.profile}.maxresultsize}
4545hibench.lda.partitions ${hibench.default.map.parallelism}
4646
47+ hibench.lda.optimizer "online"
48+ hibench.lda.num_iterations 10
49+
4750hibench.workload.input ${hibench.hdfs.data.dir}/LDA/Input
4851hibench.workload.output ${hibench.hdfs.data.dir}/LDA/Output
Original file line number Diff line number Diff line change @@ -29,13 +29,17 @@ object LDAExample {
2929 var inputPath = " "
3030 var outputPath = " "
3131 var numTopics : Int = 10
32+ var maxIterations : Int = 10
33+ var optimizer = " online"
3234 var maxResultSize = " 1g"
3335
34- if (args.length == 4 ) {
36+ if (args.length == 6 ) {
3537 inputPath = args(0 )
3638 outputPath = args(1 )
3739 numTopics = args(2 ).toInt
38- maxResultSize = args(3 )
40+ maxIterations = args(3 ).toInt
41+ optimizer = args(4 )
42+ maxResultSize = args(5 )
3943 } else {
4044 System .err.println(
4145 s " Usage: $LDAExample <INPUT_PATH> <OUTPUT_PATH> <NUM_TOPICS> <MAX_RESULT_SIZE> "
@@ -51,7 +55,7 @@ object LDAExample {
5155 val corpus : RDD [(Long , Vector )] = sc.objectFile(inputPath)
5256
5357 // Cluster the documents into numTopics topics using LDA
54- val ldaModel = new LDA ().setK(numTopics).setOptimizer(" online " ).run(corpus)
58+ val ldaModel = new LDA ().setK(numTopics).setMaxIterations(maxIterations). setOptimizer(optimizer ).run(corpus)
5559
5660 // Save and load model.
5761 ldaModel.save(sc, outputPath)
You can’t perform that action at this time.
0 commit comments