Merge pull request #522 from jtengyp/svm
Change the SVM workload's argument handling to use OptionParser
Meng, Peng authored Nov 1, 2017
2 parents 83ac369 + 489654b commit dc77558
Showing 4 changed files with 64 additions and 30 deletions.
3 changes: 3 additions & 0 deletions bin/functions/hibench_prop_env_mapping.py
@@ -96,6 +96,9 @@
# For SVM
NUM_EXAMPLES_SVM="hibench.svm.examples",
NUM_FEATURES_SVM="hibench.svm.features",
NUM_ITERATIONS_SVM="hibench.svm.numIterations",
STEPSIZE_SVM="hibench.svm.stepSize",
REGPARAM_SVM="hibench.svm.regParam",
# For ALS
NUM_USERS_ALS="hibench.als.users",
NUM_PRODUCTS_ALS="hibench.als.products",
2 changes: 1 addition & 1 deletion bin/workloads/ml/svm/spark/run.sh
@@ -26,7 +26,7 @@ rmr_hdfs $OUTPUT_HDFS || true

SIZE=`dir_size $INPUT_HDFS`
START_TIME=`timestamp`
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample --numIterations $NUM_ITERATIONS_SVM --stepSize $STEPSIZE_SVM --regParam $REGPARAM_SVM $INPUT_HDFS
END_TIME=`timestamp`

gen_report ${START_TIME} ${END_TIME} ${SIZE}
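
For reference, here is a sketch of what this call looks like once the new environment variables have been filled in from the workload configuration. The flag values are simply the defaults added to conf/workloads/ml/svm.conf below; the actual substitution is performed by the HiBench launcher and is only assumed here.

# Illustrative expansion only: the values are the new svm.conf defaults,
# and INPUT_HDFS still resolves to ${hibench.hdfs.data.dir}/SVM/Input.
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample \
    --numIterations 100 \
    --stepSize 1.0 \
    --regParam 0.01 \
    ${INPUT_HDFS}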
32 changes: 18 additions & 14 deletions conf/workloads/ml/svm.conf
@@ -1,20 +1,24 @@
hibench.svm.tiny.examples 1000
hibench.svm.tiny.features 1000
hibench.svm.small.examples 10000
hibench.svm.small.features 10000
hibench.svm.large.examples 50000
hibench.svm.large.features 100000
hibench.svm.huge.examples 120000
hibench.svm.huge.features 300000
hibench.svm.gigantic.examples 140000
hibench.svm.gigantic.features 300000
hibench.svm.bigdata.examples 150000
hibench.svm.bigdata.features 300000


hibench.svm.examples ${hibench.svm.${hibench.scale.profile}.examples}
hibench.svm.features ${hibench.svm.${hibench.scale.profile}.features}
hibench.svm.partitions ${hibench.default.map.parallelism}

hibench.svm.numIterations 100
hibench.svm.stepSize 1.0
hibench.svm.regParam 0.01

hibench.workload.input ${hibench.hdfs.data.dir}/SVM/Input
hibench.workload.output ${hibench.hdfs.data.dir}/SVM/Output
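
The three new properties reach run.sh through the environment-variable mapping added to bin/functions/hibench_prop_env_mapping.py above. Conceptually the launcher ends up doing something like the following; this is a hand-written sketch of the effect, not the launcher's actual code:

# Hypothetical illustration of the property-to-environment mapping:
export NUM_ITERATIONS_SVM=100    # from hibench.svm.numIterations
export STEPSIZE_SVM=1.0          # from hibench.svm.stepSize
export REGPARAM_SVM=0.01         # from hibench.svm.regParam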
57 changes: 42 additions & 15 deletions sparkbench/ml/src/main/scala/com/intel/hibench/sparkbench/ml/SVMWithSGDExample.scala
@@ -15,40 +15,69 @@
* limitations under the License.
*/

// scalastyle:off println
package com.intel.hibench.sparkbench.ml

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
// $example off$

import scopt.OptionParser

object SVMWithSGDExample {

case class Params(
numIterations: Int = 100,
stepSize: Double = 1.0,
regParam: Double = 0.01,
dataPath: String = null
)

def main(args: Array[String]): Unit = {
val defaultParams = Params()

val parser = new OptionParser[Params]("SVM") {
head("SVM: an example of SVM for classification.")
opt[Int]("numIterations")
.text(s"numIterations, default: ${defaultParams.numIterations}")
.action((x,c) => c.copy(numIterations = x))
opt[Double]("stepSize")
.text(s"stepSize, default: ${defaultParams.stepSize}")
.action((x,c) => c.copy(stepSize = x))
opt[Double]("regParam")
.text(s"regParam, default: ${defaultParams.regParam}")
.action((x,c) => c.copy(regParam = x))
arg[String]("<dataPath>")
.required()
.text("data path of SVM")
.action((x, c) => c.copy(dataPath = x))
}
parser.parse(args, defaultParams) match {
case Some(params) => run(params)
case _ => sys.exit(1)
}
}

def run(params: Params): Unit = {

val conf = new SparkConf().setAppName(s"SVM with $params")
val sc = new SparkContext(conf)

val dataPath = params.dataPath
val numIterations = params.numIterations
val stepSize = params.stepSize
val regParam = params.regParam

val data: RDD[LabeledPoint] = sc.objectFile(dataPath)

// Split data into training (60%) and test (40%).
val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
val training = splits(0).cache()
val test = splits(1)

// Run training algorithm to build the model
val model = SVMWithSGD.train(training, numIterations, stepSize, regParam)

// Clear the default threshold.
model.clearThreshold()
@@ -65,8 +65,6 @@ object SVMWithSGDExample {

println("Area under ROC = " + auROC)

sc.stop()
}
}
// scalastyle:on println
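
As a usage sketch that is not part of this commit, the class can also be driven directly with spark-submit. The jar path and HDFS location below are placeholders; in HiBench the class is normally launched through bin/workloads/ml/svm/spark/run.sh. Any option that is omitted falls back to the defaults in Params (100 iterations, step size 1.0, regParam 0.01), and the trailing <dataPath> argument is required.

# Placeholder jar and input path; adjust for the actual deployment.
spark-submit --class com.intel.hibench.sparkbench.ml.SVMWithSGDExample \
    /path/to/sparkbench-assembly.jar \
    --numIterations 200 --stepSize 0.5 --regParam 0.1 \
    hdfs:///HiBench/SVM/Input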
