[1/2] spark git commit: [SPARK-11549][DOCS] Replace example code in mllib-evaluation-metrics.md using include_example

meng Fri, 20 Nov 2015 15:19:22 -0800

Repository: spark
Updated Branches:
  refs/heads/branch-1.6 0665fb5ea -> 1dde97176



http://git-wip-us.apache.org/repos/asf/spark/blob/1dde9717/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala
----------------------------------------------------------------------
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala
 
b/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala
new file mode 100644
index 0000000..4503c15
--- /dev/null
+++ 
b/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+// $example on$
+import org.apache.spark.mllib.evaluation.MultilabelMetrics
+import org.apache.spark.rdd.RDD
+// $example off$
+import org.apache.spark.{SparkContext, SparkConf}
+
+object MultiLabelMetricsExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("MultiLabelMetricsExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    val scoreAndLabels: RDD[(Array[Double], Array[Double])] = sc.parallelize(
+      Seq((Array(0.0, 1.0), Array(0.0, 2.0)),
+        (Array(0.0, 2.0), Array(0.0, 1.0)),
+        (Array.empty[Double], Array(0.0)),
+        (Array(2.0), Array(2.0)),
+        (Array(2.0, 0.0), Array(2.0, 0.0)),
+        (Array(0.0, 1.0, 2.0), Array(0.0, 1.0)),
+        (Array(1.0), Array(1.0, 2.0))), 2)
+
+    // Instantiate metrics object
+    val metrics = new MultilabelMetrics(scoreAndLabels)
+
+    // Summary stats
+    println(s"Recall = ${metrics.recall}")
+    println(s"Precision = ${metrics.precision}")
+    println(s"F1 measure = ${metrics.f1Measure}")
+    println(s"Accuracy = ${metrics.accuracy}")
+
+    // Individual label stats
+    metrics.labels.foreach(label =>
+      println(s"Class $label precision = ${metrics.precision(label)}"))
+    metrics.labels.foreach(label => println(s"Class $label recall = 
${metrics.recall(label)}"))
+    metrics.labels.foreach(label => println(s"Class $label F1-score = 
${metrics.f1Measure(label)}"))
+
+    // Micro stats
+    println(s"Micro recall = ${metrics.microRecall}")
+    println(s"Micro precision = ${metrics.microPrecision}")
+    println(s"Micro F1 measure = ${metrics.microF1Measure}")
+
+    // Hamming loss
+    println(s"Hamming loss = ${metrics.hammingLoss}")
+
+    // Subset accuracy
+    println(s"Subset accuracy = ${metrics.subsetAccuracy}")
+    // $example off$
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/1dde9717/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala
----------------------------------------------------------------------
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala
 
b/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala
new file mode 100644
index 0000000..0904449
--- /dev/null
+++ 
b/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+// $example on$
+import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+import org.apache.spark.{SparkContext, SparkConf}
+
+object MulticlassMetricsExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("MulticlassMetricsExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load training data in LIBSVM format
+    val data = MLUtils.loadLibSVMFile(sc, 
"data/mllib/sample_multiclass_classification_data.txt")
+
+    // Split data into training (60%) and test (40%)
+    val Array(training, test) = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    training.cache()
+
+    // Run training algorithm to build the model
+    val model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(3)
+      .run(training)
+
+    // Compute raw scores on the test set
+    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
+      val prediction = model.predict(features)
+      (prediction, label)
+    }
+
+    // Instantiate metrics object
+    val metrics = new MulticlassMetrics(predictionAndLabels)
+
+    // Confusion matrix
+    println("Confusion matrix:")
+    println(metrics.confusionMatrix)
+
+    // Overall Statistics
+    val precision = metrics.precision
+    val recall = metrics.recall // same as true positive rate
+    val f1Score = metrics.fMeasure
+    println("Summary Statistics")
+    println(s"Precision = $precision")
+    println(s"Recall = $recall")
+    println(s"F1 Score = $f1Score")
+
+    // Precision by label
+    val labels = metrics.labels
+    labels.foreach { l =>
+      println(s"Precision($l) = " + metrics.precision(l))
+    }
+
+    // Recall by label
+    labels.foreach { l =>
+      println(s"Recall($l) = " + metrics.recall(l))
+    }
+
+    // False positive rate by label
+    labels.foreach { l =>
+      println(s"FPR($l) = " + metrics.falsePositiveRate(l))
+    }
+
+    // F-measure by label
+    labels.foreach { l =>
+      println(s"F1-Score($l) = " + metrics.fMeasure(l))
+    }
+
+    // Weighted stats
+    println(s"Weighted precision: ${metrics.weightedPrecision}")
+    println(s"Weighted recall: ${metrics.weightedRecall}")
+    println(s"Weighted F1 score: ${metrics.weightedFMeasure}")
+    println(s"Weighted false positive rate: 
${metrics.weightedFalsePositiveRate}")
+    // $example off$
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/1dde9717/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala
----------------------------------------------------------------------
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala
 
b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala
new file mode 100644
index 0000000..cffa03d
--- /dev/null
+++ 
b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+// $example on$
+import org.apache.spark.mllib.evaluation.{RegressionMetrics, RankingMetrics}
+import org.apache.spark.mllib.recommendation.{ALS, Rating}
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkContext, SparkConf}
+
+object RankingMetricsExample {
+  def main(args: Array[String]) {
+    val conf = new SparkConf().setAppName("RankingMetricsExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+    import sqlContext.implicits._
+    // $example on$
+    // Read in the ratings data
+    val ratings = sc.textFile("data/mllib/sample_movielens_data.txt").map { 
line =>
+      val fields = line.split("::")
+      Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble - 2.5)
+    }.cache()
+
+    // Map ratings to 1 or 0, 1 indicating a movie that should be recommended
+    val binarizedRatings = ratings.map(r => Rating(r.user, r.product,
+      if (r.rating > 0) 1.0 else 0.0)).cache()
+
+    // Summarize ratings
+    val numRatings = ratings.count()
+    val numUsers = ratings.map(_.user).distinct().count()
+    val numMovies = ratings.map(_.product).distinct().count()
+    println(s"Got $numRatings ratings from $numUsers users on $numMovies 
movies.")
+
+    // Build the model
+    val numIterations = 10
+    val rank = 10
+    val lambda = 0.01
+    val model = ALS.train(ratings, rank, numIterations, lambda)
+
+    // Define a function to scale ratings from 0 to 1
+    def scaledRating(r: Rating): Rating = {
+      val scaledRating = math.max(math.min(r.rating, 1.0), 0.0)
+      Rating(r.user, r.product, scaledRating)
+    }
+
+    // Get sorted top ten predictions for each user and then scale from [0, 1]
+    val userRecommended = model.recommendProductsForUsers(10).map { case 
(user, recs) =>
+      (user, recs.map(scaledRating))
+    }
+
+    // Assume that any movie a user rated 3 or higher (which maps to a 1) is a 
relevant document
+    // Compare with top ten most relevant documents
+    val userMovies = binarizedRatings.groupBy(_.user)
+    val relevantDocuments = userMovies.join(userRecommended).map { case (user, 
(actual,
+    predictions)) =>
+      (predictions.map(_.product), actual.filter(_.rating > 
0.0).map(_.product).toArray)
+    }
+
+    // Instantiate metrics object
+    val metrics = new RankingMetrics(relevantDocuments)
+
+    // Precision at K
+    Array(1, 3, 5).foreach { k =>
+      println(s"Precision at $k = ${metrics.precisionAt(k)}")
+    }
+
+    // Mean average precision
+    println(s"Mean average precision = ${metrics.meanAveragePrecision}")
+
+    // Normalized discounted cumulative gain
+    Array(1, 3, 5).foreach { k =>
+      println(s"NDCG at $k = ${metrics.ndcgAt(k)}")
+    }
+
+    // Get predictions for each data point
+    val allPredictions = model.predict(ratings.map(r => (r.user, 
r.product))).map(r => ((r.user,
+      r.product), r.rating))
+    val allRatings = ratings.map(r => ((r.user, r.product), r.rating))
+    val predictionsAndLabels = allPredictions.join(allRatings).map { case 
((user, product),
+    (predicted, actual)) =>
+      (predicted, actual)
+    }
+
+    // Get the RMSE using regression metrics
+    val regressionMetrics = new RegressionMetrics(predictionsAndLabels)
+    println(s"RMSE = ${regressionMetrics.rootMeanSquaredError}")
+
+    // R-squared
+    println(s"R-squared = ${regressionMetrics.r2}")
+    // $example off$
+  }
+}
+// scalastyle:on println

http://git-wip-us.apache.org/repos/asf/spark/blob/1dde9717/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala
----------------------------------------------------------------------
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala
 
b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala
new file mode 100644
index 0000000..47d4453
--- /dev/null
+++ 
b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// scalastyle:off println
+
+package org.apache.spark.examples.mllib
+
+// $example on$
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD
+import org.apache.spark.mllib.evaluation.RegressionMetrics
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.{SparkConf, SparkContext}
+
+object RegressionMetricsExample {
+  def main(args: Array[String]) : Unit = {
+    val conf = new SparkConf().setAppName("RegressionMetricsExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+    // $example on$
+    // Load the data
+    val data = MLUtils.loadLibSVMFile(sc, 
"data/mllib/sample_linear_regression_data.txt").cache()
+
+    // Build the model
+    val numIterations = 100
+    val model = LinearRegressionWithSGD.train(data, numIterations)
+
+    // Get predictions
+    val valuesAndPreds = data.map{ point =>
+      val prediction = model.predict(point.features)
+      (prediction, point.label)
+    }
+
+    // Instantiate metrics object
+    val metrics = new RegressionMetrics(valuesAndPreds)
+
+    // Squared error
+    println(s"MSE = ${metrics.meanSquaredError}")
+    println(s"RMSE = ${metrics.rootMeanSquaredError}")
+
+    // R-squared
+    println(s"R-squared = ${metrics.r2}")
+
+    // Mean absolute error
+    println(s"MAE = ${metrics.meanAbsoluteError}")
+
+    // Explained variance
+    println(s"Explained variance = ${metrics.explainedVariance}")
+    // $example off$
+  }
+}
+// scalastyle:on println
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[1/2] spark git commit: [SPARK-11549][DOCS] Replace example code in mllib-evaluation-metrics.md using include_example

Reply via email to