Repository: spark
Updated Branches:
  refs/heads/branch-1.5 8f055e595 -> 690284037


[SPARK-8922] [DOCUMENTATION, MLLIB] Add @since tags to mllib.evaluation

Author: shikai.tang <tar.sk...@gmail.com>

Closes #7429 from mosessky/master.

(cherry picked from commit df543892122342b97e5137b266959ba97589b3ef)
Signed-off-by: Xiangrui Meng <m...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69028403
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69028403
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69028403

Branch: refs/heads/branch-1.5
Commit: 690284037ecd880d48d5e835b150a2f31feb7c73
Parents: 8f055e5
Author: shikai.tang <tar.sk...@gmail.com>
Authored: Wed Aug 12 21:53:15 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Wed Aug 12 21:53:24 2015 -0700

----------------------------------------------------------------------
 .../BinaryClassificationMetrics.scala           | 32 +++++++++++++++++---
 .../mllib/evaluation/MulticlassMetrics.scala    |  9 ++++++
 .../mllib/evaluation/MultilabelMetrics.scala    |  4 +++
 .../spark/mllib/evaluation/RankingMetrics.scala |  4 +++
 .../mllib/evaluation/RegressionMetrics.scala    |  6 ++++
 5 files changed, 50 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/69028403/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index c1d1a22..486741e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.DataFrame
  *                of bins may not exactly equal numBins. The last bin in each partition may
  *                be smaller as a result, meaning there may be an extra sample at
  *                partition boundaries.
+ * @since 1.3.0
  */
 @Experimental
 class BinaryClassificationMetrics(
@@ -51,6 +52,7 @@ class BinaryClassificationMetrics(
 
   /**
    * Defaults `numBins` to 0.
+   * @since 1.0.0
    */
   def this(scoreAndLabels: RDD[(Double, Double)]) = this(scoreAndLabels, 0)
 
@@ -61,12 +63,18 @@ class BinaryClassificationMetrics(
   private[mllib] def this(scoreAndLabels: DataFrame) =
     this(scoreAndLabels.map(r => (r.getDouble(0), r.getDouble(1))))
 
-  /** Unpersist intermediate RDDs used in the computation. */
+  /**
+   * Unpersist intermediate RDDs used in the computation.
+   * @since 1.0.0
+   */
   def unpersist() {
     cumulativeCounts.unpersist()
   }
 
-  /** Returns thresholds in descending order. */
+  /**
+   * Returns thresholds in descending order.
+   * @since 1.0.0
+   */
   def thresholds(): RDD[Double] = cumulativeCounts.map(_._1)
 
   /**
@@ -74,6 +82,7 @@ class BinaryClassificationMetrics(
    * which is an RDD of (false positive rate, true positive rate)
    * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
    * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
+   * @since 1.0.0
    */
   def roc(): RDD[(Double, Double)] = {
     val rocCurve = createCurve(FalsePositiveRate, Recall)
@@ -85,6 +94,7 @@ class BinaryClassificationMetrics(
 
   /**
    * Computes the area under the receiver operating characteristic (ROC) curve.
+   * @since 1.0.0
    */
   def areaUnderROC(): Double = AreaUnderCurve.of(roc())
 
@@ -92,6 +102,7 @@ class BinaryClassificationMetrics(
    * Returns the precision-recall curve, which is an RDD of (recall, precision),
    * NOT (precision, recall), with (0.0, 1.0) prepended to it.
    * @see http://en.wikipedia.org/wiki/Precision_and_recall
+   * @since 1.0.0
    */
   def pr(): RDD[(Double, Double)] = {
     val prCurve = createCurve(Recall, Precision)
@@ -102,6 +113,7 @@ class BinaryClassificationMetrics(
 
   /**
    * Computes the area under the precision-recall curve.
+   * @since 1.0.0
    */
   def areaUnderPR(): Double = AreaUnderCurve.of(pr())
 
@@ -110,16 +122,26 @@ class BinaryClassificationMetrics(
    * @param beta the beta factor in F-Measure computation.
    * @return an RDD of (threshold, F-Measure) pairs.
    * @see http://en.wikipedia.org/wiki/F1_score
+   * @since 1.0.0
    */
   def fMeasureByThreshold(beta: Double): RDD[(Double, Double)] = createCurve(FMeasure(beta))
 
-  /** Returns the (threshold, F-Measure) curve with beta = 1.0. */
+  /**
+   * Returns the (threshold, F-Measure) curve with beta = 1.0.
+   * @since 1.0.0
+   */
   def fMeasureByThreshold(): RDD[(Double, Double)] = fMeasureByThreshold(1.0)
 
-  /** Returns the (threshold, precision) curve. */
+  /**
+   * Returns the (threshold, precision) curve.
+   * @since 1.0.0
+   */
   def precisionByThreshold(): RDD[(Double, Double)] = createCurve(Precision)
 
-  /** Returns the (threshold, recall) curve. */
+  /**
+   * Returns the (threshold, recall) curve.
+   * @since 1.0.0
+   */
   def recallByThreshold(): RDD[(Double, Double)] = createCurve(Recall)
 
   private lazy val (

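For context, a minimal usage sketch of the BinaryClassificationMetrics API
documented above (illustrative only, not part of this commit; it assumes an
existing SparkContext sc and made-up scores):

    import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics

    // (score, label) pairs; labels must be 0.0 or 1.0.
    val scoreAndLabels = sc.parallelize(Seq(
      (0.9, 1.0), (0.8, 1.0), (0.4, 0.0), (0.1, 0.0)))
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    println(s"AUC   = ${metrics.areaUnderROC()}")  // area under the ROC curve
    println(s"AUPRC = ${metrics.areaUnderPR()}")   // area under the precision-recall curve
    metrics.unpersist()                            // release cached intermediate RDDs
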
http://git-wip-us.apache.org/repos/asf/spark/blob/69028403/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 4628dc5..dddfa3e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for multiclass classification.
  *
  * @param predictionAndLabels an RDD of (prediction, label) pairs.
+ * @since 1.1.0
  */
 @Experimental
 class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
@@ -64,6 +65,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
    * predicted classes are in columns,
    * they are ordered by class label ascending,
    * as in "labels"
+   * @since 1.1.0
    */
   def confusionMatrix: Matrix = {
     val n = labels.size
@@ -83,12 +85,14 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns true positive rate for a given label (category)
    * @param label the label.
+   * @since 1.1.0
    */
   def truePositiveRate(label: Double): Double = recall(label)
 
   /**
    * Returns false positive rate for a given label (category)
    * @param label the label.
+   * @since 1.1.0
    */
   def falsePositiveRate(label: Double): Double = {
     val fp = fpByClass.getOrElse(label, 0)
@@ -98,6 +102,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns precision for a given label (category)
    * @param label the label.
+   * @since 1.1.0
    */
   def precision(label: Double): Double = {
     val tp = tpByClass(label)
@@ -108,6 +113,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns recall for a given label (category)
    * @param label the label.
+   * @since 1.1.0
    */
   def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label)
 
@@ -115,6 +121,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
    * Returns f-measure for a given label (category)
    * @param label the label.
    * @param beta the beta parameter.
+   * @since 1.1.0
    */
   def fMeasure(label: Double, beta: Double): Double = {
     val p = precision(label)
@@ -126,6 +133,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns f1-measure for a given label (category)
    * @param label the label.
+   * @since 1.1.0
    */
   def fMeasure(label: Double): Double = fMeasure(label, 1.0)
 
@@ -179,6 +187,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
   /**
    * Returns weighted averaged f-measure
    * @param beta the beta parameter.
+   * @since 1.1.0
    */
   def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) =>
     fMeasure(category, beta) * count.toDouble / labelCount

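A minimal usage sketch of the MulticlassMetrics API documented above
(illustrative only; assumes an existing SparkContext sc and toy data):

    import org.apache.spark.mllib.evaluation.MulticlassMetrics

    // (prediction, label) pairs over three classes.
    val predictionAndLabels = sc.parallelize(Seq(
      (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (2.0, 2.0)))
    val metrics = new MulticlassMetrics(predictionAndLabels)
    println(metrics.confusionMatrix)  // actual classes in rows, predicted in columns
    metrics.labels.foreach { l =>
      println(s"label $l: precision = ${metrics.precision(l)}, recall = ${metrics.recall(l)}")
    }
    println(s"weighted F1 = ${metrics.weightedFMeasure(1.0)}")
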
http://git-wip-us.apache.org/repos/asf/spark/blob/69028403/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
index bf6eb1d..77cb1e0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for multilabel classification.
  * @param predictionAndLabels an RDD of (predictions, labels) pairs,
  * both are non-null Arrays, each with unique elements.
+ * @since 1.2.0
  */
 class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
 
@@ -103,6 +104,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
   /**
    * Returns precision for a given label (category)
    * @param label the label.
+   * @since 1.2.0
    */
   def precision(label: Double): Double = {
     val tp = tpPerClass(label)
@@ -113,6 +115,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
   /**
    * Returns recall for a given label (category)
    * @param label the label.
+   * @since 1.2.0
    */
   def recall(label: Double): Double = {
     val tp = tpPerClass(label)
@@ -123,6 +126,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
   /**
    * Returns f1-measure for a given label (category)
    * @param label the label.
+   * @since 1.2.0
    */
   def f1Measure(label: Double): Double = {
     val p = precision(label)

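A minimal usage sketch of the MultilabelMetrics API documented above
(illustrative only; assumes an existing SparkContext sc):

    import org.apache.spark.mllib.evaluation.MultilabelMetrics

    // (predictions, labels) pairs; each side is an Array of class indices
    // with unique elements, per the scaladoc above.
    val predictionAndLabels = sc.parallelize(Seq(
      (Array(0.0, 1.0), Array(0.0, 2.0)),
      (Array(0.0), Array(0.0, 1.0))))
    val metrics = new MultilabelMetrics(predictionAndLabels)
    println(s"precision(0.0) = ${metrics.precision(0.0)}")
    println(s"recall(0.0)    = ${metrics.recall(0.0)}")
    println(s"f1Measure(0.0) = ${metrics.f1Measure(0.0)}")
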
http://git-wip-us.apache.org/repos/asf/spark/blob/69028403/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index 5b5a2a1..063fbed 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -34,6 +34,7 @@ import org.apache.spark.rdd.RDD
  * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
  *
  * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs.
+ * @since 1.2.0
  */
 @Experimental
 class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])])
@@ -55,6 +56,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
    *
    * @param k the position to compute the truncated precision, must be positive
    * @return the average precision at the first k ranking positions
+   * @since 1.2.0
    */
   def precisionAt(k: Int): Double = {
     require(k > 0, "ranking position k should be positive")
@@ -124,6 +126,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
    *
    * @param k the position to compute the truncated ndcg, must be positive
    * @return the average ndcg at the first k ranking positions
+   * @since 1.2.0
    */
   def ndcgAt(k: Int): Double = {
     require(k > 0, "ranking position k should be positive")
@@ -162,6 +165,7 @@ object RankingMetrics {
   /**
    * Creates a [[RankingMetrics]] instance (for Java users).
    * @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs
+   * @since 1.4.0
    */
   def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = {
     implicit val tag = JavaSparkContext.fakeClassTag[E]

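A minimal usage sketch of the RankingMetrics API documented above
(illustrative only; assumes an existing SparkContext sc):

    import org.apache.spark.mllib.evaluation.RankingMetrics

    // (predicted ranking, ground truth set) pairs.
    val predictionAndLabels = sc.parallelize(Seq(
      (Array(1, 6, 2, 7, 8), Array(1, 2, 3, 5)),
      (Array(4, 1, 5, 6, 2), Array(1, 2, 3))))
    val metrics = new RankingMetrics(predictionAndLabels)
    println(s"precision@5 = ${metrics.precisionAt(5)}")  // truncated precision
    println(s"NDCG@5      = ${metrics.ndcgAt(5)}")       // truncated NDCG
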
http://git-wip-us.apache.org/repos/asf/spark/blob/69028403/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 408847a..54dfd8c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for regression.
  *
  * @param predictionAndObservations an RDD of (prediction, observation) pairs.
+ * @since 1.2.0
  */
 @Experimental
 class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
@@ -66,6 +67,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
    * Returns the variance explained by regression.
    * explainedVariance = \sum_i (\hat{y_i} - \bar{y})^2 / n
    * @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]]
+   * @since 1.2.0
    */
   def explainedVariance: Double = {
     SSreg / summary.count
@@ -74,6 +76,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns the mean absolute error, which is a risk function corresponding to the
    * expected value of the absolute error loss or l1-norm loss.
+   * @since 1.2.0
    */
   def meanAbsoluteError: Double = {
     summary.normL1(1) / summary.count
@@ -82,6 +85,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns the mean squared error, which is a risk function corresponding to the
    * expected value of the squared error loss or quadratic loss.
+   * @since 1.2.0
    */
   def meanSquaredError: Double = {
     SSerr / summary.count
@@ -90,6 +94,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns the root mean squared error, which is defined as the square root of
    * the mean squared error.
+   * @since 1.2.0
    */
   def rootMeanSquaredError: Double = {
     math.sqrt(this.meanSquaredError)
@@ -98,6 +103,7 @@ class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extend
   /**
    * Returns R^2^, the unadjusted coefficient of determination.
    * @see [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
+   * @since 1.2.0
    */
   def r2: Double = {
     1 - SSerr / SStot

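A minimal usage sketch of the RegressionMetrics API documented above
(illustrative only; assumes an existing SparkContext sc):

    import org.apache.spark.mllib.evaluation.RegressionMetrics

    // (prediction, observation) pairs.
    val predictionAndObservations = sc.parallelize(Seq(
      (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)))
    val metrics = new RegressionMetrics(predictionAndObservations)
    println(s"MSE  = ${metrics.meanSquaredError}")
    println(s"RMSE = ${metrics.rootMeanSquaredError}")
    println(s"MAE  = ${metrics.meanAbsoluteError}")
    println(s"R^2  = ${metrics.r2}")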
