Repository: spark
Updated Branches:
  refs/heads/master 56a0aba0a -> 9e24ba667


[SPARK-11521][ML][DOC] Document that Logistic, Linear Regression summaries 
ignore weight col

Documentation for 1.6 noting that the summaries mostly ignore the weight column.
To be corrected in 1.7.

CC: mengxr thunterdb

Author: Joseph K. Bradley <jos...@databricks.com>

Closes #9927 from jkbradley/linregsummary-doc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9e24ba66
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9e24ba66
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9e24ba66

Branch: refs/heads/master
Commit: 9e24ba667e43290fbaa3cacb93cf5d9be790f1fd
Parents: 56a0aba
Author: Joseph K. Bradley <jos...@databricks.com>
Authored: Tue Nov 24 09:54:55 2015 -0800
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Nov 24 09:54:55 2015 -0800

----------------------------------------------------------------------
 .../ml/classification/LogisticRegression.scala    | 18 ++++++++++++++++++
 .../spark/ml/regression/LinearRegression.scala    | 15 +++++++++++++++
 2 files changed, 33 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9e24ba66/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 418bbdc..d320d64 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -755,23 +755,35 @@ class BinaryLogisticRegressionSummary 
private[classification] (
    * Returns the receiver operating characteristic (ROC) curve,
    * which is an Dataframe having two fields (FPR, TPR)
    * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LogisticRegression.weightCol]].
+   *       This will change in later Spark versions.
    * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
    */
   @transient lazy val roc: DataFrame = binaryMetrics.roc().toDF("FPR", "TPR")
 
   /**
    * Computes the area under the receiver operating characteristic (ROC) curve.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LogisticRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC()
 
   /**
    * Returns the precision-recall curve, which is an Dataframe containing
    * two fields recall, precision with (0.0, 1.0) prepended to it.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LogisticRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @transient lazy val pr: DataFrame = binaryMetrics.pr().toDF("recall", 
"precision")
 
   /**
    * Returns a dataframe with two fields (threshold, F-Measure) curve with 
beta = 1.0.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LogisticRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @transient lazy val fMeasureByThreshold: DataFrame = {
     binaryMetrics.fMeasureByThreshold().toDF("threshold", "F-Measure")
@@ -781,6 +793,9 @@ class BinaryLogisticRegressionSummary 
private[classification] (
    * Returns a dataframe with two fields (threshold, precision) curve.
    * Every possible probability obtained in transforming the dataset are used
    * as thresholds used in calculating the precision.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LogisticRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @transient lazy val precisionByThreshold: DataFrame = {
     binaryMetrics.precisionByThreshold().toDF("threshold", "precision")
@@ -790,6 +805,9 @@ class BinaryLogisticRegressionSummary 
private[classification] (
    * Returns a dataframe with two fields (threshold, recall) curve.
    * Every possible probability obtained in transforming the dataset are used
    * as thresholds used in calculating the recall.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LogisticRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @transient lazy val recallByThreshold: DataFrame = {
     binaryMetrics.recallByThreshold().toDF("threshold", "recall")

http://git-wip-us.apache.org/repos/asf/spark/blob/9e24ba66/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala 
b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 70ccec7..1db9166 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -540,6 +540,9 @@ class LinearRegressionSummary private[regression] (
    * Returns the explained variance regression score.
    * explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
    * Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LinearRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @Since("1.5.0")
   val explainedVariance: Double = metrics.explainedVariance
@@ -547,6 +550,9 @@ class LinearRegressionSummary private[regression] (
   /**
    * Returns the mean absolute error, which is a risk function corresponding 
to the
    * expected value of the absolute error loss or l1-norm loss.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LinearRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @Since("1.5.0")
   val meanAbsoluteError: Double = metrics.meanAbsoluteError
@@ -554,6 +560,9 @@ class LinearRegressionSummary private[regression] (
   /**
    * Returns the mean squared error, which is a risk function corresponding to 
the
    * expected value of the squared error loss or quadratic loss.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LinearRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @Since("1.5.0")
   val meanSquaredError: Double = metrics.meanSquaredError
@@ -561,6 +570,9 @@ class LinearRegressionSummary private[regression] (
   /**
    * Returns the root mean squared error, which is defined as the square root 
of
    * the mean squared error.
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LinearRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @Since("1.5.0")
   val rootMeanSquaredError: Double = metrics.rootMeanSquaredError
@@ -568,6 +580,9 @@ class LinearRegressionSummary private[regression] (
   /**
    * Returns R^2^, the coefficient of determination.
    * Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
+   *
+   * Note: This ignores instance weights (setting all to 1.0) from 
[[LinearRegression.weightCol]].
+   *       This will change in later Spark versions.
    */
   @Since("1.5.0")
   val r2: Double = metrics.r2


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to