Github user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/7538#discussion_r36361309
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
---
@@ -408,6 +451,119 @@ private[classification] class MultiClassSummarizer
extends Serializable {
}
/**
+ * Abstraction for multinomial Logistic Regression Training results.
+ */
+sealed trait LogisticRegressionTrainingSummary extends Serializable {
+
+ /** objective function (scaled loss + regularization) at each iteration.
*/
+ val objectiveHistory: Array[Double]
+
+ /** Number of training iterations until termination */
+ val totalIterations: Int = objectiveHistory.length
+
+}
+
+/**
+ * Abstraction for Logistic Regression Results for a given model.
+ */
+sealed trait LogisticRegressionSummary extends Serializable {
+
+}
+
+/**
+ * :: Experimental ::
+ * Logistic regression training results.
+ * @param predictions dataframe outputted by the model's `transform`
method.
+ * @param probabilityCol field in "predictions" which gives the calibrated
probability of
+ * each sample as a vector.
+ * @param labelCol field in "predictions" which gives the true label of
each sample.
+ * @param objectiveHistory objective function (scaled loss +
regularization) at each iteration.
+ */
+@Experimental
+class BinaryLogisticRegressionTrainingSummary private[classification] (
+ predictions: DataFrame,
+ probabilityCol: String,
+ labelCol: String,
+ val objectiveHistory: Array[Double])
+ extends BinaryLogisticRegressionSummary(predictions, probabilityCol,
labelCol)
+ with LogisticRegressionTrainingSummary {
+
+}
+
+/**
+ * :: Experimental ::
+ * Binary Logistic regression results for a given model.
+ * @param predictions dataframe outputted by the model's `transform`
method.
+ * @param probabilityCol field in "predictions" which gives the calibrated
probability of
+ * each sample.
+ * @param labelCol field in "predictions" which gives the true label of
each sample.
+ */
+@Experimental
+class BinaryLogisticRegressionSummary private[classification] (
+ @transient val predictions: DataFrame,
+ val probabilityCol: String,
+ val labelCol: String) extends LogisticRegressionSummary {
+
+ private val sqlContext = predictions.sqlContext
+ import sqlContext.implicits._
+
+ /**
+ * Returns a BinaryClassificationMetrics object.
+ */
+ // TODO: Allow the user to vary the number of bins using a setBins
method in
+ // BinaryClassificationMetrics. For now the default is set to 100.
+ @transient private val binaryMetrics = new BinaryClassificationMetrics(
+ predictions.select(probabilityCol, labelCol).map {
+ case Row(score: Vector, label: Double) => (score(1), label)
+ }, 100
+ )
+
+ /**
+ * Returns the receiver operating characteristic (ROC) curve,
+ * which is a DataFrame having two fields (FPR, TPR)
+ * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
+ * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
+ */
+ def roc(): DataFrame = binaryMetrics.roc().toDF("FPR", "TPR")
+
+ /**
+ * Computes the area under the receiver operating characteristic (ROC)
curve.
+ */
+ def areaUnderROC(): Double = binaryMetrics.areaUnderROC()
--- End diff --
Make this a lazy val.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]