Github user feynmanliang commented on a diff in the pull request:
https://github.com/apache/spark/pull/7099#discussion_r33989108
--- Diff: mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---
@@ -212,12 +226,110 @@ class LinearRegressionModel private[ml] (
extends RegressionModel[Vector, LinearRegressionModel]
with LinearRegressionParams {
+ @transient private var trainingResults:
Option[LinearRegressionTrainingResults] = None
+
+ /**
+ * Gets results (e.g. residuals, mse, r^2) of model on training set. This method should only
+ * be called on the driver (it is not available on workers).
+ */
+ def getTrainingResults: Option[LinearRegressionTrainingResults] =
trainingResults
+
+ def setTrainingResults(trainingResults:
LinearRegressionTrainingResults): this.type = {
+ this.trainingResults = Some(trainingResults)
+ this
+ }
+
+ /**
+ * Evaluates the model on a test-set.
+ * @param testset Test dataset to evaluate model on.
+ * @return
+ */
+ def evaluate(testset: DataFrame): LinearRegressionResults = {
+ val t = udf { features: Vector => predict(features) }
+ val predictionAndObservations = testset
+ .select(col($(labelCol)),
t(col($(featuresCol))).as($(predictionCol)))
+
+ new LinearRegressionResults(predictionAndObservations)
+ }
+
override protected def predict(features: Vector): Double = {
dot(features, weights) + intercept
}
override def copy(extra: ParamMap): LinearRegressionModel = {
- copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
+ val newModel = new LinearRegressionModel(uid, weights, intercept)
+ if (trainingResults.isDefined)
newModel.setTrainingResults(trainingResults.get)
+ copyValues(newModel, extra)
+ }
+}
+
+/**
+ * :: Experimental ::
+ * Linear regression training results.
+ * @param predictionAndLabel dataframe with columns prediction (0) and label (1).
+ * @param objectiveTrace objective function value at each iteration.
+ */
+@Experimental
+class LinearRegressionTrainingResults private[ml] (
+ predictionAndLabel: DataFrame,
+ val objectiveTrace: Array[Double])
+ extends LinearRegressionResults(predictionAndLabel) {
+
+ /** Number of training iterations until termination */
+ val totalIterations = objectiveTrace.length
+
+}
+
+/**
+ * :: Experimental ::
+ * Linear regression results evaluated on a dataset.
+ * @param predictionAndLabel dataframe with columns prediction(0) and label (1).
--- End diff --
OK
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]