Github user feynmanliang commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7099#discussion_r34078680
  
    --- Diff: 
mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---
    @@ -212,12 +230,140 @@ class LinearRegressionModel private[ml] (
       extends RegressionModel[Vector, LinearRegressionModel]
       with LinearRegressionParams {
     
    +  private var trainingSummary: Option[LinearRegressionTrainingSummary] = 
None
    +
    +  /**
    +   * Gets results summary (e.g. residuals, mse, r-squared ) of model on 
training set. An exception
    +   * is thrown if `trainingSummary == None`.
    +   */
    +  def summary: LinearRegressionTrainingSummary = trainingSummary match {
    +    case Some(summ) => summ
    +    case None =>
    +      throw new SparkException(
    +        "No training summary available for this LinearRegressionModel",
    +        new NullPointerException())
    +  }
    +
    +  private[regression] def setSummary(summary: 
LinearRegressionTrainingSummary): this.type = {
    +    this.trainingSummary = Some(summary)
    +    this
    +  }
    +
    +  def hasSummary: Boolean = trainingSummary.isDefined
    +
    +
    +  /**
    +   * Evaluates the model on a testset.
    +   * @param dataset Test dataset to evaluate model on.
    +   */
    +  def evaluate(dataset: DataFrame): LinearRegressionSummary = {
    +    val t = udf { features: Vector => predict(features) }
    +    val predictionAndObservations = dataset
    +      .select(col($(labelCol)), 
t(col($(featuresCol))).as($(predictionCol)))
    +
    +    new LinearRegressionSummary(predictionAndObservations, 
$(predictionCol), $(labelCol))
    +  }
    +
       override protected def predict(features: Vector): Double = {
         dot(features, weights) + intercept
       }
     
       override def copy(extra: ParamMap): LinearRegressionModel = {
    -    copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
    +    val newModel = new LinearRegressionModel(uid, weights, intercept)
    +    if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
    +    copyValues(newModel, extra)
    +  }
    +}
    +
    +/**
    + * :: Experimental ::
    + * Linear regression training results.
    + * @param predictions predictions outputted by the model's `transform` 
method.
    + * @param objectiveTrace objective function (scaled loss + regularization) 
at each iteration.
    + */
    +@Experimental
    +class LinearRegressionTrainingSummary private[ml] (
    +    predictions: DataFrame,
    +    predictionCol: String,
    +    labelCol: String,
    +    val objectiveTrace: DataFrame)
    +  extends LinearRegressionSummary(predictions, predictionCol, labelCol) {
    +
    +  def this(
    +      predictions: DataFrame,
    +      predictionCol: String,
    +      labelCol: String,
    +      objectiveTrace: Array[Double]) = {
    +
    +    this(
    +      predictions,
    +      predictionCol,
    +      labelCol,
    +      predictions.sqlContext
    +        .createDataFrame(objectiveTrace.zipWithIndex.map(_.swap).toSeq)
    +        .toDF("iteration", "objective")
    +    )
    +  }
    +
    +  /** Number of training iterations until termination */
    +  val totalIterations = objectiveTrace.count()
    +
    +}
    +
    +/**
    + * :: Experimental ::
    + * Linear regression results evaluated on a dataset.
    + * @param predictions predictions outputted by the model's `transform` 
method.
    + */
    +@Experimental
    +class LinearRegressionSummary private[ml] (
    +    @transient val predictions: DataFrame,
    +    val predictionCol: String,
    +    val labelCol: String) extends Serializable {
    +
    +  private val metrics = new RegressionMetrics(predictions
    +    .select(predictionCol, labelCol)
    +    .map { case Row(pred: Double, label: Double) => (pred, label) }
    +  )
    +
    +  // Force evaluation of lazy RegressionMetrics.summary for proper 
serialization
    +  metrics.explainedVariance
    --- End diff --
    
    Will forcing `metrics.explainedVariance` here suffice to ensure that 
`metrics` serializes correctly even though `predictions` is transient? Should 
we consider just making `RegressionMetrics.summary` non-lazy instead?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to