Github user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7099#discussion_r33967441
  
    --- Diff: 
mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---
    @@ -212,12 +226,110 @@ class LinearRegressionModel private[ml] (
       extends RegressionModel[Vector, LinearRegressionModel]
       with LinearRegressionParams {
     
    +  @transient private var trainingResults: 
Option[LinearRegressionTrainingResults] = None
    +
    +  /**
    +   * Gets results (e.g. residuals, mse, r^2) of model on training set. 
This method should only
    +   * be called on the driver (it is not available on workers).
    +   */
    +  def getTrainingResults: Option[LinearRegressionTrainingResults] = 
trainingResults
    +
    +  def setTrainingResults(trainingResults: 
LinearRegressionTrainingResults): this.type = {
    +    this.trainingResults = Some(trainingResults)
    +    this
    +  }
    +
    +  /**
    +   * Evaluates the model on a test-set.
    +   * @param testset Test dataset to evaluate model on.
    +   * @return
    +   */
    +  def evaluate(testset: DataFrame): LinearRegressionResults = {
    +    val t = udf { features: Vector => predict(features) }
    +    val predictionAndObservations = testset
    +      .select(col($(labelCol)), 
t(col($(featuresCol))).as($(predictionCol)))
    +
    +    new LinearRegressionResults(predictionAndObservations)
    +  }
    +
       override protected def predict(features: Vector): Double = {
         dot(features, weights) + intercept
       }
     
       override def copy(extra: ParamMap): LinearRegressionModel = {
    -    copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
    +    val newModel = new LinearRegressionModel(uid, weights, intercept)
    +    if (trainingResults.isDefined) 
newModel.setTrainingResults(trainingResults.get)
    +    copyValues(newModel, extra)
    +  }
    +}
    +
    +/**
    + * :: Experimental ::
    + * Linear regression training results.
    + * @param predictionAndLabel dataframe with columns prediction (0) and 
label (1).
    + * @param objectiveTrace objective function value at each iteration.
    + */
    +@Experimental
    +class LinearRegressionTrainingResults private[ml] (
    +    predictionAndLabel: DataFrame,
    +    val objectiveTrace: Array[Double])
    +  extends LinearRegressionResults(predictionAndLabel) {
    +
    +  /** Number of training iterations until termination */
    +  val totalIterations = objectiveTrace.length
    +
    +}
    +
    +/**
    + * :: Experimental ::
    + * Linear regression results evaluated on a dataset.
    + * @param predictionAndLabel dataframe with columns prediction(0) and 
label (1).
    + */
    +@Experimental
    +class LinearRegressionResults private[ml] (
    +    val predictionAndLabel: DataFrame) {
    +
    +  private val metrics = new RegressionMetrics(predictionAndLabel.map {
    +    case Row(pred: Double, obs: Double) => (pred, obs)
    +  })
    +
    +  /**
    +   * Returns the explained variance regression score.
    +   * explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
    +   * Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
    +   */
    +  def explainedVariance: Double = metrics.explainedVariance
    +
    +  /**
    +   * Returns the mean absolute error, which is a risk function 
corresponding to the
    +   * expected value of the absolute error loss or l1-norm loss.
    +   */
    +  def meanAbsoluteError: Double = metrics.meanAbsoluteError
    +
    +  /**
    +   * Returns the mean squared error, which is a risk function 
corresponding to the
    +   * expected value of the squared error loss or quadratic loss.
    +   */
    +  def meanSquaredError: Double = metrics.meanSquaredError
    +
    +  /**
    +   * Returns the root mean squared error, which is defined as the square 
root of
    +   * the mean squared error.
    +   */
    +  def rootMeanSquaredError: Double = metrics.rootMeanSquaredError
    +
    +  /**
    +   * Returns R^2^, the coefficient of determination.
    +   * Reference: 
[[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
    +   */
    +  def r2: Double = metrics.r2
    +
    +  /** Get residuals */
    --- End diff --
    
    Define residuals in the doc to make it clear that they are computed as
    pred - label (not vice versa).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to