Github user mengxr commented on a diff in the pull request:

    https://github.com/apache/spark/pull/9413#discussion_r43770122
  
    --- Diff: mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala ---
    @@ -474,6 +487,75 @@ class LinearRegressionSummary private[regression] (
         predictions.select(t(col(predictionCol), 
col(labelCol)).as("residuals"))
       }
     
    +  /** Number of instances in DataFrame predictions */
    +  lazy val numInstances: Long = predictions.count()
    +
    +  /** Degrees of freedom */
    +  private val degreesOfFreedom: Long = if (model.getFitIntercept) {
    +    numInstances - model.coefficients.size - 1
    +  } else {
    +    numInstances - model.coefficients.size
    +  }
    +
    +  /**
    +   * The weighted residuals, the usual residuals rescaled by
    +   * the square root of the instance weights.
    +   */
    +  lazy val devianceResiduals: Array[Double] = {
    +    val weighted = if (model.getWeightCol.isEmpty) lit(1.0) else 
sqrt(col(model.getWeightCol))
    +    val dr = 
predictions.select(col(model.getLabelCol).minus(col(model.getPredictionCol))
    +      .multiply(weighted).as("weightedResiduals"))
    +      .select(min(col("weightedResiduals")).as("min"), 
max(col("weightedResiduals")).as("max"))
    +      .first()
    +    Array(dr.getDouble(0), dr.getDouble(1))
    +  }
    +
    +  /**
    +   * Standard error of estimated coefficients.
    +   * Note that standard error of estimated intercept is not supported 
currently.
    +   */
    +  lazy val coefficientStandardErrors: Array[Double] = {
    +    if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) {
    +      throw new UnsupportedOperationException(
    +        "No Std. Error of coefficients available for this 
LinearRegressionModel")
    +    } else {
    +      val rss = if (model.getWeightCol.isEmpty) {
    +        meanSquaredError * numInstances
    +      } else {
    +        val t = udf { (pred: Double, label: Double, weight: Double) =>
    +          math.pow(label - pred, 2.0) * weight }
    +        predictions.select(t(col(model.getPredictionCol), 
col(model.getLabelCol),
    +          
col(model.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0)
    +      }
    +      val sigma2 = rss / degreesOfFreedom
    +      diagInvAtWA.map(_ * sigma2).map(math.sqrt(_))
    +    }
    +  }
    +
    +  /** T-statistic of estimated coefficients.
    --- End diff --
    
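    minor: This comment is written in ScalaDoc style (the text starts on the same line as the opening `/**`), unlike the other multi-line doc comments in this diff, which put `/**` on its own line. We can fix it in the next update.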


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to