Github user yanboliang commented on a diff in the pull request:
https://github.com/apache/spark/pull/12778#discussion_r61594078
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
---
@@ -17,65 +17,34 @@
package org.apache.spark.ml.r
+import org.apache.hadoop.fs.Path
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods._
+
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.feature.RFormula
import org.apache.spark.ml.regression._
+import org.apache.spark.ml.util._
import org.apache.spark.sql._
private[r] class GeneralizedLinearRegressionWrapper private (
- pipeline: PipelineModel,
- val features: Array[String]) {
+ val pipeline: PipelineModel,
+ val rFeatures: Array[String],
+ val rCoefficients: Array[Double],
+ val rDispersion: Double,
+ val rNullDeviance: Double,
+ val rDeviance: Double,
+ val rResidualDegreeOfFreedomNull: Long,
+ val rResidualDegreeOfFreedom: Long,
+ val rAic: Double,
+ val rNumIterations: Int,
+ val isLoaded: Boolean = false) extends MLWritable {
private val glm: GeneralizedLinearRegressionModel =
pipeline.stages(1).asInstanceOf[GeneralizedLinearRegressionModel]
- lazy val rFeatures: Array[String] = if (glm.getFitIntercept) {
- Array("(Intercept)") ++ features
- } else {
- features
- }
-
- lazy val rCoefficients: Array[Double] = if (glm.getFitIntercept) {
- Array(glm.intercept) ++ glm.coefficients.toArray ++
- rCoefficientStandardErrors ++ rTValues ++ rPValues
- } else {
- glm.coefficients.toArray ++ rCoefficientStandardErrors ++ rTValues ++
rPValues
- }
-
- private lazy val rCoefficientStandardErrors = if (glm.getFitIntercept) {
- Array(glm.summary.coefficientStandardErrors.last) ++
- glm.summary.coefficientStandardErrors.dropRight(1)
- } else {
- glm.summary.coefficientStandardErrors
- }
-
- private lazy val rTValues = if (glm.getFitIntercept) {
- Array(glm.summary.tValues.last) ++ glm.summary.tValues.dropRight(1)
- } else {
- glm.summary.tValues
- }
-
- private lazy val rPValues = if (glm.getFitIntercept) {
- Array(glm.summary.pValues.last) ++ glm.summary.pValues.dropRight(1)
- } else {
- glm.summary.pValues
- }
-
- lazy val rDispersion: Double = glm.summary.dispersion
-
- lazy val rNullDeviance: Double = glm.summary.nullDeviance
-
- lazy val rDeviance: Double = glm.summary.deviance
-
- lazy val rResidualDegreeOfFreedomNull: Long =
glm.summary.residualDegreeOfFreedomNull
-
- lazy val rResidualDegreeOfFreedom: Long =
glm.summary.residualDegreeOfFreedom
-
- lazy val rAic: Double = glm.summary.aic
-
- lazy val rNumIterations: Int = glm.summary.numIterations
-
lazy val rDevianceResiduals: DataFrame = glm.summary.residuals()
--- End diff --
Because we do not store ```summary``` in
```GeneralizedLinearRegression.save()```, a saved-and-loaded model cannot
provide ```summary```. We add ```isLoaded``` to indicate whether the model was
loaded from disk, so that the corresponding R interface can avoid calling
summary-dependent members such as this one, which would throw an exception.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]