WeichenXu123 commented on a change in pull request #31693:
URL: https://github.com/apache/spark/pull/31693#discussion_r589345291
##########
File path:
mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
##########
@@ -824,6 +843,44 @@ class LogisticRegressionSuite extends MLTest with
DefaultReadWriteTest {
assert(model6.coefficients ~= coefficientsExpected5 relTol 1E-3)
}
+ test("SPARK-34448: binary logistic regression with intercept with features
with small var") {
+ val trainer1 = new
LogisticRegression().setFitIntercept(true).setStandardization(true)
+ .setWeightCol("weight")
+ val trainer2 = new
LogisticRegression().setFitIntercept(true).setStandardization(false)
+ .setWeightCol("weight")
+
+ val model1 = trainer1.fit(binaryDatasetWithSmallVar)
+ val model2 = trainer2.fit(binaryDatasetWithSmallVar)
+
+ /*
+ Use the following R code to load the data and train the model using
glmnet package.
+ library("glmnet")
+ data <- read.csv("path", header=FALSE)
+ label = factor(data$V1)
+ w = data$V2
+ features = as.matrix(data.frame(data$V3, data$V4, data$V5, data$V6))
+ coefficients = coef(glmnet(features, label, weights=w,
family="binomial", alpha = 0,
+ lambda = 0))
+ coefficients
+ 5 x 1 sparse Matrix of class "dgCMatrix"
+ s0
+ (Intercept) -0.1609517
+ data.V3 -0.5843178
+ data.V4 0.8936137
+ data.V5 -0.3820793
+ data.V6 1.6347469
+ */
+ val coefficientsR = Vectors.dense(-0.5843178, 0.8936137, -0.3820793,
1.6347469)
+ val interceptR = -0.1609517
+
+ assert(model1.intercept ~== interceptR relTol 1E-3)
+ assert(model1.coefficients ~= coefficientsR relTol 1E-3)
+
+ // Without regularization, with or without standardization will converge
to the same solution.
+ assert(model2.intercept ~== interceptR relTol 1E-3)
+ assert(model2.coefficients ~= coefficientsR relTol 1E-3)
+ }
+
Review comment:
The bounded-optimizer case may be complicated and is rarely used, so we can
skip it for now.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]