[GitHub] spark pull request #15394: [SPARK-17748][ML] One pass solver for Weighted Le...

yanboliang Mon, 24 Oct 2016 23:40:41 -0700

Github user yanboliang commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15394#discussion_r84838671
  
    --- Diff: 
mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala 
---
    @@ -132,28 +232,256 @@ class WeightedLeastSquaresSuite extends 
SparkFunSuite with MLlibTestSparkContext
         var idx = 0
         for (fitIntercept <- Seq(false, true)) {
           for (standardization <- Seq(false, true)) {
    -        val wls = new WeightedLeastSquares(
    -          fitIntercept, regParam = 0.0, standardizeFeatures = 
standardization,
    -          standardizeLabel = standardization).fit(instancesConstLabel)
    -        val actual = Vectors.dense(wls.intercept, wls.coefficients(0), 
wls.coefficients(1))
    -        assert(actual ~== expected(idx) absTol 1e-4)
    +        for (solver <- WeightedLeastSquares.supportedSolvers) {
    +          val wls = new WeightedLeastSquares(fitIntercept, regParam = 0.0, 
elasticNetParam = 0.0,
    +            standardizeFeatures = standardization, standardizeLabel = 
standardization,
    +            solverType = solver).fit(instancesConstLabel)
    +          val actual = Vectors.dense(wls.intercept, wls.coefficients(0), 
wls.coefficients(1))
    +          assert(actual ~== expected(idx) absTol 1e-4)
    +        }
           }
           idx += 1
         }
    +
    +    // when label is constant zero, and fitIntercept is false, we should 
not train and get all zeros
    +    for (solver <- WeightedLeastSquares.supportedSolvers) {
    +      val wls = new WeightedLeastSquares(fitIntercept = false, regParam = 
0.0,
    +        elasticNetParam = 0.0, standardizeFeatures = true, 
standardizeLabel = true,
    +        solverType = solver).fit(instancesConstZeroLabel)
    +      val actual = Vectors.dense(wls.intercept, wls.coefficients(0), 
wls.coefficients(1))
    +      assert(actual === Vectors.dense(0.0, 0.0, 0.0))
    +      assert(wls.objectiveHistory === Array(0.0))
    +    }
       }
     
       test("WLS with regularization when label is constant") {
         // if regParam is non-zero and standardization is true, the problem is 
ill-defined and
         // an exception is thrown.
    -    val wls = new WeightedLeastSquares(
    -      fitIntercept = false, regParam = 0.1, standardizeFeatures = true,
    -      standardizeLabel = true)
    -    intercept[IllegalArgumentException]{
    -      wls.fit(instancesConstLabel)
    +    for (solver <- WeightedLeastSquares.supportedSolvers) {
    +      val wls = new WeightedLeastSquares(fitIntercept = false, regParam = 
0.1,
    +        elasticNetParam = 0.0, standardizeFeatures = true, 
standardizeLabel = true,
    +        solverType = solver)
    +      intercept[IllegalArgumentException]{
    +        wls.fit(instancesConstLabel)
    +      }
         }
       }
     
    -  test("WLS against glmnet") {
    +  test("WLS against glmnet with constant features") {
    +    // Cholesky solver does not handle singular input with no 
regularization
    +    for (fitIntercept <- Seq(false, true);
    +         standardization <- Seq(false, true)) {
    +      val wls = new WeightedLeastSquares(fitIntercept, regParam = 0.0, 
elasticNetParam = 0.0,
    +        standardizeFeatures = standardization, standardizeLabel = 
standardization,
    +        solverType = WeightedLeastSquares.Cholesky)
    +      intercept[SingularMatrixException] {
    +        wls.fit(constantFeaturesInstances)
    +      }
    +    }
    +
    +    // Cholesky also fails when regularization is added but we don't wish 
to standardize
    +    val wls = new WeightedLeastSquares(true, regParam = 0.5, 
elasticNetParam = 0.0,
    +      standardizeFeatures = false, standardizeLabel = false,
    +      solverType = WeightedLeastSquares.Cholesky)
    +    intercept[SingularMatrixException] {
    +      wls.fit(constantFeaturesInstances)
    +    }
    +
    +    /*
    +      for (intercept in c(FALSE, TRUE)) {
    +        model <- glmnet(A, b, weights=w, intercept=intercept, lambda=0.5,
    +                       standardize=T, alpha=0.0, thresh=1E-14)
    +        print(as.vector(coef(model)))
    +      }
    +      [1] 0.000000 0.000000 2.235802
    +      [1] 9.798771 0.000000 1.365503
    +     */
    +    // should not fail when regularization and standardization are added
    +    val expectedCholesky = Seq(
    +      Vectors.dense(0.0, 0.0, 2.235802),
    +      Vectors.dense(9.798771, 0.0, 1.365503)
    +    )
    +    var idx = 0
    +    for (fitIntercept <- Seq(false, true)) {
    +      val wls = new WeightedLeastSquares(fitIntercept = fitIntercept, 
regParam = 0.5,
    +        elasticNetParam = 0.0, standardizeFeatures = true,
    +        standardizeLabel = true, solverType = 
WeightedLeastSquares.Cholesky)
    +        .fit(constantFeaturesInstances)
    +      val actual = Vectors.dense(wls.intercept, wls.coefficients(0), 
wls.coefficients(1))
    +      assert(actual ~== expectedCholesky(idx) absTol 1e-6)
    +      idx += 1
    +    }
    +
    +    /*
    +      for (intercept in c(FALSE, TRUE)) {
    +        for (standardize in c(FALSE, TRUE)) {
    +          for (regParams in list(c(0.0, 0.0), c(0.5, 0.0), c(0.5, 0.5), 
c(0.5, 1.0))) {
    +            model <- glmnet(A, b, weights=w, intercept=intercept, 
lambda=regParams[1],
    +                           standardize=standardize, alpha=regParams[2], 
thresh=1E-14)
    +            print(as.vector(coef(model)))
    +          }
    +        }
    +      }
    +      [1] 0.000000 0.000000 2.253012
    +      [1] 0.000000 0.000000 2.250857
    +      [1] 0.000000 0.000000 2.249784
    +      [1] 0.000000 0.000000 2.248709
    +      [1] 0.000000 0.000000 2.253012
    +      [1] 0.000000 0.000000 2.235802
    +      [1] 0.000000 0.000000 2.238297
    +      [1] 0.000000 0.000000 2.240811
    +      [1] 8.218905 0.000000 1.517413
    +      [1] 8.434286 0.000000 1.496703
    +      [1] 8.648497 0.000000 1.476106
    +      [1] 8.865672 0.000000 1.455224
    +      [1] 8.218905 0.000000 1.517413
    +      [1] 9.798771 0.000000 1.365503
    +      [1] 9.919095 0.000000 1.353933
    +      [1] 10.052804  0.000000  1.341077
    +     */
    +    val expectedQuasiNewton = Seq(
    +      Vectors.dense(0.000000, 0.000000, 2.253012),
    +      Vectors.dense(0.000000, 0.000000, 2.250857),
    +      Vectors.dense(0.000000, 0.000000, 2.249784),
    +      Vectors.dense(0.000000, 0.000000, 2.248709),
    +      Vectors.dense(0.000000, 0.000000, 2.253012),
    +      Vectors.dense(0.000000, 0.000000, 2.235802),
    +      Vectors.dense(0.000000, 0.000000, 2.238297),
    +      Vectors.dense(0.000000, 0.000000, 2.240811),
    +      Vectors.dense(8.218905, 0.000000, 1.517413),
    +      Vectors.dense(8.434286, 0.000000, 1.496703),
    +      Vectors.dense(8.648497, 0.000000, 1.476106),
    +      Vectors.dense(8.865672, 0.000000, 1.455224),
    +      Vectors.dense(8.218905, 0.000000, 1.517413),
    +      Vectors.dense(9.798771, 0.000000, 1.365503),
    +      Vectors.dense(9.919095, 0.000000, 1.353933),
    +      Vectors.dense(10.052804, 0.000000, 1.341077))
    +
    +    idx = 0
    +    for (fitIntercept <- Seq(false, true);
    +         standardization <- Seq(false, true);
    +         (lambda, alpha) <- Seq((0.0, 0.0), (0.5, 0.0), (0.5, 0.5), (0.5, 
1.0))) {
    +      for (solver <- Seq(WeightedLeastSquares.Auto, 
WeightedLeastSquares.Cholesky)) {
    --- End diff --
    
    Should the enumeration be removed, since it's not used? This is a very 
minor issue; we can address it in follow-up work.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working,
please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #15394: [SPARK-17748][ML] One pass solver for Weighted Le...

Reply via email to