Repository: spark
Updated Branches:
  refs/heads/master 4fd199369 -> 72427c3e1
[SPARK-13429][MLLIB] Unify Logistic Regression convergence tolerance of ML & MLlib

## What changes were proposed in this pull request?

In order to provide better and more consistent results, let's change the default value of `convergenceTol` in MLlib's `LogisticRegressionWithLBFGS` from `1E-4` to `1E-6`, which matches the default of ML's `LogisticRegression`.
cc dbtsai

## How was this patch tested?

Unit tests.

Author: Yanbo Liang <[email protected]>

Closes #11299 from yanboliang/spark-13429.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72427c3e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72427c3e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72427c3e

Branch: refs/heads/master
Commit: 72427c3e115daf06f7ad8aa50115a8e0da2c6d62
Parents: 4fd1993
Author: Yanbo Liang <[email protected]>
Authored: Mon Feb 22 23:37:09 2016 -0800
Committer: Xiangrui Meng <[email protected]>
Committed: Mon Feb 22 23:37:09 2016 -0800

----------------------------------------------------------------------
 .../org/apache/spark/mllib/optimization/LBFGS.scala |  4 ++--
 .../classification/LogisticRegressionSuite.scala    | 16 ++++++++--------
 python/pyspark/mllib/classification.py              |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/72427c3e/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
index a5bd77e..11179a2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -41,7 +41,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
   extends Optimizer with Logging {
 
   private var numCorrections = 10
-  private var convergenceTol = 1E-4
+  private var convergenceTol = 1E-6
   private var maxNumIterations = 100
   private var regParam = 0.0
 
@@ -59,7 +59,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
   }
 
   /**
-   * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4.
+   * Set the convergence tolerance of iterations for L-BFGS. Default 1E-6.
    * Smaller value will lead to higher accuracy with the cost of more iterations.
    * This value must be nonnegative. Lower convergence values are less tolerant
    * and therefore generally cause more iterations to be run.
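For anyone who tuned against the previous 1E-4 default, the tolerance can still be set explicitly on the optimizer. The sketch below is illustrative only, not part of this change; it assumes an already-prepared `RDD[LabeledPoint]` named `training` and uses the setter shown in the hunk above:

    import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.rdd.RDD

    // `training` is a hypothetical RDD[LabeledPoint] of already-prepared data.
    def trainWithLooserTolerance(training: RDD[LabeledPoint]) = {
      val lr = new LogisticRegressionWithLBFGS().setIntercept(true)
      // Opt back into the old 1E-4 tolerance; leaving this line out now picks up
      // the new 1E-6 default introduced by this commit.
      lr.optimizer.setConvergenceTol(1E-4)
      lr.run(training)
    }

The PySpark API offers the same control through the `tolerance` keyword of `LogisticRegressionWithLBFGS.train`, whose default is updated further down in this diff.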
http://git-wip-us.apache.org/repos/asf/spark/blob/72427c3e/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index d140545..cea0adc 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -667,9 +667,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression with intercept with L1 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)
@@ -726,9 +726,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression without intercept with L1 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)
@@ -786,9 +786,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression with intercept with L2 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)
@@ -845,9 +845,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression without intercept with L2 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)


http://git-wip-us.apache.org/repos/asf/spark/blob/72427c3e/python/pyspark/mllib/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index b24592c..b4d54ef 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -327,7 +327,7 @@ class LogisticRegressionWithLBFGS(object):
 
     @classmethod
     @since('1.2.0')
     def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType="l2",
-              intercept=False, corrections=10, tolerance=1e-4, validateData=True, numClasses=2):
+              intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2):
         """
         Train a logistic regression model on the given data.
 
@@ -359,7 +359,7 @@ class LogisticRegressionWithLBFGS(object):
           (default: 10)
         :param tolerance:
           The convergence tolerance of iterations for L-BFGS.
-          (default: 1e-4)
+          (default: 1e-6)
         :param validateData:
           Boolean parameter which indicates if the algorithm should
           validate data before training.

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
