Repository: spark
Updated Branches:
  refs/heads/master 4fd199369 -> 72427c3e1
[SPARK-13429][MLLIB] Unify Logistic Regression convergence tolerance of ML & MLlib

## What changes were proposed in this pull request?

In order to provide better and more consistent results, let's change the default value of `convergenceTol` in MLlib's `LogisticRegressionWithLBFGS` from `1E-4` to `1E-6`, which matches the default of ML's `LogisticRegression`.
cc dbtsai

## How was this patch tested?

Unit tests.

Author: Yanbo Liang <[email protected]>

Closes #11299 from yanboliang/spark-13429.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72427c3e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72427c3e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72427c3e

Branch: refs/heads/master
Commit: 72427c3e115daf06f7ad8aa50115a8e0da2c6d62
Parents: 4fd1993
Author: Yanbo Liang <[email protected]>
Authored: Mon Feb 22 23:37:09 2016 -0800
Committer: Xiangrui Meng <[email protected]>
Committed: Mon Feb 22 23:37:09 2016 -0800

----------------------------------------------------------------------
 .../org/apache/spark/mllib/optimization/LBFGS.scala |  4 ++--
 .../classification/LogisticRegressionSuite.scala    | 16 ++++++++--------
 python/pyspark/mllib/classification.py              |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/72427c3e/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
index a5bd77e..11179a2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -41,7 +41,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
   extends Optimizer with Logging {
 
   private var numCorrections = 10
-  private var convergenceTol = 1E-4
+  private var convergenceTol = 1E-6
   private var maxNumIterations = 100
   private var regParam = 0.0
 
@@ -59,7 +59,7 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
   }
 
   /**
-   * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4.
+   * Set the convergence tolerance of iterations for L-BFGS. Default 1E-6.
    * Smaller value will lead to higher accuracy with the cost of more iterations.
    * This value must be nonnegative. Lower convergence values are less tolerant
    * and therefore generally cause more iterations to be run.
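For anyone who tuned against the previous 1E-4 default, the tolerance can still be set explicitly on the optimizer. The sketch below is illustrative only, not part of this change; it assumes an already-prepared `RDD[LabeledPoint]` named `training` and uses the setter shown in the hunk above:

    import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.rdd.RDD

    // `training` is a hypothetical RDD[LabeledPoint] of already-prepared data.
    def trainWithLooserTolerance(training: RDD[LabeledPoint]) = {
      val lr = new LogisticRegressionWithLBFGS().setIntercept(true)
      // Opt back into the old 1E-4 tolerance; leaving this line out now picks up
      // the new 1E-6 default introduced by this commit.
      lr.optimizer.setConvergenceTol(1E-4)
      lr.run(training)
    }

The PySpark API offers the same control through the `tolerance` keyword of `LogisticRegressionWithLBFGS.train`, whose default is updated further down in this diff.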
http://git-wip-us.apache.org/repos/asf/spark/blob/72427c3e/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index d140545..cea0adc 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -667,9 +667,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression with intercept with L1 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)
@@ -726,9 +726,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression without intercept with L1 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new L1Updater).setRegParam(0.12)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)
@@ -786,9 +786,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression with intercept with L2 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(true).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)
@@ -845,9 +845,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
 
   test("binary logistic regression without intercept with L2 regularization") {
     val trainer1 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(true)
-    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer1.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
     val trainer2 = new LogisticRegressionWithLBFGS().setIntercept(false).setFeatureScaling(false)
-    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37).setConvergenceTol(1E-6)
+    trainer2.optimizer.setUpdater(new SquaredL2Updater).setRegParam(1.37)
 
     val model1 = trainer1.run(binaryDataset)
     val model2 = trainer2.run(binaryDataset)


http://git-wip-us.apache.org/repos/asf/spark/blob/72427c3e/python/pyspark/mllib/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index b24592c..b4d54ef 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -327,7 +327,7 @@ class LogisticRegressionWithLBFGS(object):
 
     @classmethod
     @since('1.2.0')
     def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType="l2",
-              intercept=False, corrections=10, tolerance=1e-4, validateData=True, numClasses=2):
+              intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2):
         """
         Train a logistic regression model on the given data.
 
@@ -359,7 +359,7 @@ class LogisticRegressionWithLBFGS(object):
           (default: 10)
         :param tolerance:
           The convergence tolerance of iterations for L-BFGS.
-          (default: 1e-4)
+          (default: 1e-6)
         :param validateData:
           Boolean parameter which indicates if the algorithm should
           validate data before training.

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
