spark git commit: [SPARK-20449][ML] Upgrade breeze version to 0.13.1

2017-04-25 Thread dbtsai
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 e2591c6d7 -> 55834a898


[SPARK-20449][ML] Upgrade breeze version to 0.13.1

## What changes were proposed in this pull request?
Upgrade the breeze version to 0.13.1, which fixes some critical bugs in L-BFGS-B.

## How was this patch tested?
Existing unit tests.

Author: Yanbo Liang 

Closes #17746 from yanboliang/spark-20449.

(cherry picked from commit 67eef47acfd26f1f0be3e8ef10453514f3655f62)
Signed-off-by: DB Tsai 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55834a89
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55834a89
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55834a89

Branch: refs/heads/branch-2.2
Commit: 55834a898547b00bb8de1891fd061651f941aa0b
Parents: e2591c6
Author: Yanbo Liang 
Authored: Tue Apr 25 17:10:41 2017 +
Committer: DB Tsai 
Committed: Tue Apr 25 17:11:06 2017 +

--
 LICENSE   |  1 +
 .../tests/testthat/test_mllib_classification.R| 10 +-
 dev/deps/spark-deps-hadoop-2.6| 12 +++-
 dev/deps/spark-deps-hadoop-2.7| 12 +++-
 .../regression/GeneralizedLinearRegression.scala  |  4 ++--
 .../apache/spark/mllib/clustering/LDAModel.scala  | 14 --
 .../spark/mllib/optimization/LBFGSSuite.scala |  4 ++--
 pom.xml   |  2 +-
 python/pyspark/ml/classification.py   | 18 --
 9 files changed, 37 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/55834a89/LICENSE
--
diff --git a/LICENSE b/LICENSE
index 7950dd6..c21032a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -297,3 +297,4 @@ The text of each license is also included at 
licenses/LICENSE-[project].txt.
  (MIT License) RowsGroup (http://datatables.net/license/mit)
  (MIT License) jsonFormatter 
(http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
  (MIT License) modernizr 
(https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
+ (MIT License) machinist (https://github.com/typelevel/machinist)

http://git-wip-us.apache.org/repos/asf/spark/blob/55834a89/R/pkg/inst/tests/testthat/test_mllib_classification.R
--
diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R 
b/R/pkg/inst/tests/testthat/test_mllib_classification.R
index 459254d..af7cbdc 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_classification.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -288,18 +288,18 @@ test_that("spark.mlp", {
 c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
   expect_equal(head(mlpPredictions$prediction, 10),
-   c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
+   c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
 
   model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, 
initialWeights =
 c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 
9.0))
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
   expect_equal(head(mlpPredictions$prediction, 10),
-   c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
+   c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
 
   model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2)
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
   expect_equal(head(mlpPredictions$prediction, 10),
-   c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "2.0", 
"1.0", "0.0"))
+   c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "0.0", 
"1.0", "0.0"))
 
   # Test formula works well
   df <- suppressWarnings(createDataFrame(iris))
@@ -310,8 +310,8 @@ test_that("spark.mlp", {
   expect_equal(summary$numOfOutputs, 3)
   expect_equal(summary$layers, c(4, 3))
   expect_equal(length(summary$weights), 15)
-  expect_equal(head(summary$weights, 5), list(-1.1957257, -5.2693685, 
7.4489734, -6.3751413,
-   -10.2376130), tolerance = 1e-6)
+  expect_equal(head(summary$weights, 5), list(-0.5793153, -4.652961, 6.216155, 
-6.649478,
+   -10.51147), tolerance = 1e-3)
 })
 
 test_that("spark.naiveBayes", {

http://git-wip-us.apache.org/repos/asf/spark/blob/55834a89/dev/deps/spark-deps-hadoop-2.6
--
diff 

spark git commit: [SPARK-20449][ML] Upgrade breeze version to 0.13.1

2017-04-25 Thread dbtsai
Repository: spark
Updated Branches:
  refs/heads/master 387565cf1 -> 67eef47ac


[SPARK-20449][ML] Upgrade breeze version to 0.13.1

## What changes were proposed in this pull request?
Upgrade the breeze version to 0.13.1, which fixes some critical bugs in L-BFGS-B.

## How was this patch tested?
Existing unit tests.

Author: Yanbo Liang 

Closes #17746 from yanboliang/spark-20449.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/67eef47a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/67eef47a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/67eef47a

Branch: refs/heads/master
Commit: 67eef47acfd26f1f0be3e8ef10453514f3655f62
Parents: 387565c
Author: Yanbo Liang 
Authored: Tue Apr 25 17:10:41 2017 +
Committer: DB Tsai 
Committed: Tue Apr 25 17:10:41 2017 +

--
 LICENSE   |  1 +
 .../tests/testthat/test_mllib_classification.R| 10 +-
 dev/deps/spark-deps-hadoop-2.6| 12 +++-
 dev/deps/spark-deps-hadoop-2.7| 12 +++-
 .../regression/GeneralizedLinearRegression.scala  |  4 ++--
 .../apache/spark/mllib/clustering/LDAModel.scala  | 14 --
 .../spark/mllib/optimization/LBFGSSuite.scala |  4 ++--
 pom.xml   |  2 +-
 python/pyspark/ml/classification.py   | 18 --
 9 files changed, 37 insertions(+), 40 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/67eef47a/LICENSE
--
diff --git a/LICENSE b/LICENSE
index 7950dd6..c21032a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -297,3 +297,4 @@ The text of each license is also included at 
licenses/LICENSE-[project].txt.
  (MIT License) RowsGroup (http://datatables.net/license/mit)
  (MIT License) jsonFormatter 
(http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
  (MIT License) modernizr 
(https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
+ (MIT License) machinist (https://github.com/typelevel/machinist)

http://git-wip-us.apache.org/repos/asf/spark/blob/67eef47a/R/pkg/inst/tests/testthat/test_mllib_classification.R
--
diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R 
b/R/pkg/inst/tests/testthat/test_mllib_classification.R
index 459254d..af7cbdc 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_classification.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -288,18 +288,18 @@ test_that("spark.mlp", {
 c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
   expect_equal(head(mlpPredictions$prediction, 10),
-   c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
+   c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
 
   model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, 
initialWeights =
 c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 
9.0))
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
   expect_equal(head(mlpPredictions$prediction, 10),
-   c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
+   c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", 
"1.0", "0.0"))
 
   model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2)
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
   expect_equal(head(mlpPredictions$prediction, 10),
-   c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "2.0", 
"1.0", "0.0"))
+   c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "0.0", 
"1.0", "0.0"))
 
   # Test formula works well
   df <- suppressWarnings(createDataFrame(iris))
@@ -310,8 +310,8 @@ test_that("spark.mlp", {
   expect_equal(summary$numOfOutputs, 3)
   expect_equal(summary$layers, c(4, 3))
   expect_equal(length(summary$weights), 15)
-  expect_equal(head(summary$weights, 5), list(-1.1957257, -5.2693685, 
7.4489734, -6.3751413,
-   -10.2376130), tolerance = 1e-6)
+  expect_equal(head(summary$weights, 5), list(-0.5793153, -4.652961, 6.216155, 
-6.649478,
+   -10.51147), tolerance = 1e-3)
 })
 
 test_that("spark.naiveBayes", {

http://git-wip-us.apache.org/repos/asf/spark/blob/67eef47a/dev/deps/spark-deps-hadoop-2.6
--
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 73dc1f9..9287bd4 100644
---