spark git commit: [SPARK-11542] [SPARKR] fix glm with long formula

2015-11-05 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 0eb233507 -> dac83094f


[SPARK-11542] [SPARKR] fix glm with long formula

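Because deparse() breaks a long formula into multiple lines (returning a
character vector with more than one element), deserialization of the formula
fails. Collapse the pieces into a single string before passing it on.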

Author: Davies Liu 

Closes #9510 from davies/fix_glm.

(cherry picked from commit 244010624200eddea6dfd1b2c89f40be45212e96)
Signed-off-by: Davies Liu 

Conflicts:
R/pkg/R/mllib.R


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dac83094
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dac83094
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dac83094

Branch: refs/heads/branch-1.5
Commit: dac83094f52edd9ba3a2072b1b7f3186f1a995b7
Parents: 0eb2335
Author: Davies Liu 
Authored: Thu Nov 5 16:34:10 2015 -0800
Committer: Davies Liu 
Committed: Thu Nov 5 16:37:57 2015 -0800

--
 R/pkg/R/mllib.R               |  3 ++-
 R/pkg/inst/tests/test_mllib.R | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dac83094/R/pkg/R/mllib.R
--
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index cea3d76..5e5d458 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -46,8 +46,9 @@ setClass("PipelineModel", representation(model = "jobj"))
 setMethod("glm", signature(formula = "formula", family = "ANY", data = 
"DataFrame"),
   function(formula, family = c("gaussian", "binomial"), data, lambda = 
0, alpha = 0) {
 family <- match.arg(family)
+formula <- paste(deparse(formula), collapse="")
 model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers",
- "fitRModelFormula", deparse(formula), 
data@sdf, family, lambda,
+ "fitRModelFormula", formula, data@sdf, 
family, lambda,
  alpha)
 return(new("PipelineModel", model = model))
   })

http://git-wip-us.apache.org/repos/asf/spark/blob/dac83094/R/pkg/inst/tests/test_mllib.R
--
diff --git a/R/pkg/inst/tests/test_mllib.R b/R/pkg/inst/tests/test_mllib.R
index f272de7..18d58ba 100644
--- a/R/pkg/inst/tests/test_mllib.R
+++ b/R/pkg/inst/tests/test_mllib.R
@@ -33,6 +33,18 @@ test_that("glm and predict", {
   expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
 })
 
+test_that("glm should work with long formula", {
+  training <- createDataFrame(sqlContext, iris)
+  training$LongLongLongLongLongName <- training$Sepal_Width
+  training$VeryLongLongLongLonLongName <- training$Sepal_Length
+  training$AnotherLongLongLongLongName <- training$Species
+  model <- glm(LongLongLongLongLongName ~ VeryLongLongLongLonLongName + 
AnotherLongLongLongLongName,
+   data = training)
+  vals <- collect(select(predict(model, training), "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+})
+
 test_that("predictions match with native glm", {
   training <- createDataFrame(sqlContext, iris)
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11542] [SPARKR] fix glm with long formula

2015-11-05 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 40eadae16 -> 37c59f0ba


[SPARK-11542] [SPARKR] fix glm with long formula

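Because deparse() breaks a long formula into multiple lines (returning a
character vector with more than one element), deserialization of the formula
fails. Collapse the pieces into a single string before passing it on.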

Author: Davies Liu 

Closes #9510 from davies/fix_glm.

(cherry picked from commit 244010624200eddea6dfd1b2c89f40be45212e96)
Signed-off-by: Davies Liu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/37c59f0b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/37c59f0b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/37c59f0b

Branch: refs/heads/branch-1.6
Commit: 37c59f0ba5c6f5effde2212ca2952058a2fb9ea1
Parents: 40eadae
Author: Davies Liu 
Authored: Thu Nov 5 16:34:10 2015 -0800
Committer: Davies Liu 
Committed: Thu Nov 5 16:34:32 2015 -0800

--
 R/pkg/R/mllib.R               |  3 ++-
 R/pkg/inst/tests/test_mllib.R | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/37c59f0b/R/pkg/R/mllib.R
--
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 60bfadb..b0d73dd 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -48,8 +48,9 @@ setMethod("glm", signature(formula = "formula", family = 
"ANY", data = "DataFram
   function(formula, family = c("gaussian", "binomial"), data, lambda = 
0, alpha = 0,
 standardize = TRUE, solver = "auto") {
 family <- match.arg(family)
+formula <- paste(deparse(formula), collapse="")
 model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers",
- "fitRModelFormula", deparse(formula), 
data@sdf, family, lambda,
+ "fitRModelFormula", formula, data@sdf, 
family, lambda,
  alpha, standardize, solver)
 return(new("PipelineModel", model = model))
   })

http://git-wip-us.apache.org/repos/asf/spark/blob/37c59f0b/R/pkg/inst/tests/test_mllib.R
--
diff --git a/R/pkg/inst/tests/test_mllib.R b/R/pkg/inst/tests/test_mllib.R
index 032cfef..4761e28 100644
--- a/R/pkg/inst/tests/test_mllib.R
+++ b/R/pkg/inst/tests/test_mllib.R
@@ -33,6 +33,18 @@ test_that("glm and predict", {
   expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
 })
 
+test_that("glm should work with long formula", {
+  training <- createDataFrame(sqlContext, iris)
+  training$LongLongLongLongLongName <- training$Sepal_Width
+  training$VeryLongLongLongLonLongName <- training$Sepal_Length
+  training$AnotherLongLongLongLongName <- training$Species
+  model <- glm(LongLongLongLongLongName ~ VeryLongLongLongLonLongName + 
AnotherLongLongLongLongName,
+   data = training)
+  vals <- collect(select(predict(model, training), "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+})
+
 test_that("predictions match with native glm", {
   training <- createDataFrame(sqlContext, iris)
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11542] [SPARKR] fix glm with long formula

2015-11-05 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master b6974f8fe -> 244010624


[SPARK-11542] [SPARKR] fix glm with long formula

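Because deparse() breaks a long formula into multiple lines (returning a
character vector with more than one element), deserialization of the formula
fails. Collapse the pieces into a single string before passing it on.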

Author: Davies Liu 

Closes #9510 from davies/fix_glm.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24401062
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24401062
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24401062

Branch: refs/heads/master
Commit: 244010624200eddea6dfd1b2c89f40be45212e96
Parents: b6974f8
Author: Davies Liu 
Authored: Thu Nov 5 16:34:10 2015 -0800
Committer: Davies Liu 
Committed: Thu Nov 5 16:34:10 2015 -0800

--
 R/pkg/R/mllib.R               |  3 ++-
 R/pkg/inst/tests/test_mllib.R | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/24401062/R/pkg/R/mllib.R
--
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 60bfadb..b0d73dd 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -48,8 +48,9 @@ setMethod("glm", signature(formula = "formula", family = 
"ANY", data = "DataFram
   function(formula, family = c("gaussian", "binomial"), data, lambda = 
0, alpha = 0,
 standardize = TRUE, solver = "auto") {
 family <- match.arg(family)
+formula <- paste(deparse(formula), collapse="")
 model <- callJStatic("org.apache.spark.ml.api.r.SparkRWrappers",
- "fitRModelFormula", deparse(formula), 
data@sdf, family, lambda,
+ "fitRModelFormula", formula, data@sdf, 
family, lambda,
  alpha, standardize, solver)
 return(new("PipelineModel", model = model))
   })

http://git-wip-us.apache.org/repos/asf/spark/blob/24401062/R/pkg/inst/tests/test_mllib.R
--
diff --git a/R/pkg/inst/tests/test_mllib.R b/R/pkg/inst/tests/test_mllib.R
index 032cfef..4761e28 100644
--- a/R/pkg/inst/tests/test_mllib.R
+++ b/R/pkg/inst/tests/test_mllib.R
@@ -33,6 +33,18 @@ test_that("glm and predict", {
   expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), 
"double")
 })
 
+test_that("glm should work with long formula", {
+  training <- createDataFrame(sqlContext, iris)
+  training$LongLongLongLongLongName <- training$Sepal_Width
+  training$VeryLongLongLongLonLongName <- training$Sepal_Length
+  training$AnotherLongLongLongLongName <- training$Species
+  model <- glm(LongLongLongLongLongName ~ VeryLongLongLongLonLongName + 
AnotherLongLongLongLongName,
+   data = training)
+  vals <- collect(select(predict(model, training), "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), 
iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+})
+
 test_that("predictions match with native glm", {
   training <- createDataFrame(sqlContext, iris)
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org