Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/20146#discussion_r160066433
--- Diff: R/pkg/tests/fulltests/test_mllib_regression.R ---
@@ -174,17 +182,17 @@ test_that("spark.glm summary", {
expect_equal(stats$aic, rStats$aic)
# Test spark.glm works with offset
- training <- suppressWarnings(createDataFrame(iris))
+ training <- suppressWarnings(createDataFrame(dataset))
stats <- summary(spark.glm(training, Sepal_Width ~ Sepal_Length +
Species,
family = poisson(), offsetCol =
"Petal_Length"))
rStats <- suppressWarnings(summary(glm(Sepal.Width ~ Sepal.Length +
Species,
- data = iris, family = poisson(), offset =
iris$Petal.Length)))
+ data = dataset, family = poisson(), offset =
dataset$Petal.Length)))
expect_true(all(abs(rStats$coefficients - stats$coefficients) < 1e-3))
# Test summary works on base GLM models
- baseModel <- stats::glm(Sepal.Width ~ Sepal.Length + Species, data =
iris)
+ baseModel <- stats::glm(Sepal.Width ~ Sepal.Length + Species, data =
dataset)
baseSummary <- summary(baseModel)
- expect_true(abs(baseSummary$deviance - 12.19313) < 1e-4)
+ expect_true(abs(baseSummary$deviance - 11.84013) < 1e-4)
--- End diff --
Deviance from R's `glm` fitted on `dataset`:
```R
> baseSummary <- summary(stats::glm(Sepal.Width ~ Sepal.Length + Species,
data = dataset))
> baseSummary$deviance
[1] 11.84013
```
Deviance from `spark.glm` fitted on the corresponding Spark DataFrame (`training`):
```R
> baseSummary <- summary(spark.glm(training, Sepal_Width ~ Sepal_Length +
Species))
> baseSummary$deviance
[1] 11.84013
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]