[GitHub] [spark] dilipbiswal commented on a change in pull request #24087: [SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side and fix join docs for scala, python and r

2019-03-14 Thread GitBox
dilipbiswal commented on a change in pull request #24087: 
[SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side 
and fix join docs for scala, python and r
URL: https://github.com/apache/spark/pull/24087#discussion_r265849362
 
 

 ##
 File path: R/pkg/R/DataFrame.R
 ##
 @@ -2520,8 +2520,9 @@ setMethod("dropDuplicates",
 #' Column expression. If joinExpr is omitted, the default, inner join is 
attempted and an error is
 #' thrown if it would be a Cartesian Product. For Cartesian join, use 
crossJoin instead.
 #' @param joinType The type of join to perform, default 'inner'.
-#' Must be one of: 'inner', 'cross', 'outer', 'full', 'full_outer',
-#' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', or 'left_anti'.
+#' Must be one of: 'inner', 'cross', 'outer', 'full', 'fullouter', 
'full_outer',
+#' 'left', 'leftouter', 'left_outer', 'right', 'rightouter', 'right_outer', 
'semi',
+# 'leftsemi', 'left_semi', 'anti', 'leftanti', 'left_anti'.
 
 Review comment:
   @felixcheung Thanks a lot. Will fix.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dilipbiswal commented on a change in pull request #24087: [SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side and fix join docs for scala, python and r

2019-03-14 Thread GitBox
dilipbiswal commented on a change in pull request #24087: 
[SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side 
and fix join docs for scala, python and r
URL: https://github.com/apache/spark/pull/24087#discussion_r265849398
 
 

 ##
 File path: R/pkg/R/DataFrame.R
 ##
 @@ -2553,14 +2554,14 @@ setMethod("join",
 "outer", "full", "fullouter", "full_outer",
 "left", "leftouter", "left_outer",
 "right", "rightouter", "right_outer",
-"left_semi", "leftsemi", "left_anti", "leftanti")) {
+"semi", "left_semi", "leftsemi", "anti", "left_anti", 
"leftanti")) {
   joinType <- gsub("_", "", joinType)
   sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, 
joinType)
 } else {
-  stop("joinType must be one of the following types: ",
-   "'inner', 'cross', 'outer', 'full', 'full_outer',",
-   "'left', 'left_outer', 'right', 'right_outer',",
-   "'left_semi', or 'left_anti'.")
+  stop(paste("joinType must be one of the following types: ",
 
 Review comment:
   @felixcheung Sure.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dilipbiswal commented on a change in pull request #24087: [SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side and fix join docs for scala, python and r

2019-03-14 Thread GitBox
dilipbiswal commented on a change in pull request #24087: 
[SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side 
and fix join docs for scala, python and r
URL: https://github.com/apache/spark/pull/24087#discussion_r265848916
 
 

 ##
 File path: R/pkg/tests/fulltests/test_sparkSQL.R
 ##
 @@ -2356,40 +2356,96 @@ test_that("join(), crossJoin() and merge() on a 
DataFrame", {
   expect_equal(names(joined2), c("age", "name", "name", "test"))
   expect_equal(count(joined2), 3)
 
-  joined3 <- join(df, df2, df$name == df2$name, "rightouter")
+  joined3 <- join(df, df2, df$name == df2$name, "right")
   expect_equal(names(joined3), c("age", "name", "name", "test"))
   expect_equal(count(joined3), 4)
   expect_true(is.na(collect(orderBy(joined3, joined3$age))$age[2]))
-
-  joined4 <- select(join(df, df2, df$name == df2$name, "outer"),
-alias(df$age + 5, "newAge"), df$name, df2$test)
-  expect_equal(names(joined4), c("newAge", "name", "test"))
+  
+  joined4 <- join(df, df2, df$name == df2$name, "right_outer")
+  expect_equal(names(joined4), c("age", "name", "name", "test"))
   expect_equal(count(joined4), 4)
-  expect_equal(collect(orderBy(joined4, joined4$name))$newAge[3], 24)
+  expect_true(is.na(collect(orderBy(joined4, joined4$age))$age[2]))
 
-  joined5 <- join(df, df2, df$name == df2$name, "leftouter")
+  joined5 <- join(df, df2, df$name == df2$name, "rightouter")
   expect_equal(names(joined5), c("age", "name", "name", "test"))
-  expect_equal(count(joined5), 3)
-  expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[1]))
-
-  joined6 <- join(df, df2, df$name == df2$name, "inner")
-  expect_equal(names(joined6), c("age", "name", "name", "test"))
-  expect_equal(count(joined6), 3)
+  expect_equal(count(joined5), 4)
+  expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[2]))
 
-  joined7 <- join(df, df2, df$name == df2$name, "leftsemi")
-  expect_equal(names(joined7), c("age", "name"))
-  expect_equal(count(joined7), 3)
 
-  joined8 <- join(df, df2, df$name == df2$name, "left_outer")
-  expect_equal(names(joined8), c("age", "name", "name", "test"))
-  expect_equal(count(joined8), 3)
-  expect_true(is.na(collect(orderBy(joined8, joined8$age))$age[1]))
-
-  joined9 <- join(df, df2, df$name == df2$name, "right_outer")
-  expect_equal(names(joined9), c("age", "name", "name", "test"))
+  joined6 <- select(join(df, df2, df$name == df2$name, "outer"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+  expect_equal(names(joined6), c("newAge", "name", "test"))
+  expect_equal(count(joined6), 4)
+  expect_equal(collect(orderBy(joined6, joined6$name))$newAge[3], 24)
+  
+  joined7 <- select(join(df, df2, df$name == df2$name, "full"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+  expect_equal(names(joined7), c("newAge", "name", "test"))
+  expect_equal(count(joined7), 4)
+  expect_equal(collect(orderBy(joined7, joined7$name))$newAge[3], 24)
+  
+  joined8 <- select(join(df, df2, df$name == df2$name, "fullouter"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+  expect_equal(names(joined8), c("newAge", "name", "test"))
+  expect_equal(count(joined8), 4)
+  expect_equal(collect(orderBy(joined8, joined8$name))$newAge[3], 24)
+  
+  joined9 <- select(join(df, df2, df$name == df2$name, "full_outer"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+  expect_equal(names(joined9), c("newAge", "name", "test"))
   expect_equal(count(joined9), 4)
-  expect_true(is.na(collect(orderBy(joined9, joined9$age))$age[2]))
-
+  expect_equal(collect(orderBy(joined9, joined9$name))$newAge[3], 24)
+
+  joined10 <- join(df, df2, df$name == df2$name, "left")
+  expect_equal(names(joined10), c("age", "name", "name", "test"))
+  expect_equal(count(joined10), 3)
+  expect_true(is.na(collect(orderBy(joined10, joined10$age))$age[1]))
+  
+  joined11 <- join(df, df2, df$name == df2$name, "leftouter")
+  expect_equal(names(joined11), c("age", "name", "name", "test"))
+  expect_equal(count(joined11), 3)
+  expect_true(is.na(collect(orderBy(joined11, joined11$age))$age[1]))
+  
+  joined12 <- join(df, df2, df$name == df2$name, "left_outer")
+  expect_equal(names(joined12), c("age", "name", "name", "test"))
+  expect_equal(count(joined12), 3)
+  expect_true(is.na(collect(orderBy(joined12, joined12$age))$age[1]))
+
+  joined13 <- join(df, df2, df$name == df2$name, "inner")
+  expect_equal(names(joined13), c("age", "name", "name", "test"))
+  expect_equal(count(joined13), 3)
+
+  joined14 <- join(df, df2, df$name == df2$name, "semi")
+  expect_equal(names(joined14), c("age", "name"))
+  expect_equal(count(joined14), 3)
+  
+  joined14 <- join(df, df2, df$name == df2$name, "leftsemi")
+  expect_equal(names(joined14), c("age", "name"))
+  expect_equal(count(joined14), 3)
+  
+  joined15 <- join(df, df2, df$name == df2$name, "left_semi")
+  expect_equal(names(joined15), c("age", "name"))
+