dilipbiswal commented on a change in pull request #24087:
[SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side
and fix join docs for scala, python and r
URL: https://github.com/apache/spark/pull/24087#discussion_r265848916
##
File path: R/pkg/tests/fulltests/test_sparkSQL.R
##
@@ -2356,40 +2356,96 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
expect_equal(names(joined2), c("age", "name", "name", "test"))
expect_equal(count(joined2), 3)
- joined3 <- join(df, df2, df$name == df2$name, "rightouter")
+ joined3 <- join(df, df2, df$name == df2$name, "right")
expect_equal(names(joined3), c("age", "name", "name", "test"))
expect_equal(count(joined3), 4)
expect_true(is.na(collect(orderBy(joined3, joined3$age))$age[2]))
-
- joined4 <- select(join(df, df2, df$name == df2$name, "outer"),
-alias(df$age + 5, "newAge"), df$name, df2$test)
- expect_equal(names(joined4), c("newAge", "name", "test"))
+
+ joined4 <- join(df, df2, df$name == df2$name, "right_outer")
+ expect_equal(names(joined4), c("age", "name", "name", "test"))
expect_equal(count(joined4), 4)
- expect_equal(collect(orderBy(joined4, joined4$name))$newAge[3], 24)
+ expect_true(is.na(collect(orderBy(joined4, joined4$age))$age[2]))
- joined5 <- join(df, df2, df$name == df2$name, "leftouter")
+ joined5 <- join(df, df2, df$name == df2$name, "rightouter")
expect_equal(names(joined5), c("age", "name", "name", "test"))
- expect_equal(count(joined5), 3)
- expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[1]))
-
- joined6 <- join(df, df2, df$name == df2$name, "inner")
- expect_equal(names(joined6), c("age", "name", "name", "test"))
- expect_equal(count(joined6), 3)
+ expect_equal(count(joined5), 4)
+ expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[2]))
- joined7 <- join(df, df2, df$name == df2$name, "leftsemi")
- expect_equal(names(joined7), c("age", "name"))
- expect_equal(count(joined7), 3)
- joined8 <- join(df, df2, df$name == df2$name, "left_outer")
- expect_equal(names(joined8), c("age", "name", "name", "test"))
- expect_equal(count(joined8), 3)
- expect_true(is.na(collect(orderBy(joined8, joined8$age))$age[1]))
-
- joined9 <- join(df, df2, df$name == df2$name, "right_outer")
- expect_equal(names(joined9), c("age", "name", "name", "test"))
+ joined6 <- select(join(df, df2, df$name == df2$name, "outer"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+ expect_equal(names(joined6), c("newAge", "name", "test"))
+ expect_equal(count(joined6), 4)
+ expect_equal(collect(orderBy(joined6, joined6$name))$newAge[3], 24)
+
+ joined7 <- select(join(df, df2, df$name == df2$name, "full"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+ expect_equal(names(joined7), c("newAge", "name", "test"))
+ expect_equal(count(joined7), 4)
+ expect_equal(collect(orderBy(joined7, joined7$name))$newAge[3], 24)
+
+ joined8 <- select(join(df, df2, df$name == df2$name, "fullouter"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+ expect_equal(names(joined8), c("newAge", "name", "test"))
+ expect_equal(count(joined8), 4)
+ expect_equal(collect(orderBy(joined8, joined8$name))$newAge[3], 24)
+
+ joined9 <- select(join(df, df2, df$name == df2$name, "full_outer"),
+alias(df$age + 5, "newAge"), df$name, df2$test)
+ expect_equal(names(joined9), c("newAge", "name", "test"))
expect_equal(count(joined9), 4)
- expect_true(is.na(collect(orderBy(joined9, joined9$age))$age[2]))
-
+ expect_equal(collect(orderBy(joined9, joined9$name))$newAge[3], 24)
+
+ joined10 <- join(df, df2, df$name == df2$name, "left")
+ expect_equal(names(joined10), c("age", "name", "name", "test"))
+ expect_equal(count(joined10), 3)
+ expect_true(is.na(collect(orderBy(joined10, joined10$age))$age[1]))
+
+ joined11 <- join(df, df2, df$name == df2$name, "leftouter")
+ expect_equal(names(joined11), c("age", "name", "name", "test"))
+ expect_equal(count(joined11), 3)
+ expect_true(is.na(collect(orderBy(joined11, joined11$age))$age[1]))
+
+ joined12 <- join(df, df2, df$name == df2$name, "left_outer")
+ expect_equal(names(joined12), c("age", "name", "name", "test"))
+ expect_equal(count(joined12), 3)
+ expect_true(is.na(collect(orderBy(joined12, joined12$age))$age[1]))
+
+ joined13 <- join(df, df2, df$name == df2$name, "inner")
+ expect_equal(names(joined13), c("age", "name", "name", "test"))
+ expect_equal(count(joined13), 3)
+
+ joined14 <- join(df, df2, df$name == df2$name, "semi")
+ expect_equal(names(joined14), c("age", "name"))
+ expect_equal(count(joined14), 3)
+
+ joined14 <- join(df, df2, df$name == df2$name, "leftsemi")
+ expect_equal(names(joined14), c("age", "name"))
+ expect_equal(count(joined14), 3)
+
+ joined15 <- join(df, df2, df$name == df2$name, "left_semi")
+ expect_equal(names(joined15), c("age", "name"))
+