HyukjinKwon commented on a change in pull request #24087: [SPARK-27096][SQL][FOLLOWUP] Do the correct validation of join types in R side and fix join docs for scala, python and r URL: https://github.com/apache/spark/pull/24087#discussion_r265825938
########## File path: R/pkg/tests/fulltests/test_sparkSQL.R ########## @@ -2356,40 +2356,96 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { expect_equal(names(joined2), c("age", "name", "name", "test")) expect_equal(count(joined2), 3) - joined3 <- join(df, df2, df$name == df2$name, "rightouter") + joined3 <- join(df, df2, df$name == df2$name, "right") expect_equal(names(joined3), c("age", "name", "name", "test")) expect_equal(count(joined3), 4) expect_true(is.na(collect(orderBy(joined3, joined3$age))$age[2])) - - joined4 <- select(join(df, df2, df$name == df2$name, "outer"), - alias(df$age + 5, "newAge"), df$name, df2$test) - expect_equal(names(joined4), c("newAge", "name", "test")) + + joined4 <- join(df, df2, df$name == df2$name, "right_outer") + expect_equal(names(joined4), c("age", "name", "name", "test")) expect_equal(count(joined4), 4) - expect_equal(collect(orderBy(joined4, joined4$name))$newAge[3], 24) + expect_true(is.na(collect(orderBy(joined4, joined4$age))$age[2])) - joined5 <- join(df, df2, df$name == df2$name, "leftouter") + joined5 <- join(df, df2, df$name == df2$name, "rightouter") expect_equal(names(joined5), c("age", "name", "name", "test")) - expect_equal(count(joined5), 3) - expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[1])) - - joined6 <- join(df, df2, df$name == df2$name, "inner") - expect_equal(names(joined6), c("age", "name", "name", "test")) - expect_equal(count(joined6), 3) + expect_equal(count(joined5), 4) + expect_true(is.na(collect(orderBy(joined5, joined5$age))$age[2])) - joined7 <- join(df, df2, df$name == df2$name, "leftsemi") - expect_equal(names(joined7), c("age", "name")) - expect_equal(count(joined7), 3) - joined8 <- join(df, df2, df$name == df2$name, "left_outer") - expect_equal(names(joined8), c("age", "name", "name", "test")) - expect_equal(count(joined8), 3) - expect_true(is.na(collect(orderBy(joined8, joined8$age))$age[1])) - - joined9 <- join(df, df2, df$name == df2$name, "right_outer") - 
expect_equal(names(joined9), c("age", "name", "name", "test")) + joined6 <- select(join(df, df2, df$name == df2$name, "outer"), + alias(df$age + 5, "newAge"), df$name, df2$test) + expect_equal(names(joined6), c("newAge", "name", "test")) + expect_equal(count(joined6), 4) + expect_equal(collect(orderBy(joined6, joined6$name))$newAge[3], 24) + + joined7 <- select(join(df, df2, df$name == df2$name, "full"), + alias(df$age + 5, "newAge"), df$name, df2$test) + expect_equal(names(joined7), c("newAge", "name", "test")) + expect_equal(count(joined7), 4) + expect_equal(collect(orderBy(joined7, joined7$name))$newAge[3], 24) + + joined8 <- select(join(df, df2, df$name == df2$name, "fullouter"), + alias(df$age + 5, "newAge"), df$name, df2$test) + expect_equal(names(joined8), c("newAge", "name", "test")) + expect_equal(count(joined8), 4) + expect_equal(collect(orderBy(joined8, joined8$name))$newAge[3], 24) + + joined9 <- select(join(df, df2, df$name == df2$name, "full_outer"), + alias(df$age + 5, "newAge"), df$name, df2$test) + expect_equal(names(joined9), c("newAge", "name", "test")) expect_equal(count(joined9), 4) - expect_true(is.na(collect(orderBy(joined9, joined9$age))$age[2])) - + expect_equal(collect(orderBy(joined9, joined9$name))$newAge[3], 24) + + joined10 <- join(df, df2, df$name == df2$name, "left") + expect_equal(names(joined10), c("age", "name", "name", "test")) + expect_equal(count(joined10), 3) + expect_true(is.na(collect(orderBy(joined10, joined10$age))$age[1])) + + joined11 <- join(df, df2, df$name == df2$name, "leftouter") + expect_equal(names(joined11), c("age", "name", "name", "test")) + expect_equal(count(joined11), 3) + expect_true(is.na(collect(orderBy(joined11, joined11$age))$age[1])) + + joined12 <- join(df, df2, df$name == df2$name, "left_outer") + expect_equal(names(joined12), c("age", "name", "name", "test")) + expect_equal(count(joined12), 3) + expect_true(is.na(collect(orderBy(joined12, joined12$age))$age[1])) + + joined13 <- join(df, df2, df$name 
== df2$name, "inner") + expect_equal(names(joined13), c("age", "name", "name", "test")) + expect_equal(count(joined13), 3) + + joined14 <- join(df, df2, df$name == df2$name, "semi") + expect_equal(names(joined14), c("age", "name")) + expect_equal(count(joined14), 3) + + joined14 <- join(df, df2, df$name == df2$name, "leftsemi") + expect_equal(names(joined14), c("age", "name")) + expect_equal(count(joined14), 3) + + joined15 <- join(df, df2, df$name == df2$name, "left_semi") + expect_equal(names(joined15), c("age", "name")) + expect_equal(count(joined15), 3) + + joined16 <- join(df2, df, df2$name == df$name, "anti") + expect_equal(names(joined16), c("name", "test")) + expect_equal(count(joined16), 1) + + joined17 <- join(df2, df, df2$name == df$name, "leftanti") + expect_equal(names(joined17), c("name", "test")) + expect_equal(count(joined17), 1) + + joined18 <- join(df2, df, df2$name == df$name, "left_anti") + expect_equal(names(joined18), c("name", "test")) + expect_equal(count(joined18), 1) + + error_msg <- paste("joinType must be one of the following types: ", + "'inner', 'cross', 'outer', 'full', 'fullouter', 'full_outer',", + "'left', 'leftouter', 'left_outer', 'right', 'rightouter', 'right_outer',", + "'semi', 'leftsemi', 'left_semi', 'anti', 'leftanti' or 'left_anti'.") + retError <- tryCatch(join(df2, df, df2$name == df$name, "invalid"), error = function(e) e) Review comment: You could use `expect_error` here instead of `tryCatch`, but it's not a big deal. Ignore this comment if you receive no other comments to fix. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org