Repository: spark Updated Branches: refs/heads/branch-2.0 a7e9e60df -> ead3bbdae
[SPARK-15684][SPARKR] Not mask startsWith and endsWith in R ## What changes were proposed in this pull request? In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work in SparkR. 1. Remove signature in generic.R 2. Add setMethod in column.R 3. Add unit tests ## How was this patch tested? Manually test it through SparkR shell for both column data and string data, which are added into the unit test file. Author: [email protected] <[email protected]> Closes #13476 from wangmiao1981/start. (cherry picked from commit 3ec4461c46e2959f4c640df0292cfcacfe0f727f) Signed-off-by: Shivaram Venkataraman <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ead3bbda Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ead3bbda Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ead3bbda Branch: refs/heads/branch-2.0 Commit: ead3bbdaef428ac22ee2cecbdc76140d7700871f Parents: a7e9e60 Author: [email protected] <[email protected]> Authored: Tue Jun 7 09:13:18 2016 -0700 Committer: Shivaram Venkataraman <[email protected]> Committed: Tue Jun 7 09:13:48 2016 -0700 ---------------------------------------------------------------------- R/pkg/R/column.R | 36 +++++++++++++++++++++++++- R/pkg/R/generics.R | 4 +-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 7 +++++ 3 files changed, 44 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/R/column.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index a3e0937..873e8b1 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -57,7 +57,7 @@ operators <- list( "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") -column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") +column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { setMethod(op, @@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"), column(jc) }) +#' startsWith +#' +#' Determines if entries of x start with string (entries of) prefix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname startsWith +#' @name startsWith +#' @family colum_func +#' +#' @param x vector of character string whose âstartsâ are considered +#' @param prefix character vector (often of length one) +setMethod("startsWith", signature(x = "Column"), + function(x, prefix) { + jc <- callJMethod(x@jc, "startsWith", as.vector(prefix)) + column(jc) + }) + +#' endsWith +#' +#' Determines if entries of x end with string (entries of) suffix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname endsWith +#' @name endsWith +#' @family colum_func +#' +#' @param x vector of character string whose âendsâ are considered +#' @param suffix character vector (often of length one) +setMethod("endsWith", signature(x = "Column"), + function(x, suffix) { + jc <- callJMethod(x@jc, "endsWith", as.vector(suffix)) + column(jc) + }) + #' between #' #' Test if the column is between the lower bound and upper bound, inclusive. http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ed76ad6..f0cde56 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") }) #' @rdname column #' @export -setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") }) +setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) #' @rdname column #' @export @@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @rdname column #' @export -setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") }) +setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") }) #' @rdname column #' @export http://git-wip-us.apache.org/repos/asf/spark/blob/ead3bbda/R/pkg/inst/tests/testthat/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 94fa363..375cb6f 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1136,7 +1136,14 @@ test_that("string operators", { df <- read.json(jsonPath) expect_equal(count(where(df, like(df$name, "A%"))), 1) expect_equal(count(where(df, startsWith(df$name, "A"))), 1) + expect_true(first(select(df, startsWith(df$name, "M")))[[1]]) + expect_false(first(select(df, startsWith(df$name, "m")))[[1]]) + expect_true(first(select(df, endsWith(df$name, "el")))[[1]]) expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") + if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { + expect_true(startsWith("Hello World", "Hello")) + expect_false(endsWith("Hello World", "a")) + } expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30") expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30") expect_equal(collect(select(df, concat_ws(":", df$name)))[[2, 1]], "Andy") --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
