This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new aa51da42908 [SPARK-39723][R] Implement functionExists/getFunc in SparkR for 3L namespace
aa51da42908 is described below
commit aa51da4290814bf3ccdc52000b8d90d6db575d3f
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Jul 12 11:05:25 2022 +0800
[SPARK-39723][R] Implement functionExists/getFunc in SparkR for 3L namespace
### What changes were proposed in this pull request?
1, implement functionExists/getFunc in SparkR
2, update doc of ListFunctions
### Why are the changes needed?
for 3L namespace
### Does this PR introduce _any_ user-facing change?
yes, new API functionExists
### How was this patch tested?
added UT
Closes #37135 from zhengruifeng/r_3L_func.
Lead-authored-by: Ruifeng Zheng <[email protected]>
Co-authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
R/pkg/NAMESPACE | 2 +
R/pkg/R/catalog.R | 75 ++++++++++++++++++++++++++++++++++-
R/pkg/pkgdown/_pkgdown_template.yml | 2 +
R/pkg/tests/fulltests/test_sparkSQL.R | 34 +++++++++++++++-
4 files changed, 111 insertions(+), 2 deletions(-)
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 3937791421a..e078ba0c2cd 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -479,7 +479,9 @@ export("as.DataFrame",
"databaseExists",
"dropTempTable",
"dropTempView",
+ "functionExists",
"getDatabase",
+ "getFunc",
"getTable",
"listCatalogs",
"listColumns",
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 680415ea6cd..942af4de3c0 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -583,13 +583,14 @@ listColumns <- function(tableName, databaseName = NULL) {
#' This includes all temporary functions.
#'
#' @param databaseName (optional) name of the database
+#'                     The database name can be qualified with catalog name since 3.4.0.
#' @return a SparkDataFrame of the list of function descriptions.
#' @rdname listFunctions
#' @name listFunctions
#' @examples
#' \dontrun{
#' sparkR.session()
-#' listFunctions()
+#' listFunctions("spark_catalog.default")
#' }
#' @note since 2.2.0
listFunctions <- function(databaseName = NULL) {
@@ -606,6 +607,78 @@ listFunctions <- function(databaseName = NULL) {
dataFrame(callJMethod(jdst, "toDF"))
}
+#' Checks if the function with the specified name exists.
+#'
+#' Checks if the function with the specified name exists.
+#'
+#' @param functionName name of the function, allowed to be qualified with catalog name
+#' @rdname functionExists
+#' @name functionExists
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' functionExists("spark_catalog.default.myFunc")
+#' }
+#' @note since 3.4.0
+functionExists <- function(functionName) {
+ sparkSession <- getSparkSession()
+ if (class(functionName) != "character") {
+ stop("functionName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ callJMethod(catalog, "functionExists", functionName)
+}
+
+#' Get the function with the specified name
+#'
+#' Get the function with the specified name
+#'
+#' @param functionName name of the function, allowed to be qualified with catalog name
+#' @return A named list.
+#' @rdname getFunc
+#' @name getFunc
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' func <- getFunc("spark_catalog.default.myFunc")
+#' }
+#' @note since 3.4.0. Use different name with the scala/python side, to avoid the
+#' signature conflict with built-in "getFunction".
+getFunc <- function(functionName) {
+ sparkSession <- getSparkSession()
+ if (class(functionName) != "character") {
+ stop("functionName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ jfunc <- handledCallJMethod(catalog, "getFunction", functionName)
+
+ ret <- list(name = callJMethod(jfunc, "name"))
+ jcata <- callJMethod(jfunc, "catalog")
+ if (is.null(jcata)) {
+ ret$catalog <- NA
+ } else {
+ ret$catalog <- jcata
+ }
+
+ jns <- callJMethod(jfunc, "namespace")
+ if (is.null(jns)) {
+ ret$namespace <- NA
+ } else {
+ ret$namespace <- jns
+ }
+
+ jdesc <- callJMethod(jfunc, "description")
+ if (is.null(jdesc)) {
+ ret$description <- NA
+ } else {
+ ret$description <- jdesc
+ }
+
+ ret$className <- callJMethod(jfunc, "className")
+ ret$isTemporary <- callJMethod(jfunc, "isTemporary")
+ ret
+}
+
 #' Recovers all the partitions in the directory of a table and update the catalog
 #'
 #' Recovers all the partitions in the directory of a table and update the catalog. The name should
diff --git a/R/pkg/pkgdown/_pkgdown_template.yml b/R/pkg/pkgdown/_pkgdown_template.yml
index df93f200ab2..1da1d62ee9c 100644
--- a/R/pkg/pkgdown/_pkgdown_template.yml
+++ b/R/pkg/pkgdown/_pkgdown_template.yml
@@ -266,7 +266,9 @@ reference:
- databaseExists
- dropTempTable
- dropTempView
+ - functionExists
- getDatabase
+ - getFunc
- getTable
- listCatalogs
- listColumns
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 85eca6b510b..fc54d89a1a4 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -4074,7 +4074,7 @@ test_that("catalog APIs, currentDatabase, setCurrentDatabase, listDatabases, get
expect_equal(db$catalog, "spark_catalog")
})
-test_that("catalog APIs, listTables, listColumns, listFunctions, getTable", {
+test_that("catalog APIs, listTables, getTable, listColumns, listFunctions, functionExists", {
tb <- listTables()
count <- count(tables())
expect_equal(nrow(listTables("default")), count)
@@ -4111,6 +4111,38 @@ test_that("catalog APIs, listTables, listColumns, listFunctions, getTable", {
paste("Error in listFunctions : no such database - Database",
"'zxwtyswklpf_db' not found"))
+ expect_true(functionExists("abs"))
+ expect_false(functionExists("aabbss"))
+
+ func0 <- getFunc("abs")
+ expect_equal(func0$name, "abs")
+  expect_equal(func0$className, "org.apache.spark.sql.catalyst.expressions.Abs")
+ expect_true(func0$isTemporary)
+
+  sql("CREATE FUNCTION func1 AS 'org.apache.spark.sql.catalyst.expressions.Add'")
+
+ func1 <- getFunc("spark_catalog.default.func1")
+ expect_equal(func1$name, "func1")
+ expect_equal(func1$catalog, "spark_catalog")
+ expect_equal(length(func1$namespace), 1)
+ expect_equal(func1$namespace[[1]], "default")
+  expect_equal(func1$className, "org.apache.spark.sql.catalyst.expressions.Add")
+ expect_false(func1$isTemporary)
+
+ expect_true(functionExists("func1"))
+ expect_true(functionExists("default.func1"))
+ expect_true(functionExists("spark_catalog.default.func1"))
+
+ expect_false(functionExists("func2"))
+ expect_false(functionExists("default.func2"))
+ expect_false(functionExists("spark_catalog.default.func2"))
+
+ sql("DROP FUNCTION func1")
+
+ expect_false(functionExists("func1"))
+ expect_false(functionExists("default.func1"))
+ expect_false(functionExists("spark_catalog.default.func1"))
+
# recoverPartitions does not work with temporary view
 expect_error(recoverPartitions("cars"),
              paste("Error in recoverPartitions : analysis error - cars is a temp view.",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]