This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cbfa0513421 [SPARK-38974][SQL] Filter registered functions with a
given database name in list functions
cbfa0513421 is described below
commit cbfa0513421d5e9e9b7410d7f86b8e25df4ae548
Author: allisonwang-db <[email protected]>
AuthorDate: Fri Apr 22 11:24:34 2022 +0800
[SPARK-38974][SQL] Filter registered functions with a given database name
in list functions
### What changes were proposed in this pull request?
This PR fixes a bug in listing functions: registered functions that do not
belong to the specified database are now filtered out of the result.
### Why are the changes needed?
To fix a bug in `SHOW FUNCTIONS IN [db]`. The listed functions should include
only the temporary functions and the persistent functions in the specified
database, rather than every function registered in the function registry.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Unit test
Closes #36291 from allisonwang-db/spark-38974-list-functions.
Authored-by: allisonwang-db <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/catalog/SessionCatalog.scala | 30 +++++++++++++---------
.../sql/catalyst/catalog/SessionCatalogSuite.scala | 19 ++++++++++++++
2 files changed, 37 insertions(+), 12 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 322302e8a6f..6b7f8a207d6 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1753,6 +1753,23 @@ class SessionCatalog(
}
}
+ /**
+ * List all registered functions in a database with the given pattern.
+ */
+ private def listRegisteredFunctions(db: String, pattern: String):
Seq[FunctionIdentifier] = {
+ val functions = (functionRegistry.listFunction() ++
tableFunctionRegistry.listFunction())
+ .filter(_.database.forall(_ == db))
+ StringUtils.filterPattern(functions.map(_.unquotedString), pattern).map {
f =>
+ // In functionRegistry, function names are stored as an unquoted format.
+ Try(parser.parseFunctionIdentifier(f)) match {
+ case Success(e) => e
+ case Failure(_) =>
+ // The names of some built-in functions are not parsable by our
parser, e.g., %
+ FunctionIdentifier(f)
+ }
+ }
+ }
+
/**
* List all functions in the specified database, including temporary
functions. This
* returns the function identifier and the scope in which it was defined
(system or user
@@ -1770,18 +1787,7 @@ class SessionCatalog(
requireDbExists(dbName)
val dbFunctions = externalCatalog.listFunctions(dbName, pattern).map { f =>
FunctionIdentifier(f, Some(dbName)) }
- val loadedFunctions = StringUtils
- .filterPattern(
- (functionRegistry.listFunction() ++
tableFunctionRegistry.listFunction())
- .map(_.unquotedString), pattern).map { f =>
- // In functionRegistry, function names are stored as an unquoted
format.
- Try(parser.parseFunctionIdentifier(f)) match {
- case Success(e) => e
- case Failure(_) =>
- // The names of some built-in functions are not parsable by our
parser, e.g., %
- FunctionIdentifier(f)
- }
- }
+ val loadedFunctions = listRegisteredFunctions(db, pattern)
val functions = dbFunctions ++ loadedFunctions
// The session catalog caches some persistent functions in the
FunctionRegistry
// so there can be duplicates.
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 8769a6f189f..2d9a17716d4 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1636,6 +1636,25 @@ abstract class SessionCatalogSuite extends AnalysisTest
with Eventually {
}
}
+ test("SPARK-38974: list functions in database") {
+ withEmptyCatalog { catalog =>
+ val tmpFunc = newFunc("func1", None)
+ val func1 = newFunc("func1", Some("default"))
+ val func2 = newFunc("func2", Some("db1"))
+ val builder = (e: Seq[Expression]) => e.head
+ catalog.createDatabase(newDb("db1"), ignoreIfExists = false)
+ catalog.registerFunction(tmpFunc, overrideIfExists = false,
functionBuilder = Some(builder))
+ catalog.createFunction(func1, ignoreIfExists = false)
+ catalog.createFunction(func2, ignoreIfExists = false)
+ // Load func2 into the function registry.
+ catalog.registerFunction(func2, overrideIfExists = false,
functionBuilder = Some(builder))
+ // Should not include func2.
+ assert(catalog.listFunctions("default", "*").map(_._1).toSet ==
+ Set(FunctionIdentifier("func1"), FunctionIdentifier("func1",
Some("default")))
+ )
+ }
+ }
+
test("copy SessionCatalog state - temp views") {
withEmptyCatalog { original =>
val tempTable1 = Range(1, 10, 1, 10)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]