This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 47f0303944a [SPARK-39880][SQL] V2 SHOW FUNCTIONS command should print qualified function name like v1 47f0303944a is described below commit 47f0303944abb11d3018186bc125113772eff8ef Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed Jul 27 19:00:22 2022 +0500 [SPARK-39880][SQL] V2 SHOW FUNCTIONS command should print qualified function name like v1 ### What changes were proposed in this pull request? This PR fixes a mismatch between the v1 and v2 SHOW FUNCTIONS commands: v1 prints qualified function names such as `spark_catalog.db1.f1`, but v2 prints only the bare function name. The v2 command should follow the v1 behavior, which makes more sense. This PR also updates the SHOW FUNCTIONS test suites to match the new behavior. ### Why are the changes needed? To keep the v1 and v2 commands consistent. ### Does this PR introduce _any_ user-facing change? No. The v2 SHOW FUNCTIONS command is new in Spark 3.4, which has not been released yet. ### How was this patch tested? By updating the existing tests. Closes #37301 from cloud-fan/show-functions. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../datasources/v2/ShowFunctionsExec.scala | 30 +++++------ .../execution/command/ShowFunctionsSuiteBase.scala | 58 ++++++++-------------- .../execution/command/v1/ShowFunctionsSuite.scala | 32 +++++------- .../execution/command/v2/ShowFunctionsSuite.scala | 1 - .../execution/command/ShowFunctionsSuite.scala | 15 ++---- 5 files changed, 52 insertions(+), 84 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowFunctionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowFunctionsExec.scala index 5ca0b01d42f..cc95ee53531 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowFunctionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowFunctionsExec.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.FunctionCatalog import org.apache.spark.sql.execution.LeafExecNode @@ -37,28 +38,27 @@ case class ShowFunctionsExec( systemScope: Boolean, pattern: Option[String]) extends V2CommandExec with LeafExecNode { + private def applyPattern(names: Seq[String]): Seq[String] = { + StringUtils.filterPattern(names.toSeq, pattern.getOrElse("*")) + } + override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() val systemFunctions = if (systemScope) { - // All built-in functions - (FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet).map(_.unquotedString) ++ - // Hard code "<>", "!=", "between", "case", and "||" - // for now as there is no corresponding 
functions. - // "<>", "!=", "between", "case", and "||" is system functions, - // only show when systemScope=true - FunctionRegistry.builtinOperators.keys.toSeq + // All built-in functions, and operators such as "<>", "||" + val builtinFunctions = FunctionRegistry.functionSet ++ TableFunctionRegistry.functionSet + applyPattern(builtinFunctions.map(_.unquotedString).toSeq ++ + FunctionRegistry.builtinOperators.keys.toSeq) } else Seq.empty val userFunctions = if (userScope) { // List all temporary functions in the session catalog - session.sessionState.catalog.listTemporaryFunctions().map(_.unquotedString) ++ - // List all functions registered in the given name space of the catalog - catalog.listFunctions(namespace.toArray).map(_.name()).toSeq + applyPattern(session.sessionState.catalog.listTemporaryFunctions().map(_.unquotedString)) ++ + // List all functions registered in the given namespace of the catalog + applyPattern(catalog.listFunctions(namespace.toArray).map(_.name())).map { funcName => + (catalog.name() +: namespace :+ funcName).quoted + } } else Seq.empty - val allFunctions = StringUtils.filterPattern( - userFunctions ++ systemFunctions, - pattern.getOrElse("*")).distinct.sorted - - allFunctions.foreach { fn => + (userFunctions ++ systemFunctions).distinct.sorted.foreach { fn => rows += toCatalystRow(fn) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowFunctionsSuiteBase.scala index 415bc813792..0f23cc699be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowFunctionsSuiteBase.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.execution.command -import java.util.Locale - import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import 
org.apache.spark.util.Utils /** @@ -28,21 +27,19 @@ import org.apache.spark.util.Utils * specific test suites: * * - V2 catalog tests: `org.apache.spark.sql.execution.command.v2.ShowFunctionsSuite` - * - V1 catalog tests: - * `org.apache.spark.sql.execution.command.v1.ShowFunctionsSuiteBase` - * - Temporary functions: - * `org.apache.spark.sql.execution.command.v1.ShowTempFunctionsSuite` - * - Permanent functions: - * `org.apache.spark.sql.hive.execution.command.ShowFunctionsSuite` + * - V1 catalog tests: `org.apache.spark.sql.execution.command.v1.ShowFunctionsSuiteBase` + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowFunctionsSuite` + * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowFunctionsSuite` */ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { override val command = "SHOW FUNCTIONS" protected def funCatalog: String = catalog - protected def createFunction(name: String): Unit = {} - protected def dropFunction(name: String): Unit = {} - protected def showFun(ns: String, name: String): String = s"$ns.$name".toLowerCase(Locale.ROOT) - protected def isTempFunctions(): Boolean = false + protected def createFunction(name: String): Unit + protected def dropFunction(name: String): Unit + protected def qualifiedFunName(ns: String, name: String): String = { + Seq(funCatalog, ns, name).quoted + } /** * Drops function `funName` after calling `f`. 
@@ -53,9 +50,9 @@ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { } } - protected def withNamespaceAndFuns(ns: String, funNames: Seq[String], cat: String = funCatalog) + protected def withNamespaceAndFuns(ns: String, funNames: Seq[String]) (f: (String, Seq[String]) => Unit): Unit = { - val nsCat = s"$cat.$ns" + val nsCat = s"$funCatalog.$ns" withNamespace(nsCat) { sql(s"CREATE NAMESPACE $nsCat") val nsCatFns = funNames.map(funName => s"$nsCat.$funName") @@ -65,9 +62,9 @@ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { } } - protected def withNamespaceAndFun(ns: String, funName: String, cat: String = funCatalog) + protected def withNamespaceAndFun(ns: String, funName: String) (f: (String, String) => Unit): Unit = { - withNamespaceAndFuns(ns, Seq(funName), cat) { case (ns, Seq(name)) => + withNamespaceAndFuns(ns, Seq(funName)) { case (ns, Seq(name)) => f(ns, name) } } @@ -87,7 +84,7 @@ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { createFunction(f) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns"), - Row(showFun("ns", "logiii")) :: Nil) + Row(qualifiedFunName("ns", "logiii")) :: Nil) } } @@ -99,7 +96,7 @@ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { spark.udf.register(f1, (arg1: Int, arg2: String) => arg2 + arg1) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns"), - Row(showFun("ns", "poggi")) :: Row(f1) :: Nil) + Row(qualifiedFunName("ns", "poggi")) :: Row(f1) :: Nil) QueryTest.checkAnswer( sql(s"SHOW ALL FUNCTIONS IN $ns").filter(s"function='$f1'"), Row(f1) :: Nil) @@ -130,7 +127,7 @@ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { createFunction(f) QueryTest.checkAnswer( sql(s"SHOW ALL FUNCTIONS IN $ns"), - allFuns :+ Row(showFun("ns", "current_datei"))) + allFuns :+ Row(qualifiedFunName("ns", "current_datei"))) } } @@ -141,48 +138,35 @@ trait ShowFunctionsSuiteBase extends QueryTest with DDLCommandTestUtils { 
funs.foreach(createFunction) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns LIKE '*'"), - testFuns.map(testFun => Row(showFun("ns", testFun)))) + testFuns.map(testFun => Row(qualifiedFunName("ns", testFun)))) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns LIKE '*rc*'"), - Seq("crc32i", "crc16j").map(testFun => Row(showFun("ns", testFun)))) + Seq("crc32i", "crc16j").map(testFun => Row(qualifiedFunName("ns", testFun)))) } } test("show a function by its string name") { - assume(!isTempFunctions()) val testFuns = Seq("crc32i", "crc16j") withNamespaceAndFuns("ns", testFuns) { (ns, funs) => assert(sql(s"SHOW USER FUNCTIONS IN $ns").isEmpty) funs.foreach(createFunction) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns 'crc32i'"), - Row(showFun("ns", "crc32i")) :: Nil) + Row(qualifiedFunName("ns", "crc32i")) :: Nil) } } test("show functions matched to the '|' pattern") { - assume(!isTempFunctions()) val testFuns = Seq("crc32i", "crc16j", "date1900", "Date1") withNamespaceAndFuns("ns", testFuns) { (ns, funs) => assert(sql(s"SHOW USER FUNCTIONS IN $ns").isEmpty) funs.foreach(createFunction) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns LIKE 'crc32i|date1900'"), - Seq("crc32i", "date1900").map(testFun => Row(showFun("ns", testFun)))) + Seq("crc32i", "date1900").map(testFun => Row(qualifiedFunName("ns", testFun)))) QueryTest.checkAnswer( sql(s"SHOW USER FUNCTIONS IN $ns LIKE 'crc32i|date*'"), - Seq("crc32i", "date1900", "Date1").map(testFun => Row(showFun("ns", testFun)))) - } - } - - test("show a function by its id") { - assume(!isTempFunctions()) - withNamespaceAndFun("ns", "crc32i") { (ns, fun) => - assert(sql(s"SHOW USER FUNCTIONS IN $ns").isEmpty) - createFunction(fun) - QueryTest.checkAnswer( - sql(s"SHOW USER FUNCTIONS $fun"), - Row(showFun("ns", "crc32i")) :: Nil) + Seq("crc32i", "date1900", "Date1").map(testFun => Row(qualifiedFunName("ns", testFun)))) } } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowFunctionsSuite.scala index f7ea4e75875..e46e2ce0d14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowFunctionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import java.util.Locale +import test.org.apache.spark.sql.MyDoubleSum import org.apache.spark.sql.execution.command @@ -26,30 +26,22 @@ import org.apache.spark.sql.execution.command * table catalogs. The tests that cannot run for all V1 catalogs are located in more * specific test suites: * - * - Temporary functions of V1 catalog: - * `org.apache.spark.sql.execution.command.v1.ShowTempFunctionsSuite` - * - Permanent functions of V1 catalog: - * `org.apache.spark.sql.hive.execution.command.ShowFunctionsSuite` + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowFunctionsSuite` + * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowFunctionsSuite` */ trait ShowFunctionsSuiteBase extends command.ShowFunctionsSuiteBase - with command.TestsV1AndV2Commands - -/** - * The class contains tests for the `SHOW FUNCTIONS` command to check temporary functions. 
- */ -class ShowTempFunctionsSuite extends ShowFunctionsSuiteBase with CommandSuiteBase { - override def commandVersion: String = super[ShowFunctionsSuiteBase].commandVersion - override protected def isTempFunctions(): Boolean = true - + with command.TestsV1AndV2Commands { override protected def createFunction(name: String): Unit = { - spark.udf.register(name, (arg1: Int, arg2: String) => arg2 + arg1) + sql(s"CREATE FUNCTION $name AS '${classOf[MyDoubleSum].getName}'") } - override protected def dropFunction(name: String): Unit = { - spark.sessionState.catalog.dropTempFunction(name, false) + sql(s"DROP FUNCTION IF EXISTS $name") } +} - override protected def showFun(ns: String, name: String): String = { - s"$catalog.$ns.$name".toLowerCase(Locale.ROOT) - } +/** + * The class contains tests for the `SHOW FUNCTIONS` command to check V1 In-Memory table catalog. + */ +class ShowFunctionsSuite extends ShowFunctionsSuiteBase with CommandSuiteBase { + override def commandVersion: String = super[ShowFunctionsSuiteBase].commandVersion } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowFunctionsSuite.scala index b3f791abdad..f5630e52559 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowFunctionsSuite.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.execution.command */ class ShowFunctionsSuite extends command.ShowFunctionsSuiteBase with CommandSuiteBase { override protected def funCatalog: String = s"fun_$catalog" - override protected def showFun(ns: String, name: String): String = name private def getFunCatalog(): InMemoryCatalog = { spark.sessionState.catalogManager.catalog(funCatalog).asInstanceOf[InMemoryCatalog] diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowFunctionsSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowFunctionsSuite.scala index d471669f25c..95d7af00de3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowFunctionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowFunctionsSuite.scala @@ -19,22 +19,15 @@ package org.apache.spark.sql.hive.execution.command import java.util.Locale -import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.execution.UDFToListInt /** - * The class contains tests for the `SHOW FUNCTIONS` command to check permanent functions. + * The class contains tests for the `SHOW FUNCTIONS` command to check V1 Hive external catalog. */ class ShowFunctionsSuite extends v1.ShowFunctionsSuiteBase with CommandSuiteBase { override def commandVersion: String = super[ShowFunctionsSuiteBase].commandVersion - override protected def showFun(ns: String, name: String): String = - s"$SESSION_CATALOG_NAME.$ns.$name".toLowerCase(Locale.ROOT) - - override protected def createFunction(name: String): Unit = { - sql(s"CREATE FUNCTION $name AS '${classOf[UDFToListInt].getName}'") - } - override protected def dropFunction(name: String): Unit = { - sql(s"DROP FUNCTION IF EXISTS $name") + override def qualifiedFunName(ns: String, name: String): String = { + // Hive Metastore lower-cases all identifiers. + super.qualifiedFunName(ns, name).toLowerCase(Locale.ROOT) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org