srielau commented on code in PR #55915:
URL: https://github.com/apache/spark/pull/55915#discussion_r3259684764
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala:
##########
@@ -101,38 +105,115 @@ case class DescribeFunctionCommand(
toAttributes(schema)
}
- override def run(sparkSession: SparkSession): Seq[Row] = {
- val identifier = if (info.getDb != null) {
- sparkSession.sessionState.catalog.qualifyIdentifier(
- FunctionIdentifier(info.getName, Some(info.getDb)))
+ private def append(buffer: ArrayBuffer[(String, String)], key: String,
value: String): Unit = {
+ buffer += (key -> value)
+ }
+
+ /**
+ * Pad all input strings into the same length using the max string length
among all inputs.
+ */
+ private def tabulate(inputs: Seq[String]): Seq[String] = {
+ val maxLen = inputs.map(_.length).max
+ inputs.map { input => input.padTo(maxLen, " ").mkString }
+ }
+
+ private def formatParameters(params: StructType): Seq[String] = {
+ val names = tabulate(params.map(_.name))
+ val dataTypes = tabulate(params.map(_.dataType.sql))
+ // Only show parameter comments in extended mode.
+ val comments = params.map { p =>
+ if (isExtended) p.getComment().map(c => s" '$c'").getOrElse("") else ""
+ }
+ val defaults = params.map { p =>
+ if (isExtended) p.getDefault().map(d => s" DEFAULT $d").getOrElse("")
else ""
+ }
+ names zip dataTypes zip defaults zip comments map {
+ case (((name, dataType), default), comment) => s"$name
$dataType$default$comment"
+ }
+ }
+
+ private def describeSQLFunction(info: ExpressionInfo, parser:
ParserInterface): Seq[Row] = {
+ val buffer = new ArrayBuffer[(String, String)]
+ val f = SQLFunction.fromExpressionInfo(info, parser)
+ append(buffer, "Function:", f.name.toString)
Review Comment:
Fixed in a6e1a52. Hoisted `qualifyIdentifier` out of the `if/else` in
`run()`, so both the SQL-UDF and legacy paths share the same
`FunctionIdentifier` and render the catalog-qualified 3-part name.
`describeSQLFunction` now takes the qualified identifier as a parameter and
emits `qualifiedName.unquotedString` for the `Function:` row.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala:
##########
@@ -101,38 +105,115 @@ case class DescribeFunctionCommand(
toAttributes(schema)
}
- override def run(sparkSession: SparkSession): Seq[Row] = {
- val identifier = if (info.getDb != null) {
- sparkSession.sessionState.catalog.qualifyIdentifier(
- FunctionIdentifier(info.getName, Some(info.getDb)))
+ private def append(buffer: ArrayBuffer[(String, String)], key: String,
value: String): Unit = {
+ buffer += (key -> value)
+ }
+
+ /**
+ * Pad all input strings into the same length using the max string length
among all inputs.
Review Comment:
Done in a6e1a52.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala:
##########
@@ -101,38 +105,115 @@ case class DescribeFunctionCommand(
toAttributes(schema)
}
- override def run(sparkSession: SparkSession): Seq[Row] = {
- val identifier = if (info.getDb != null) {
- sparkSession.sessionState.catalog.qualifyIdentifier(
- FunctionIdentifier(info.getName, Some(info.getDb)))
+ private def append(buffer: ArrayBuffer[(String, String)], key: String,
value: String): Unit = {
+ buffer += (key -> value)
+ }
+
+ /**
+ * Pad all input strings into the same length using the max string length
among all inputs.
+ */
+ private def tabulate(inputs: Seq[String]): Seq[String] = {
+ val maxLen = inputs.map(_.length).max
+ inputs.map { input => input.padTo(maxLen, " ").mkString }
+ }
+
+ private def formatParameters(params: StructType): Seq[String] = {
+ val names = tabulate(params.map(_.name))
+ val dataTypes = tabulate(params.map(_.dataType.sql))
+ // Only show parameter comments in extended mode.
+ val comments = params.map { p =>
+ if (isExtended) p.getComment().map(c => s" '$c'").getOrElse("") else ""
+ }
+ val defaults = params.map { p =>
+ if (isExtended) p.getDefault().map(d => s" DEFAULT $d").getOrElse("")
else ""
+ }
+ names zip dataTypes zip defaults zip comments map {
+ case (((name, dataType), default), comment) => s"$name
$dataType$default$comment"
+ }
+ }
+
+ private def describeSQLFunction(info: ExpressionInfo, parser:
ParserInterface): Seq[Row] = {
+ val buffer = new ArrayBuffer[(String, String)]
+ val f = SQLFunction.fromExpressionInfo(info, parser)
+ append(buffer, "Function:", f.name.toString)
+ append(buffer, "Type:", if (f.isTableFunc) SQLFunction.TABLE else
SQLFunction.SCALAR)
+ // Function input
+ val input = f.inputParam
+ if (input.nonEmpty) {
+ val params = formatParameters(input.get)
+ assert(params.nonEmpty)
+ append(buffer, "Input:", params.head)
+ params.tail.foreach(s => append(buffer, "", s))
} else {
- FunctionIdentifier(info.getName)
+ append(buffer, "Input:", "()")
}
- val name = identifier.unquotedString
- val result = if (info.getClassName != null) {
- Row(s"Function: $name") ::
- Row(s"Class: ${info.getClassName}") ::
- Row(s"Usage: ${info.getUsage}") :: Nil
+ // Function returns
+ if (f.isTableFunc) {
+ val returnParams = formatParameters(f.getTableFuncReturnCols)
+ assert(returnParams.nonEmpty)
+ append(buffer, "Returns:", returnParams.head)
+ returnParams.tail.foreach(s => append(buffer, "", s))
} else {
- Row(s"Function: $name") :: Row(s"Usage: ${info.getUsage}") :: Nil
+ f.getScalarFuncReturnType match {
+ case _: NullType =>
+ case other => append(buffer, "Returns:", other.sql)
+ }
+ }
+ if (isExtended) {
+ f.comment.foreach(c => append(buffer, "Comment:", c))
+ f.collation.foreach(c => append(buffer, "Collation:", c))
+ f.deterministic.foreach(d => append(buffer, "Deterministic:",
d.toString))
+ f.containsSQL.foreach { c =>
+ val dataAccess = if (c) "CONTAINS SQL" else "READS SQL DATA"
+ append(buffer, "Data Access:", dataAccess)
+ }
+ val configs = f.getSQLConfigs
+ if (configs.nonEmpty) {
+ val sorted = configs.toSeq.sortBy(_._1).map { case (key, value) =>
s"$key=$value" }
+ append(buffer, "Configs:", sorted.head)
+ sorted.tail.foreach(s => append(buffer, "", s))
+ }
+ f.owner.foreach(o => append(buffer, "Owner:", o))
+ append(buffer, "Create Time:", new
java.util.Date(f.createTimeMs).toString)
Review Comment:
Good catch. Fixed in a6e1a52 by taking the first option you suggested:
`SQLFunction.toCatalogFunction` now persists `functionMetadataToProps`
alongside `sqlFunctionToProps`, so OWNER and CREATE_TIME round-trip through the
metastore. `fromProps` reads them back with backward-compatible defaults for
older payloads (`owner = None`, `createTimeMs = System.currentTimeMillis` if
missing). Combined with the `registerFunction` change below (info passed in
directly for freshly-registered UDFs), the `Create Time:` row now reflects the
timestamp captured at `CREATE FUNCTION` time both for the just-cached case and
after a session restart.
The corresponding test (`describe SQL scalar functions`) now parses the
rendered timestamp and asserts it falls within `[before, after]` of the `CREATE
FUNCTION` call (per your suggestion on the test file).
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala:
##########
@@ -2146,7 +2146,15 @@ class SessionCatalog(
if (registry.functionExists(identToRegister) && !overrideIfExists) {
throw QueryCompilationErrors.functionAlreadyExistsError(func)
}
- val info = makeExprInfoForHiveFunction(funcDefinition)
+ val info = if (funcDefinition.isUserDefinedFunction) {
Review Comment:
Fixed in a6e1a52. `registerFunction` now takes an optional `info:
Option[ExpressionInfo]`, and the persistent path of
`registerUserDefinedFunction` passes `Some(function.toExpressionInfo)`, so the
freshly-registered UDF goes into the registry with the in-memory
`ExpressionInfo` (preserving CREATE-time `owner`/`createTimeMs`). The
`isUserDefinedFunction` branch inside `registerFunction` is reduced to a
fallback for callers that pass a raw `CatalogFunction` without a pre-built
`ExpressionInfo` (e.g. `RefreshFunctionCommand`, which loads from the metastore
and would otherwise cache an info with `usage = null` and break DESCRIBE).
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala:
##########
@@ -253,6 +256,40 @@ object SQLFunction {
}
}
+ /**
+ * Convert an [[ExpressionInfo]] into a SQL function.
+ */
+ def fromExpressionInfo(info: ExpressionInfo, parser: ParserInterface):
SQLFunction = {
Review Comment:
Done in a6e1a52. Extracted `private def fromProps(props, identifier,
parser)`. Both `fromCatalogFunction` and `fromExpressionInfo` are now thin
wrappers that just deserialize the JSON blob and forward to `fromProps`, so
adding a new property only needs a single edit.
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala:
##########
@@ -113,6 +113,107 @@ class SQLFunctionSuite extends SharedSparkSession {
}
}
+ test("describe SQL scalar functions") {
+ withUserDefinedFunction("foo" -> true, "bar" -> true, "area" -> false) {
+ // Temporary function
+ sql(
+ """
+ |CREATE TEMPORARY FUNCTION foo() RETURNS int
+ |COMMENT 'function foo' RETURN 1
+ |""".stripMargin)
+ checkKeywordsExist(sql("describe function foo"),
+ "Function:", "foo",
+ "Type:", "SCALAR",
+ "Input:", "()",
+ "Returns:", "INT")
+ checkKeywordsExist(sql("describe function extended foo"),
+ "Deterministic: true",
+ "Data Access:", "CONTAINS SQL",
+ "Comment:", "function foo",
+ "Create Time:",
+ "Body:", "1")
+ sql(
+ """
+ |CREATE TEMPORARY FUNCTION bar(x int default 8,
+ |y int default substr('8hello', 1, 1) comment 'var_y')
+ |RETURNS int COMMENT 'function bar' RETURN x + y
+ |""".stripMargin)
+ checkKeywordsExist(sql("describe function bar"),
+ "Function:", "bar",
+ "Input:", "x INT", "y INT",
+ "Returns:", "INT")
+ checkKeywordsExist(sql("describe function extended bar"),
+ "Input:", "x INT DEFAULT 8", "y INT DEFAULT substr('8hello', 1, 1)
'var_y'",
+ "Comment:", "function bar",
+ "Deterministic: true",
+ "Data Access:", "CONTAINS SQL",
+ "Body:", "x + y")
+ // Permanent function
+ sql(
+ """
+ |CREATE FUNCTION area(width double comment 'width', height double
comment 'height')
+ |RETURNS double
+ |COMMENT 'compute area'
+ |DETERMINISTIC
+ |RETURN width * height
+ |""".stripMargin)
+ checkKeywordsExist(sql("describe function area"),
+ "Function:", "default.area",
+ "Type:", "SCALAR",
+ "Input:", "width DOUBLE", "height DOUBLE",
+ "Returns:", "DOUBLE")
+ checkKeywordsExist(sql("describe function extended area"),
+ "Input:", "width DOUBLE 'width'", "height DOUBLE 'height'",
+ "Comment:", "compute area",
+ "Deterministic: true",
+ "Data Access:", "CONTAINS SQL",
+ "Create Time:",
+ "Body:", "width * height")
+ }
+ }
+
+ test("describe SQL table functions") {
+ withUserDefinedFunction("foo" -> false) {
+ sql(
+ """
+ |CREATE FUNCTION foo(x INT) RETURNS TABLE (a INT, b STRING)
+ |COMMENT 'table function foo' RETURN SELECT x, x
+ |""".stripMargin)
+ checkKeywordsExist(sql("describe function foo"),
+ "Function:", "foo",
+ "Type:", "TABLE",
+ "Input:", "x INT",
+ "Returns:", "a INT", "b STRING")
+ checkKeywordsExist(sql("describe function extended foo"),
+ "Comment:", "table function foo",
+ "Deterministic: true",
+ "Data Access:", "CONTAINS SQL",
+ "Create Time:",
+ "Body:", "SELECT x, x")
+ }
+ }
+
+ test("describe SQL functions with derived routine characteristics") {
+ withUserDefinedFunction("foo" -> false, "bar" -> false, "baz" -> false) {
+ withTable("tbl_for_describe") {
+ sql("CREATE TABLE tbl_for_describe AS SELECT 1 AS x")
+ sql("CREATE FUNCTION foo() RETURNS TABLE(x INT) RETURN SELECT * FROM
tbl_for_describe")
+ sql("CREATE FUNCTION bar() RETURNS DOUBLE RETURN SELECT SUM(x) +
rand() FROM foo()")
+ sql("CREATE FUNCTION baz() RETURNS INT NOT DETERMINISTIC READS SQL
DATA RETURN 1")
+ checkKeywordsExist(sql("DESCRIBE FUNCTION EXTENDED foo"),
+ "Deterministic: true",
+ "Data Access:", "READS SQL DATA")
+ checkKeywordsExist(sql("DESCRIBE FUNCTION EXTENDED bar"),
+ "Deterministic: false",
+ "Data Access:", "READS SQL DATA")
+ // Do not overwrite user specified routine characteristics.
Review Comment:
Done in a6e1a52.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]