This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f0b2e6da5211 [SPARK-45132][SQL] Fix IDENTIFIER for function invocation f0b2e6da5211 is described below commit f0b2e6da52113802f64f7879f207064d3bdbc7b0 Author: srielau <se...@rielau.com> AuthorDate: Thu Oct 12 21:34:49 2023 +0800 [SPARK-45132][SQL] Fix IDENTIFIER for function invocation ### What changes were proposed in this pull request? Due to a quirk in the parser, in some cases, IDENTIFIER(<funcStr>)(<arg>) is not properly recognized as a function invocation. The change is to remove the explicit IDENTIFIER-clause rule in the function invocation grammar and instead recognize IDENTIFIER(<arg>) within visitFunctionCall. ### Why are the changes needed? Function invocation support for IDENTIFIER is incomplete otherwise ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new testcases to identifier-clause.sql ### Was this patch authored or co-authored using generative AI tooling? No Closes #42888 from srielau/SPARK-45132. Lead-authored-by: srielau <se...@rielau.com> Co-authored-by: Wenchen Fan <cloud0...@gmail.com> Co-authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 43 ++++++++++++---------- .../analyzer-results/identifier-clause.sql.out | 28 ++++++++++++-- .../sql-tests/inputs/identifier-clause.sql | 3 +- .../sql-tests/results/identifier-clause.sql.out | 27 +++++++++++++- 5 files changed, 77 insertions(+), 26 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 6a6d39e96ca2..77a9108e0632 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -967,7 +967,6 @@ primaryExpression | qualifiedName DOT ASTERISK #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression - | IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN #identifierClause | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument (COMMA argument+=functionArgument)*)? RIGHT_PAREN (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? @@ -1196,6 +1195,7 @@ qualifiedNameList functionName : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN + | identFunc=IDENTIFIER_KW // IDENTIFIER itself is also a valid function name. | qualifiedName | FILTER | LEFT diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c2bc6e9eb65a..9abca8b95cf7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2246,13 +2246,6 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } } - /** - * Create an expression for the IDENTIFIER() clause. - */ - override def visitIdentifierClause(ctx: IdentifierClauseContext): Expression = withOrigin(ctx) { - ExpressionWithUnresolvedIdentifier(expression(ctx.expression), UnresolvedAttribute(_)) - } - /** * Create a (windowed) Function expression. */ @@ -2274,19 +2267,31 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val filter = Option(ctx.where).map(expression(_)) val ignoreNulls = Option(ctx.nullsOption).map(_.getType == SqlBaseParser.IGNORE).getOrElse(false) - val funcCtx = ctx.functionName - val func = withFuncIdentClause( - funcCtx, - ident => UnresolvedFunction(ident, arguments, isDistinct, filter, ignoreNulls) - ) - // Check if the function is evaluated in a windowed context. - ctx.windowSpec match { - case spec: WindowRefContext => - UnresolvedWindowExpression(func, visitWindowRef(spec)) - case spec: WindowDefContext => - WindowExpression(func, visitWindowDef(spec)) - case _ => func + // Is this an IDENTIFIER clause instead of a function call? + if (ctx.functionName.identFunc != null && + arguments.length == 1 && // One argument + ctx.setQuantifier == null && // No other clause + ctx.where == null && + ctx.nullsOption == null && + ctx.windowSpec == null) { + ExpressionWithUnresolvedIdentifier(arguments.head, UnresolvedAttribute(_)) + } else { + // It's a function call + val funcCtx = ctx.functionName + val func = withFuncIdentClause( + funcCtx, + ident => UnresolvedFunction(ident, arguments, isDistinct, filter, ignoreNulls) + ) + + // Check if the function is evaluated in a windowed context. + ctx.windowSpec match { + case spec: WindowRefContext => + UnresolvedWindowExpression(func, visitWindowRef(spec)) + case spec: WindowDefContext => + WindowExpression(func, visitWindowDef(spec)) + case _ => func + } } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 64268a75aced..a78039267643 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -187,10 +187,11 @@ Project [coalesce(cast(null as int), 1) AS coalesce(NULL, 1)#x] -- !query -SELECT IDENTIFIER('abs')(-1) +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) -- !query analysis -Project [abs(-1) AS abs(-1)#x] -+- OneRowRelation +Project [abs(c1#x) AS abs(c1)#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] -- !query @@ -708,6 +709,27 @@ org.apache.spark.sql.AnalysisException } +-- !query +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`IDENTIFIER`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "`IDENTIFIER`('abs')" + } ] +} + + -- !query CREATE TABLE IDENTIFIER(1)(c1 INT) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 00467244ff18..fd53f44d3c33 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -36,7 +36,7 @@ DROP SCHEMA s; -- Function reference SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1); -SELECT IDENTIFIER('abs')(-1); +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1); SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1); -- Table DDL @@ -117,6 +117,7 @@ SELECT IDENTIFIER('') FROM VALUES(1) AS T(``); VALUES(IDENTIFIER(CAST(NULL AS STRING))); VALUES(IDENTIFIER(1)); VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))); +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1); CREATE TABLE IDENTIFIER(1)(c1 INT); CREATE TABLE IDENTIFIER('a.b.c')(c1 INT); diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index cfedaae39e81..d8ac69dd120f 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -205,9 +205,9 @@ struct<coalesce(NULL, 1):int> -- !query -SELECT IDENTIFIER('abs')(-1) +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) -- !query schema -struct<abs(-1):int> +struct<abs(c1):int> -- !query output 1 @@ -818,6 +818,29 @@ org.apache.spark.sql.AnalysisException } +-- !query +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`IDENTIFIER`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "`IDENTIFIER`('abs')" + } ] +} + + -- !query CREATE TABLE IDENTIFIER(1)(c1 INT) -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org