This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 249533bcc8c7 [SPARK-45132][SQL] Fix IDENTIFIER for function invocation
249533bcc8c7 is described below
commit 249533bcc8c7fa7f578961ce21d4d7118565dfc1
Author: srielau <[email protected]>
AuthorDate: Thu Oct 12 21:34:49 2023 +0800
[SPARK-45132][SQL] Fix IDENTIFIER for function invocation
### What changes were proposed in this pull request?
Due to a quirk in the parser, in some cases, IDENTIFIER(<funcStr>)(<arg>)
is not properly recognized as a function invocation.
The change is to remove the explicit IDENTIFIER-clause rule in the function
invocation grammar and instead recognize
IDENTIFIER(<arg>) within visitFunctionCall.
### Why are the changes needed?
Function invocation support for IDENTIFIER is incomplete otherwise.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added new test cases to identifier-clause.sql
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #42888 from srielau/SPARK-45132.
Lead-authored-by: srielau <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit f0b2e6da52113802f64f7879f207064d3bdbc7b0)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +-
.../spark/sql/catalyst/parser/AstBuilder.scala | 43 ++++++++++++----------
.../analyzer-results/identifier-clause.sql.out | 28 ++++++++++++--
.../sql-tests/inputs/identifier-clause.sql | 3 +-
.../sql-tests/results/identifier-clause.sql.out | 27 +++++++++++++-
5 files changed, 77 insertions(+), 26 deletions(-)
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 85dbc499fbde..04128216be07 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -951,7 +951,6 @@ primaryExpression
| qualifiedName DOT ASTERISK
#star
| LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN
#rowConstructor
| LEFT_PAREN query RIGHT_PAREN
#subqueryExpression
- | IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN
#identifierClause
| functionName LEFT_PAREN (setQuantifier? argument+=functionArgument
(COMMA argument+=functionArgument)*)? RIGHT_PAREN
(FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)?
@@ -1176,6 +1175,7 @@ qualifiedNameList
functionName
: IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN
+ | identFunc=IDENTIFIER_KW // IDENTIFIER itself is also a valid function
name.
| qualifiedName
| FILTER
| LEFT
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 83938632e534..b80ea8fddcfe 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2223,13 +2223,6 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
}
}
- /**
- * Create an expression for the IDENTIFIER() clause.
- */
- override def visitIdentifierClause(ctx: IdentifierClauseContext): Expression
= withOrigin(ctx) {
- ExpressionWithUnresolvedIdentifier(expression(ctx.expression),
UnresolvedAttribute(_))
- }
-
/**
* Create a (windowed) Function expression.
*/
@@ -2251,19 +2244,31 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
val filter = Option(ctx.where).map(expression(_))
val ignoreNulls =
Option(ctx.nullsOption).map(_.getType ==
SqlBaseParser.IGNORE).getOrElse(false)
- val funcCtx = ctx.functionName
- val func = withFuncIdentClause(
- funcCtx,
- ident => UnresolvedFunction(ident, arguments, isDistinct, filter,
ignoreNulls)
- )
- // Check if the function is evaluated in a windowed context.
- ctx.windowSpec match {
- case spec: WindowRefContext =>
- UnresolvedWindowExpression(func, visitWindowRef(spec))
- case spec: WindowDefContext =>
- WindowExpression(func, visitWindowDef(spec))
- case _ => func
+ // Is this an IDENTIFIER clause instead of a function call?
+ if (ctx.functionName.identFunc != null &&
+ arguments.length == 1 && // One argument
+ ctx.setQuantifier == null && // No other clause
+ ctx.where == null &&
+ ctx.nullsOption == null &&
+ ctx.windowSpec == null) {
+ ExpressionWithUnresolvedIdentifier(arguments.head,
UnresolvedAttribute(_))
+ } else {
+ // It's a function call
+ val funcCtx = ctx.functionName
+ val func = withFuncIdentClause(
+ funcCtx,
+ ident => UnresolvedFunction(ident, arguments, isDistinct, filter,
ignoreNulls)
+ )
+
+ // Check if the function is evaluated in a windowed context.
+ ctx.windowSpec match {
+ case spec: WindowRefContext =>
+ UnresolvedWindowExpression(func, visitWindowRef(spec))
+ case spec: WindowDefContext =>
+ WindowExpression(func, visitWindowDef(spec))
+ case _ => func
+ }
}
}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
index 00e2d8ff8ae7..f91d0a26cf8a 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
@@ -187,10 +187,11 @@ Project [coalesce(cast(null as int), 1) AS coalesce(NULL,
1)#x]
-- !query
-SELECT IDENTIFIER('abs')(-1)
+SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1)
-- !query analysis
-Project [abs(-1) AS abs(-1)#x]
-+- OneRowRelation
+Project [abs(c1#x) AS abs(c1)#x]
++- SubqueryAlias T
+ +- LocalRelation [c1#x]
-- !query
@@ -664,6 +665,27 @@ org.apache.spark.sql.AnalysisException
}
+-- !query
+SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_ROUTINE",
+ "sqlState" : "42883",
+ "messageParameters" : {
+ "routineName" : "`IDENTIFIER`",
+ "searchPath" : "[`system`.`builtin`, `system`.`session`,
`spark_catalog`.`default`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 26,
+ "fragment" : "`IDENTIFIER`('abs')"
+ } ]
+}
+
+
-- !query
CREATE TABLE IDENTIFIER(1)(c1 INT)
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
index a1bd500455de..07ae15707293 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
@@ -36,7 +36,7 @@ DROP SCHEMA s;
-- Function reference
SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1);
-SELECT IDENTIFIER('abs')(-1);
+SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1);
SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1);
-- Table DDL
@@ -107,6 +107,7 @@ SELECT IDENTIFIER('') FROM VALUES(1) AS T(``);
VALUES(IDENTIFIER(CAST(NULL AS STRING)));
VALUES(IDENTIFIER(1));
VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1)));
+SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1);
CREATE TABLE IDENTIFIER(1)(c1 INT);
CREATE TABLE IDENTIFIER('a.b.c')(c1 INT);
diff --git
a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
index 8eabb74da97b..ed87f69fc5e6 100644
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
@@ -205,9 +205,9 @@ struct<coalesce(NULL, 1):int>
-- !query
-SELECT IDENTIFIER('abs')(-1)
+SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1)
-- !query schema
-struct<abs(-1):int>
+struct<abs(c1):int>
-- !query output
1
@@ -770,6 +770,29 @@ org.apache.spark.sql.AnalysisException
}
+-- !query
+SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_ROUTINE",
+ "sqlState" : "42883",
+ "messageParameters" : {
+ "routineName" : "`IDENTIFIER`",
+ "searchPath" : "[`system`.`builtin`, `system`.`session`,
`spark_catalog`.`default`]"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 26,
+ "fragment" : "`IDENTIFIER`('abs')"
+ } ]
+}
+
+
-- !query
CREATE TABLE IDENTIFIER(1)(c1 INT)
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]