This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b345b23b76c4 [SPARK-46999][SQL] ExpressionWithUnresolvedIdentifier
should include other expressions in the expression tree
b345b23b76c4 is described below
commit b345b23b76c4f9a334a115b7397d1adcc6bef185
Author: Wenchen Fan <[email protected]>
AuthorDate: Thu Feb 8 19:31:49 2024 +0800
[SPARK-46999][SQL] ExpressionWithUnresolvedIdentifier should include other
expressions in the expression tree
### What changes were proposed in this pull request?
The plan/expression tree will be expanded during analysis, due to our
implementation of the IDENTIFIER clause: we keep a lambda function in the
plan/expression node to lazily return a new plan/expression.
This is usually fine, but doesn't work well with query parameter binding,
which needs to see the entire plan/expression tree and bind all parameters at
once. The feature EXECUTE IMMEDIATE also needs to see the entire plan tree to
determine if it's a `PosParameterizedQuery` or `NameParameterizedQuery`.
This PR fixes the problem for `ExpressionWithUnresolvedIdentifier` by making
the lambda function return only the expression node that needs the identifier;
the other expressions are kept in `ExpressionWithUnresolvedIdentifier` itself
and passed to the lambda function later.
`PlanWithUnresolvedIdentifier` will be fixed in a similar way in another PR.
### Why are the changes needed?
To make the IDENTIFIER clause work with query parameter binding.
### Does this PR introduce _any_ user-facing change?
Yes, certain queries that previously failed can now run. See the new test.
### How was this patch tested?
new test
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #45058 from cloud-fan/param.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../analysis/ResolveIdentifierClause.scala | 2 +-
.../spark/sql/catalyst/analysis/unresolved.scala | 22 ++++++++++----
.../spark/sql/catalyst/parser/AstBuilder.scala | 35 ++++++++++++----------
.../sql/catalyst/analysis/AnalysisSuite.scala | 4 +--
.../org/apache/spark/sql/ParametersSuite.scala | 10 +++++++
5 files changed, 49 insertions(+), 24 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
index e0d3e5629ef8..ced7123dfcc1 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala
@@ -36,7 +36,7 @@ object ResolveIdentifierClause extends Rule[LogicalPlan] with
AliasHelper with E
case other =>
other.transformExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_IDENTIFIER))
{
case e: ExpressionWithUnresolvedIdentifier if
e.identifierExpr.resolved =>
- e.exprBuilder.apply(evalIdentifierExpr(e.identifierExpr))
+ e.exprBuilder.apply(evalIdentifierExpr(e.identifierExpr),
e.otherExprs)
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 63d4cfeb83fe..7a3cc4bc8e83 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -68,18 +68,30 @@ case class PlanWithUnresolvedIdentifier(
/**
* An expression placeholder that holds the identifier clause string
expression. It will be
* replaced by the actual expression with the evaluated identifier string.
+ *
+ * Note, the `exprBuilder` is a lambda and may hide other expressions from the
expression tree. To
+ * avoid it, this placeholder has a field to hold other expressions, so that
they can be properly
+ * transformed by catalyst rules.
*/
case class ExpressionWithUnresolvedIdentifier(
identifierExpr: Expression,
- exprBuilder: Seq[String] => Expression)
- extends UnaryExpression with Unevaluable {
+ otherExprs: Seq[Expression],
+ exprBuilder: (Seq[String], Seq[Expression]) => Expression)
+ extends Expression with Unevaluable {
+
+ def this(identifierExpr: Expression, exprBuilder: Seq[String] => Expression)
= {
+ this(identifierExpr, Nil, (ident, _) => exprBuilder(ident))
+ }
+
override lazy val resolved = false
- override def child: Expression = identifierExpr
+ override def children: Seq[Expression] = identifierExpr +: otherExprs
override def dataType: DataType = throw new UnresolvedException("dataType")
override def nullable: Boolean = throw new UnresolvedException("nullable")
final override val nodePatterns: Seq[TreePattern] =
Seq(UNRESOLVED_IDENTIFIER)
- override protected def withNewChildInternal(newChild: Expression):
Expression = {
- copy(identifierExpr = newChild)
+
+ override protected def withNewChildrenInternal(
+ newChildren: IndexedSeq[Expression]): Expression = {
+ copy(identifierExpr = newChildren.head, otherExprs = newChildren.drop(1))
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 3a9bf8803643..6ab2394777a2 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -74,17 +74,6 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
}
}
- protected def withIdentClause(
- ctx: IdentifierReferenceContext,
- builder: Seq[String] => Expression): Expression = {
- val exprCtx = ctx.expression
- if (exprCtx != null) {
- ExpressionWithUnresolvedIdentifier(withOrigin(exprCtx) {
expression(exprCtx) }, builder)
- } else {
- builder.apply(visitMultipartIdentifier(ctx.multipartIdentifier))
- }
- }
-
protected def withFuncIdentClause(
ctx: FunctionNameContext,
builder: Seq[String] => LogicalPlan): LogicalPlan = {
@@ -98,12 +87,16 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
protected def withFuncIdentClause(
ctx: FunctionNameContext,
- builder: Seq[String] => Expression): Expression = {
+ otherExprs: Seq[Expression],
+ builder: (Seq[String], Seq[Expression]) => Expression): Expression = {
val exprCtx = ctx.expression
if (exprCtx != null) {
- ExpressionWithUnresolvedIdentifier(withOrigin(exprCtx) {
expression(exprCtx) }, builder)
+ ExpressionWithUnresolvedIdentifier(
+ withOrigin(exprCtx) { expression(exprCtx) },
+ otherExprs,
+ builder)
} else {
- builder.apply(getFunctionMultiparts(ctx))
+ builder.apply(getFunctionMultiparts(ctx), otherExprs)
}
}
@@ -2336,13 +2329,23 @@ class AstBuilder extends DataTypeAstBuilder with
SQLConfHelper with Logging {
ctx.where == null &&
ctx.nullsOption == null &&
ctx.windowSpec == null) {
- ExpressionWithUnresolvedIdentifier(arguments.head,
UnresolvedAttribute(_))
+ new ExpressionWithUnresolvedIdentifier(arguments.head,
UnresolvedAttribute(_))
} else {
// It's a function call
val funcCtx = ctx.functionName
val func = withFuncIdentClause(
funcCtx,
- ident => UnresolvedFunction(ident, arguments, isDistinct, filter,
ignoreNulls, order.toSeq)
+ arguments ++ filter ++ order.toSeq,
+ (ident, otherExprs) => {
+ val orderings =
otherExprs.takeRight(order.size).asInstanceOf[Seq[SortOrder]]
+ val args = otherExprs.take(arguments.length)
+ val filterExpr = if (filter.isDefined) {
+ Some(otherExprs(args.length))
+ } else {
+ None
+ }
+ UnresolvedFunction(ident, args, isDistinct, filterExpr, ignoreNulls,
orderings)
+ }
)
// Check if the function is evaluated in a windowed context.
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 1dce98907326..3e37e4c8dc04 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1742,9 +1742,9 @@ class AnalysisSuite extends AnalysisTest with Matchers {
val name = Literal("a").as("name")
val replaceable = new Nvl(Literal("a"), Literal("b"))
withClue("IDENTIFIER as column") {
- val ident = ExpressionWithUnresolvedIdentifier(name,
UnresolvedAttribute.apply)
+ val ident = new ExpressionWithUnresolvedIdentifier(name,
UnresolvedAttribute.apply)
checkAnalysis(testRelation.select(ident),
testRelation.select($"a").analyze)
- val ident2 = ExpressionWithUnresolvedIdentifier(replaceable,
UnresolvedAttribute.apply)
+ val ident2 = new ExpressionWithUnresolvedIdentifier(replaceable,
UnresolvedAttribute.apply)
checkAnalysis(testRelation.select(ident2),
testRelation.select($"a").analyze)
}
withClue("IDENTIFIER as table") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
index 2801948f6837..013f36be4745 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
@@ -623,4 +623,14 @@ class ParametersSuite extends QueryTest with
SharedSparkSession with PlanTest {
comparePlans(expected, parameterizedSpark)
comparePlans(expected, parameterizedSql)
}
+
+ test("SPARK-46999: bind parameters for nested IDENTIFIER clause") {
+ val query = sql(
+ """
+ |EXECUTE IMMEDIATE
+ |'SELECT IDENTIFIER(?)(IDENTIFIER(?)) FROM VALUES (\'abc\') AS (col)'
+ |USING 'UPPER', 'col'
+ |""".stripMargin)
+ checkAnswer(query, Row("ABC"))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]