This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new e0b074a [SPARK-35673][SQL] Fix user-defined hint and unrecognized
hint in subquery
e0b074a is described below
commit e0b074a915ed39f0e51a21936787943dea8ae39f
Author: Fu Chen <[email protected]>
AuthorDate: Thu Jun 10 15:32:10 2021 +0800
[SPARK-35673][SQL] Fix user-defined hint and unrecognized hint in subquery
Use `UnresolvedHint.resolved = child.resolved` instead
`UnresolvedHint.resolved = false`, then the plan contains `UnresolvedHint`
child can be optimized by rule in batch `Resolution`.
For instance, before this pr, the following plan can't be optimized by
`ResolveReferences`.
```
!'Project [*]
+- SubqueryAlias __auto_generated_subquery_name
+- UnresolvedHint use_hash
+- Project [42 AS 42#10]
+- OneRowRelation
```
fix hint in subquery bug
No.
New test.
Closes #32841 from cfmcgrady/SPARK-35673.
Authored-by: Fu Chen <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 5280f02747eed9849e4a64562d38aee11e21616f)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/CheckAnalysis.scala | 3 +++
.../spark/sql/catalyst/plans/logical/hints.scala | 5 ++++-
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 16 +++++++++++++
.../spark/sql/SparkSessionExtensionSuite.scala | 26 ++++++++++++++++++++++
4 files changed, 49 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 3dfe7f4..84de9b9 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -112,6 +112,9 @@ trait CheckAnalysis extends PredicateHelper {
case u: UnresolvedRelation =>
u.failAnalysis(s"Table or view not found:
${u.multipartIdentifier.quoted}")
+ case u: UnresolvedHint =>
+ u.failAnalysis(s"Hint not found: ${u.name}")
+
case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _) =>
failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}")
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
index 4b5e278..1b72d21 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
@@ -29,7 +29,10 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
case class UnresolvedHint(name: String, parameters: Seq[Any], child:
LogicalPlan)
extends UnaryNode {
- override lazy val resolved: Boolean = false
+ // we need it to be resolved so that the analyzer can continue to analyze
the rest of the query
+ // plan.
+ override lazy val resolved: Boolean = child.resolved
+
override def output: Seq[Attribute] = child.output
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 20ba9c5..d14c221 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -724,4 +724,20 @@ class AnalysisErrorSuite extends AnalysisTest {
assertAnalysisError(plan, s"Correlated column is not allowed in
predicate ($msg)" :: Nil)
}
}
+
+ test("SPARK-35673: fail if the plan still contains UnresolvedHint after
analysis") {
+ val hintName = "some_random_hint_that_does_not_exist"
+ val plan = UnresolvedHint(hintName, Seq.empty,
+ Project(Alias(Literal(1), "x")() :: Nil, OneRowRelation())
+ )
+ assert(plan.resolved)
+
+ val error = intercept[AnalysisException] {
+ SimpleAnalyzer.checkAnalysis(plan)
+ }
+ assert(error.message.contains(s"Hint not found: ${hintName}"))
+
+ // UnresolvedHint be removed by batch `Remove Unresolved Hints`
+ assertAnalysisSuccess(plan, true)
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
index 35d2513..03514d85 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
@@ -355,6 +355,32 @@ class SparkSessionExtensionSuite extends SparkFunSuite {
stop(session)
}
}
+
+ test("SPARK-35673: user-defined hint and unrecognized hint in subquery") {
+ withSession(Seq(_.injectPostHocResolutionRule(MyHintRule))) { session =>
+ // unrecognized hint
+ QueryTest.checkAnswer(
+ session.sql(
+ """
+ |SELECT *
+ |FROM (
+ | SELECT /*+ some_random_hint_that_does_not_exist */ 42
+ |)
+ |""".stripMargin),
+ Row(42) :: Nil)
+
+ // user-defined hint
+ QueryTest.checkAnswer(
+ session.sql(
+ """
+ |SELECT *
+ |FROM (
+ | SELECT /*+ CONVERT_TO_EMPTY */ 42
+ |)
+ |""".stripMargin),
+ Nil)
+ }
+ }
}
case class MyRule(spark: SparkSession) extends Rule[LogicalPlan] {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]