This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new e0b074a [SPARK-35673][SQL] Fix user-defined hint and unrecognized hint in subquery e0b074a is described below commit e0b074a915ed39f0e51a21936787943dea8ae39f Author: Fu Chen <cfmcgr...@gmail.com> AuthorDate: Thu Jun 10 15:32:10 2021 +0800 [SPARK-35673][SQL] Fix user-defined hint and unrecognized hint in subquery Use `UnresolvedHint.resolved = child.resolved` instead `UnresolvedHint.resolved = false`, then the plan contains `UnresolvedHint` child can be optimized by rule in batch `Resolution`. For instance, before this pr, the following plan can't be optimized by `ResolveReferences`. ``` !'Project [*] +- SubqueryAlias __auto_generated_subquery_name +- UnresolvedHint use_hash +- Project [42 AS 42#10] +- OneRowRelation ``` fix hint in subquery bug No. New test. Closes #32841 from cfmcgrady/SPARK-35673. Authored-by: Fu Chen <cfmcgr...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 5280f02747eed9849e4a64562d38aee11e21616f) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/analysis/CheckAnalysis.scala | 3 +++ .../spark/sql/catalyst/plans/logical/hints.scala | 5 ++++- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 16 +++++++++++++ .../spark/sql/SparkSessionExtensionSuite.scala | 26 ++++++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 3dfe7f4..84de9b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -112,6 +112,9 @@ trait CheckAnalysis extends PredicateHelper { case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") + case u: UnresolvedHint => + u.failAnalysis(s"Hint not found: ${u.name}") + case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala index 4b5e278..1b72d21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.catalyst.expressions.Attribute case class UnresolvedHint(name: String, parameters: Seq[Any], child: LogicalPlan) extends UnaryNode { - override lazy val resolved: Boolean = false + // we need it to be resolved so that the analyzer can continue to analyze the rest of the query + // plan. + override lazy val resolved: Boolean = child.resolved + override def output: Seq[Attribute] = child.output } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 20ba9c5..d14c221 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -724,4 +724,20 @@ class AnalysisErrorSuite extends AnalysisTest { assertAnalysisError(plan, s"Correlated column is not allowed in predicate ($msg)" :: Nil) } } + + test("SPARK-35673: fail if the plan still contains UnresolvedHint after analysis") { + val hintName = "some_random_hint_that_does_not_exist" + val plan = UnresolvedHint(hintName, Seq.empty, + Project(Alias(Literal(1), "x")() :: Nil, OneRowRelation()) + ) + assert(plan.resolved) + + val error = intercept[AnalysisException] { + SimpleAnalyzer.checkAnalysis(plan) + } + assert(error.message.contains(s"Hint not found: ${hintName}")) + + // UnresolvedHint be removed by batch `Remove Unresolved Hints` + assertAnalysisSuccess(plan, true) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 35d2513..03514d85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -355,6 +355,32 @@ class SparkSessionExtensionSuite extends SparkFunSuite { stop(session) } } + + test("SPARK-35673: user-defined hint and unrecognized hint in subquery") { + withSession(Seq(_.injectPostHocResolutionRule(MyHintRule))) { session => + // unrecognized hint + QueryTest.checkAnswer( + session.sql( + """ + |SELECT * + |FROM ( + | SELECT /*+ some_random_hint_that_does_not_exist */ 42 + |) + |""".stripMargin), + Row(42) :: Nil) + + // user-defined hint + QueryTest.checkAnswer( + session.sql( + """ + |SELECT * + |FROM ( + | SELECT /*+ CONVERT_TO_EMPTY */ 42 + |) + |""".stripMargin), + Nil) + } + } } case class MyRule(spark: SparkSession) extends Rule[LogicalPlan] { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org