This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new c8d0bd0  [SPARK-35673][SQL] Fix user-defined hint and unrecognized hint in subquery
c8d0bd0 is described below

commit c8d0bd07fc34861a802f176c35a3beb8f8b8d375
Author: Fu Chen <cfmcgr...@gmail.com>
AuthorDate: Thu Jun 10 15:32:10 2021 +0800

    [SPARK-35673][SQL] Fix user-defined hint and unrecognized hint in subquery
    
    Use `UnresolvedHint.resolved = child.resolved` instead of `UnresolvedHint.resolved = false`, so that a plan containing an `UnresolvedHint` child can be optimized by the rules in the `Resolution` batch.
    
    For instance, before this PR, the following plan can't be optimized by `ResolveReferences`:
    ```
    !'Project [*]
     +- SubqueryAlias __auto_generated_subquery_name
        +- UnresolvedHint use_hash
           +- Project [42 AS 42#10]
              +- OneRowRelation
    ```
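    
    For reference, a query of roughly this shape produces the plan above. This is a minimal sketch: the SQL mirrors the test added below in SparkSessionExtensionSuite, the `use_hash` hint name comes from the plan dump, and the object name and session setup are illustrative assumptions, not part of the patch.
    
    ```
    import org.apache.spark.sql.SparkSession
    
    object HintInSubqueryRepro extends App {
      // Hypothetical driver used only for illustration.
      val spark = SparkSession.builder().appName("hint-in-subquery").master("local[1]").getOrCreate()
      // The hinted subquery is wrapped in an UnresolvedHint node; before this change
      // the outer `SELECT *` could not be resolved by `ResolveReferences`.
      spark.sql("SELECT * FROM (SELECT /*+ use_hash */ 42)").show()
      spark.stop()
    }
    ```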
    
    Fixes the hint-in-subquery bug.
    
    No user-facing change.
    
    Covered by new tests.
    
    Closes #32841 from cfmcgrady/SPARK-35673.
    
    Authored-by: Fu Chen <cfmcgr...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 5280f02747eed9849e4a64562d38aee11e21616f)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/catalyst/analysis/CheckAnalysis.scala      |  3 +++
 .../spark/sql/catalyst/plans/logical/hints.scala   |  5 ++++-
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 16 +++++++++++++
 .../spark/sql/SparkSessionExtensionSuite.scala     | 26 ++++++++++++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 2887967..fe12dd4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -105,6 +105,9 @@ trait CheckAnalysis extends PredicateHelper {
       case u: UnresolvedRelation =>
         u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}")
 
+      case u: UnresolvedHint =>
+        u.failAnalysis(s"Hint not found: ${u.name}")
+
       case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) =>
         failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}")
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
index a325b61..1202d7d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
@@ -30,7 +30,10 @@ import org.apache.spark.util.Utils
 case class UnresolvedHint(name: String, parameters: Seq[Any], child: LogicalPlan)
   extends UnaryNode {
 
-  override lazy val resolved: Boolean = false
+  // we need it to be resolved so that the analyzer can continue to analyze the rest of the query
+  // plan.
+  override lazy val resolved: Boolean = child.resolved
+
   override def output: Seq[Attribute] = child.output
 }
 
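As a side note on the comment above ("we need it to be resolved so that the analyzer can continue to analyze the rest of the query plan"): below is a standalone sketch, in plain Scala with no Spark dependency and purely illustrative names, of the resolution gate it describes. Analyzer rules such as `ResolveReferences` generally rewrite a node only once its children are resolved, so a hint node that always reports `resolved = false` keeps every ancestor unresolved, whereas deferring to the child lets analysis continue above the hint.

```
// Illustrative model of child-gated resolution; not Spark code.
sealed trait PlanNode {
  def children: Seq[PlanNode]
  def resolved: Boolean = children.forall(_.resolved)
}

// A leaf has no children, so it is trivially resolved.
case class LeafRelation() extends PlanNode {
  def children: Seq[PlanNode] = Nil
}

// Pre-fix behavior: the hint claims it is never resolved.
case class AlwaysUnresolvedHint(child: PlanNode) extends PlanNode {
  def children: Seq[PlanNode] = Seq(child)
  override def resolved: Boolean = false
}

// Post-fix behavior: the hint defers to its child, like `resolved = child.resolved` above.
case class DeferringHint(child: PlanNode) extends PlanNode {
  def children: Seq[PlanNode] = Seq(child)
}

case class ProjectNode(child: PlanNode) extends PlanNode {
  def children: Seq[PlanNode] = Seq(child)
}

object ResolutionGateDemo extends App {
  val blocked   = ProjectNode(AlwaysUnresolvedHint(LeafRelation()))
  val unblocked = ProjectNode(DeferringHint(LeafRelation()))
  // A rule gated on child resolution would skip `blocked` but can rewrite `unblocked`.
  println(blocked.children.forall(_.resolved))   // false
  println(unblocked.children.forall(_.resolved)) // true
}
```
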
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index afc9780..348c282 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -722,4 +722,20 @@ class AnalysisErrorSuite extends AnalysisTest {
       assertAnalysisError(plan, s"Correlated column is not allowed in predicate ($msg)" :: Nil)
     }
   }
+
+  test("SPARK-35673: fail if the plan still contains UnresolvedHint after analysis") {
+    val hintName = "some_random_hint_that_does_not_exist"
+    val plan = UnresolvedHint(hintName, Seq.empty,
+      Project(Alias(Literal(1), "x")() :: Nil, OneRowRelation())
+    )
+    assert(plan.resolved)
+
+    val error = intercept[AnalysisException] {
+      SimpleAnalyzer.checkAnalysis(plan)
+    }
+    assert(error.message.contains(s"Hint not found: ${hintName}"))
+
+    // UnresolvedHint will be removed by the batch `Remove Unresolved Hints`
+    assertAnalysisSuccess(plan, true)
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
index e5e8bc6..b30d579 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
@@ -349,6 +349,32 @@ class SparkSessionExtensionSuite extends SparkFunSuite {
       stop(session)
     }
   }
+
+  test("SPARK-35673: user-defined hint and unrecognized hint in subquery") {
+    withSession(Seq(_.injectPostHocResolutionRule(MyHintRule))) { session =>
+      // unrecognized hint
+      QueryTest.checkAnswer(
+        session.sql(
+          """
+            |SELECT *
+            |FROM (
+            |    SELECT /*+ some_random_hint_that_does_not_exist */ 42
+            |)
+            |""".stripMargin),
+        Row(42) :: Nil)
+
+      // user-defined hint
+      QueryTest.checkAnswer(
+        session.sql(
+          """
+            |SELECT *
+            |FROM (
+            |    SELECT /*+ CONVERT_TO_EMPTY */ 42
+            |)
+            |""".stripMargin),
+        Nil)
+    }
+  }
 }
 
 case class MyRule(spark: SparkSession) extends Rule[LogicalPlan] {

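Note: the new SparkSessionExtensionSuite test relies on `MyHintRule`, whose definition is not included in this excerpt. Below is a hypothetical sketch of what such a post-hoc resolution rule could look like, consistent with the `CONVERT_TO_EMPTY` hint yielding an empty result; the class name and the exact transformation are assumptions, not the committed code.

```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, UnresolvedHint}
import org.apache.spark.sql.catalyst.rules.Rule

// Hypothetical hint handler: replaces the CONVERT_TO_EMPTY hint with an empty
// LocalRelation that preserves the child's output schema.
case class ConvertToEmptyHintRule(spark: SparkSession) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp {
    case UnresolvedHint("CONVERT_TO_EMPTY", _, child) if child.resolved =>
      LocalRelation(child.output)
  }
}
```

A builder like this could then be registered through `injectPostHocResolutionRule`, mirroring how the test registers `MyHintRule`.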