This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 29dee664dd0 [SPARK-41144][SQL] Unresolved hint should not cause query 
failure
29dee664dd0 is described below

commit 29dee664dd0d3296318d3551577c868320cbbf78
Author: ulysses-you <ulyssesyo...@gmail.com>
AuthorDate: Tue Nov 15 16:49:03 2022 +0800

    [SPARK-41144][SQL] Unresolved hint should not cause query failure
    
    Skip `UnresolvedHint` in rule `AddMetadataColumns` to avoid calling `exprId`
    on an `UnresolvedAttribute`.
    
    ```
    CREATE TABLE t1(c1 bigint) USING PARQUET;
    CREATE TABLE t2(c2 bigint) USING PARQUET;
    SELECT /*+ hash(t2) */ * FROM t1 join t2 on c1 = c2;
    ```
    
    failed with msg:
    ```
    org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to 
exprId on unresolved object
      at 
org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.exprId(unresolved.scala:147)
      at 
org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$4(Analyzer.scala:1005)
      at 
org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$4$adapted(Analyzer.scala:1005)
      at scala.collection.Iterator.exists(Iterator.scala:969)
      at scala.collection.Iterator.exists$(Iterator.scala:967)
      at scala.collection.AbstractIterator.exists(Iterator.scala:1431)
      at scala.collection.IterableLike.exists(IterableLike.scala:79)
      at scala.collection.IterableLike.exists$(IterableLike.scala:78)
      at scala.collection.AbstractIterable.exists(Iterable.scala:56)
      at 
org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$3(Analyzer.scala:1005)
      at 
org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$3$adapted(Analyzer.scala:1005)
    ```
    
    Previously, the same query only logged a warning:
    `WARN HintErrorLogger: Unrecognized hint: hash(t2)`
    
    Yes, this fixes a regression from 3.3.1.
    
    Note: the root cause is that, since https://github.com/apache/spark/pull/32841,
    `UnresolvedHint` is marked as resolved if its child is resolved;
    https://github.com/apache/spark/pull/37758 then triggered this bug.
    
    Added a test.
    
    Closes #38662 from ulysses-you/hint.
    
    Authored-by: ulysses-you <ulyssesyo...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit a9bf5d2b3f5b3331e3b024a3ad631fcbe88a9d18)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala   | 1 +
 sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala  | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ad40f924ef8..2a2fe6f2957 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -923,6 +923,7 @@ class Analyzer(override val catalogManager: CatalogManager)
 
     def apply(plan: LogicalPlan): LogicalPlan = 
plan.resolveOperatorsDownWithPruning(
       AlwaysProcess.fn, ruleId) {
+      case hint: UnresolvedHint => hint
       // Add metadata output to all node types
       case node if node.children.nonEmpty && node.resolved && 
hasMetadataCol(node) =>
         val inputAttrs = AttributeSet(node.children.flatMap(_.output))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 0a3107cdff6..5b42d05c237 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4564,6 +4564,14 @@ class SQLQuerySuite extends QueryTest with 
SharedSparkSession with AdaptiveSpark
       }
     }
   }
+
+  test("SPARK-41144: Unresolved hint should not cause query failure") {
+    withTable("t1", "t2") {
+      sql("CREATE TABLE t1(c1 bigint) USING PARQUET")
+      sql("CREATE TABLE t2(c2 bigint) USING PARQUET")
+      sql("SELECT /*+ hash(t2) */ * FROM t1 join t2 on c1 = c2")
+    }
+  }
 }
 
 case class Foo(bar: Option[String])


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to