This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 380484494d2 [SPARK-43293][SQL] `__qualified_access_only` should be ignored in normal columns
380484494d2 is described below

commit 380484494d2ec8ae713ed6f2677b08ea8657cb29
Author: Wenchen Fan <[email protected]>
AuthorDate: Thu Apr 27 10:52:03 2023 +0800

    [SPARK-43293][SQL] `__qualified_access_only` should be ignored in normal columns
    
    ### What changes were proposed in this pull request?

    This is a followup of https://github.com/apache/spark/pull/39596 to fix more corner cases. It ignores the special column flag that requires qualified access for normal output attributes, as the flag should only be effective for metadata columns.

    ### Why are the changes needed?

    It's very hard to make sure that we don't leak the special column flag. Since the bug has been in Spark releases for a while, there may be tables created with CTAS whose schemas contain the special flag.

    ### Does this PR introduce _any_ user-facing change?

    No

    ### How was this patch tested?

    new analysis test
    
    Closes #40961 from cloud-fan/col.
    
    Authored-by: Wenchen Fan <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit 021f02e02fb88bbbccd810ae000e14e0c854e2e6)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../org/apache/spark/sql/catalyst/expressions/package.scala      | 9 +++++++++
 .../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala    | 4 ++--
 .../spark/sql/catalyst/plans/logical/basicLogicalOperators.scala | 3 ++-
 .../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala   | 6 ++++++
 4 files changed, 19 insertions(+), 3 deletions(-)
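
For context before the diff: the flag lives in a column's metadata under the key `__qualified_access_only` and is toggled by the `markAsQualifiedAccessOnly()` / `markAsAllowAnyAccess()` helpers that the patch and the new test rely on. A minimal stand-alone sketch of that mechanism (not part of the patch; it assumes the catalyst DSL and the `MetadataColumnHelper` implicits from the expressions package):

    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.expressions._

    object QualifiedAccessFlagSketch extends App {
      // Mark a normal int column with the qualified-access-only flag; the flag is
      // stored in the attribute's metadata.
      val flagged = $"a".int.markAsQualifiedAccessOnly()
      println(flagged.metadata)  // expected to contain "__qualified_access_only": true

      // Clear the restriction again; this is what AttributeSeq.fromNormalOutput in
      // the patch below does for every normal output attribute.
      val cleared = flagged.markAsAllowAnyAccess()
      println(cleared.metadata)  // expected to no longer carry the flag
    }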

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 67936c36b41..b32ef3d95aa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -77,6 +77,15 @@ package object expressions  {
     override def apply(row: InternalRow): InternalRow = row
   }
 
+  object AttributeSeq {
+    def fromNormalOutput(attr: Seq[Attribute]): AttributeSeq = {
+      // Normal output attributes should never have the special flag that allows only qualified
+      // access. In case something goes wrong, like a scan relation from a custom data source,
+      // we explicitly remove that special flag to be safe.
+      new AttributeSeq(attr.map(_.markAsAllowAnyAccess()))
+    }
+  }
+
   /**
    * Helper functions for working with `Seq[Attribute]`.
    */
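
The new factory is intended to be used wherever a plan's normal output is wrapped into an `AttributeSeq` for name lookup (see the `LogicalPlan` and `LateralJoin` changes below). A hedged usage sketch, assuming `caseInsensitiveResolution` from the analysis package and the existing `AttributeSeq.resolve(nameParts, resolver)` helper:

    import org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution
    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.expressions._

    object FromNormalOutputSketch extends App {
      // An output attribute that accidentally carries the qualified-access-only flag.
      val attrs = Seq($"a".int.markAsQualifiedAccessOnly())

      // fromNormalOutput strips the flag before building the AttributeSeq, so an
      // unqualified lookup of "a" is expected to succeed despite the leaked flag.
      val resolved = AttributeSeq.fromNormalOutput(attrs)
        .resolve(Seq("a"), caseInsensitiveResolution)
      println(resolved.isDefined)  // expected: true
    }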
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 36187bb2d55..aa82d7a3354 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -96,11 +96,11 @@ abstract class LogicalPlan
     }
   }
 
-  private[this] lazy val childAttributes = AttributeSeq(children.flatMap(_.output))
+  private[this] lazy val childAttributes = AttributeSeq.fromNormalOutput(children.flatMap(_.output))
 
   private[this] lazy val childMetadataAttributes = AttributeSeq(children.flatMap(_.metadataOutput))
 
-  private[this] lazy val outputAttributes = AttributeSeq(output)
+  private[this] lazy val outputAttributes = AttributeSeq.fromNormalOutput(output)
 
   private[this] lazy val outputMetadataAttributes = AttributeSeq(metadataOutput)
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index cdb4ba3fe22..b5a2f097424 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -1960,7 +1960,8 @@ case class LateralJoin(
     }
   }
 
-  private[this] lazy val childAttributes = AttributeSeq(left.output ++ right.plan.output)
+  private[this] lazy val childAttributes = AttributeSeq.fromNormalOutput(
+    left.output ++ right.plan.output)
 
   private[this] lazy val childMetadataAttributes =
     AttributeSeq(left.metadataOutput ++ right.plan.metadataOutput)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 54ea4086c9b..8a3d5c13d3c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1437,4 +1437,10 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       ).analyze
     )
   }
+
+  test("SPARK-43293: __qualified_access_only should be ignored in normal columns") {
+    val attr = $"a".int.markAsQualifiedAccessOnly()
+    val rel = LocalRelation(attr)
+    checkAnalysis(rel.select($"a"), rel.select(attr.markAsAllowAnyAccess()))
+  }
 }
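
Taken together, the new test mirrors the scenario from the commit message: a relation whose schema leaked the flag (for example via CTAS) can still be queried by bare column name. A rough end-to-end sketch outside the suite, assuming the same catalyst DSL imports the suite uses and `SimpleAnalyzer` as a stand-in for the analyzer behind `checkAnalysis`:

    import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.dsl.plans._
    import org.apache.spark.sql.catalyst.expressions._
    import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

    object QualifiedAccessEndToEndSketch extends App {
      // A relation whose only column leaked the qualified-access-only flag,
      // e.g. a table schema written by CTAS before this fix.
      val attr = $"a".int.markAsQualifiedAccessOnly()
      val rel = LocalRelation(attr)

      // With the patch, the flag is ignored for normal output, so selecting the
      // bare name `a` is expected to analyze successfully.
      val analyzed = SimpleAnalyzer.execute(rel.select($"a"))
      println(analyzed)
    }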


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
