This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 021f02e02fb [SPARK-43293][SQL] `__qualified_access_only` should be 
ignored in normal columns
021f02e02fb is described below

commit 021f02e02fb88bbbccd810ae000e14e0c854e2e6
Author: Wenchen Fan <[email protected]>
AuthorDate: Thu Apr 27 10:52:03 2023 +0800

    [SPARK-43293][SQL] `__qualified_access_only` should be ignored in normal 
columns
    
    ### What changes were proposed in this pull request?
    
    This is a followup of https://github.com/apache/spark/pull/39596 to fix 
more corner cases. For normal output attributes, it ignores the special column 
flag that requires qualified access, as the flag should only take effect on 
metadata columns.
    
    ### Why are the changes needed?
    
    It's very hard to make sure that we never leak the special column flag. 
Since the bug has been present in released Spark versions for a while, there may 
be tables that were created with CTAS and whose schema contains the special flag.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    new analysis test
    
    Closes #40961 from cloud-fan/col.
    
    Authored-by: Wenchen Fan <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../org/apache/spark/sql/catalyst/expressions/package.scala      | 9 +++++++++
 .../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala    | 4 ++--
 .../spark/sql/catalyst/plans/logical/basicLogicalOperators.scala | 3 ++-
 .../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala   | 6 ++++++
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 67936c36b41..b32ef3d95aa 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -77,6 +77,15 @@ package object expressions  {
     override def apply(row: InternalRow): InternalRow = row
   }
 
+  object AttributeSeq {
+    def fromNormalOutput(attr: Seq[Attribute]): AttributeSeq = {
+      // Normal output attributes should never have the special flag that allows only qualified
+      // access. In case something goes wrong, like a scan relation from a custom data source,
+      // we explicitly remove that special flag to be safe.
+      new AttributeSeq(attr.map(_.markAsAllowAnyAccess()))
+    }
+  }
+
   /**
    * Helper functions for working with `Seq[Attribute]`.
    */
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index e11bd77fe04..374eb070db1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -125,11 +125,11 @@ abstract class LogicalPlan
     }
   }
 
-  private[this] lazy val childAttributes = 
AttributeSeq(children.flatMap(_.output))
+  private[this] lazy val childAttributes = 
AttributeSeq.fromNormalOutput(children.flatMap(_.output))
 
   private[this] lazy val childMetadataAttributes = 
AttributeSeq(children.flatMap(_.metadataOutput))
 
-  private[this] lazy val outputAttributes = AttributeSeq(output)
+  private[this] lazy val outputAttributes = 
AttributeSeq.fromNormalOutput(output)
 
   private[this] lazy val outputMetadataAttributes = 
AttributeSeq(metadataOutput)
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 91726185090..f887361d508 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -1997,7 +1997,8 @@ case class LateralJoin(
     }
   }
 
-  private[this] lazy val childAttributes = AttributeSeq(left.output ++ 
right.plan.output)
+  private[this] lazy val childAttributes = AttributeSeq.fromNormalOutput(
+    left.output ++ right.plan.output)
 
   private[this] lazy val childMetadataAttributes =
     AttributeSeq(left.metadataOutput ++ right.plan.metadataOutput)
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index fe3a74f66a5..b0ac59c8cc7 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1524,4 +1524,10 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       assert(l.childOutputs == l.plan.output)
     }
   }
+
+  test("SPARK-43293: __qualified_access_only should be ignored in normal 
columns") {
+    val attr = $"a".int.markAsQualifiedAccessOnly()
+    val rel = LocalRelation(attr)
+    checkAnalysis(rel.select($"a"), rel.select(attr.markAsAllowAnyAccess()))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to