This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 021f02e02fb [SPARK-43293][SQL] `__qualified_access_only` should be
ignored in normal columns
021f02e02fb is described below
commit 021f02e02fb88bbbccd810ae000e14e0c854e2e6
Author: Wenchen Fan <[email protected]>
AuthorDate: Thu Apr 27 10:52:03 2023 +0800
[SPARK-43293][SQL] `__qualified_access_only` should be ignored in normal
columns
### What changes were proposed in this pull request?
This is a follow-up to https://github.com/apache/spark/pull/39596 that fixes
more corner cases. It ignores the special column flag that requires qualified
access when it appears on normal output attributes, as the flag should take
effect only for metadata columns.
### Why are the changes needed?
It's very hard to make sure that we don't leak the special column flag.
Since the bug has been present in Spark releases for a while, there may be
tables created with CTAS whose schema contains the special flag.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
new analysis test
Closes #40961 from cloud-fan/col.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/catalyst/expressions/package.scala | 9 +++++++++
.../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala | 4 ++--
.../spark/sql/catalyst/plans/logical/basicLogicalOperators.scala | 3 ++-
.../org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 6 ++++++
4 files changed, 19 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 67936c36b41..b32ef3d95aa 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -77,6 +77,15 @@ package object expressions {
override def apply(row: InternalRow): InternalRow = row
}
+ object AttributeSeq {
+ def fromNormalOutput(attr: Seq[Attribute]): AttributeSeq = {
+ // Normal output attributes should never have the special flag that allows only qualified
+ // access. In case something goes wrong, like a scan relation from a custom data source,
+ // we explicitly remove that special flag to be safe.
+ new AttributeSeq(attr.map(_.markAsAllowAnyAccess()))
+ }
+ }
+
/**
* Helper functions for working with `Seq[Attribute]`.
*/
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index e11bd77fe04..374eb070db1 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -125,11 +125,11 @@ abstract class LogicalPlan
}
}
- private[this] lazy val childAttributes =
AttributeSeq(children.flatMap(_.output))
+ private[this] lazy val childAttributes =
AttributeSeq.fromNormalOutput(children.flatMap(_.output))
private[this] lazy val childMetadataAttributes =
AttributeSeq(children.flatMap(_.metadataOutput))
- private[this] lazy val outputAttributes = AttributeSeq(output)
+ private[this] lazy val outputAttributes =
AttributeSeq.fromNormalOutput(output)
private[this] lazy val outputMetadataAttributes =
AttributeSeq(metadataOutput)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 91726185090..f887361d508 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -1997,7 +1997,8 @@ case class LateralJoin(
}
}
- private[this] lazy val childAttributes = AttributeSeq(left.output ++
right.plan.output)
+ private[this] lazy val childAttributes = AttributeSeq.fromNormalOutput(
+ left.output ++ right.plan.output)
private[this] lazy val childMetadataAttributes =
AttributeSeq(left.metadataOutput ++ right.plan.metadataOutput)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index fe3a74f66a5..b0ac59c8cc7 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -1524,4 +1524,10 @@ class AnalysisSuite extends AnalysisTest with Matchers {
assert(l.childOutputs == l.plan.output)
}
}
+
+ test("SPARK-43293: __qualified_access_only should be ignored in normal columns") {
+ val attr = $"a".int.markAsQualifiedAccessOnly()
+ val rel = LocalRelation(attr)
+ checkAnalysis(rel.select($"a"), rel.select(attr.markAsAllowAnyAccess()))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]