This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new af53ee455e21 [SPARK-50689][SQL] Enforce deterministic ordering in LCA
project lists
af53ee455e21 is described below
commit af53ee455e217ec59313555df33ebb4b038f76c7
Author: Mihailo Timotic <[email protected]>
AuthorDate: Fri Dec 27 23:18:56 2024 +0800
[SPARK-50689][SQL] Enforce deterministic ordering in LCA project lists
### What changes were proposed in this pull request?
Using `Set` to produce project lists may result in those project lists being
non-deterministic. Instead, we switch to using `LinkedHashSet`, which iterates in insertion order.
### Why are the changes needed?
It's better for the analyzer to produce stable query plans, regardless of
the Java/Scala version in use.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests.
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #49319 from mihailotim-db/mihailotim-db/linked_set_lca.
Authored-by: Mihailo Timotic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../catalyst/analysis/ResolveLateralColumnAliasReference.scala | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala
index da8065eab606..677d852ebad5 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.analysis
+import java.util.LinkedHashSet
+
import org.apache.spark.sql.catalyst.expressions._
import
org.apache.spark.sql.catalyst.expressions.WindowExpression.hasWindowExpression
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
@@ -147,7 +149,7 @@ object ResolveLateralColumnAliasReference extends
Rule[LogicalPlan] {
&&
pOriginal.projectList.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE))
=>
val p @ Project(projectList, child) = pOriginal.mapChildren(apply0)
var aliasMap = AttributeMap.empty[AliasEntry]
- val referencedAliases = collection.mutable.Set.empty[AliasEntry]
+ val referencedAliases = new LinkedHashSet[AliasEntry]
def unwrapLCAReference(e: NamedExpression): NamedExpression = {
e.transformWithPruning(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) {
case lcaRef: LateralColumnAliasReference if
aliasMap.contains(lcaRef.a) =>
@@ -156,7 +158,7 @@ object ResolveLateralColumnAliasReference extends
Rule[LogicalPlan] {
// and unwrap the LateralColumnAliasReference to the
NamedExpression inside
// If there is chaining, don't resolve and save to future rounds
if
(!aliasEntry.alias.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) {
- referencedAliases += aliasEntry
+ referencedAliases.add(aliasEntry)
lcaRef.ne
} else {
lcaRef
@@ -182,7 +184,7 @@ object ResolveLateralColumnAliasReference extends
Rule[LogicalPlan] {
val outerProjectList = collection.mutable.Seq(newProjectList: _*)
val innerProjectList =
collection.mutable.ArrayBuffer(child.output.map(_.asInstanceOf[NamedExpression]):
_*)
- referencedAliases.foreach { case AliasEntry(alias: Alias, idx) =>
+ referencedAliases.forEach { case AliasEntry(alias: Alias, idx) =>
outerProjectList.update(idx, alias.toAttribute)
innerProjectList += alias
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]