NGA-TRAN commented on code in PR #10259:
URL: https://github.com/apache/datafusion/pull/10259#discussion_r1581568671
##########
datafusion/core/src/physical_optimizer/enforce_distribution.rs:
##########
@@ -3097,7 +3110,67 @@ pub(crate) mod tests {
"ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c,
d, e]",
];
assert_optimized!(expected, plan.clone(), true);
- assert_optimized!(expected, plan, false);
+ assert_optimized!(expected, plan.clone(), false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn union_not_to_interleave() -> Result<()> {
+ // group by (a as a1)
+ let left = aggregate_exec_with_alias(
+ parquet_exec(),
+ vec![("a".to_string(), "a1".to_string())],
+ );
+ // group by (a as a1)
+ let right = aggregate_exec_with_alias(
+ parquet_exec(),
+ vec![("a".to_string(), "a1".to_string())],
+ );
+
+ // Union
+ let plan = Arc::new(UnionExec::new(vec![left, right]));
+
+ // final agg
+ let plan =
+ aggregate_exec_with_alias(plan, vec![("a1".to_string(),
"a2".to_string())]);
+
+ // Only two RepartitionExecs added, no final RepartitionExec required
+ let expected = &[
+ "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a2@0], 10),
input_partitions=20",
+ "AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[]",
+ "UnionExec",
+ "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a1@0], 10),
input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10),
input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c,
d, e]",
+ "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a1@0], 10),
input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10),
input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c,
d, e]",
+ ];
+ // no sort in the plan but since we need it as a parameter, make it
default false
+ let prefer_existing_sort = false;
+ let first_enforce_distribution = true;
+ let prefer_existing_union = true;
+
+ assert_optimized!(
+ expected,
+ plan.clone(),
+ first_enforce_distribution,
+ prefer_existing_sort,
+ prefer_existing_union
+ );
+ assert_optimized!(
+ expected,
+ plan,
+ !first_enforce_distribution,
+ prefer_existing_sort,
+ prefer_existing_union
Review Comment:
Looking at the test `union_to_interleave` above: if it covers all the needed
cases, then these two tests cover all the needed cases for the not-convert
path, too. Let me know if that is not the case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]