gene-bordegaray commented on code in PR #18919:
URL: https://github.com/apache/datafusion/pull/18919#discussion_r2563351086
##########
datafusion/physical-optimizer/src/enforce_distribution.rs:
##########
@@ -1274,17 +1285,18 @@ pub fn ensure_distribution(
}
Distribution::HashPartitioned(exprs) => {
// See
https://github.com/apache/datafusion/issues/18341#issuecomment-3503238325 for
background
- if add_roundrobin && !hash_necessary {
- // Add round-robin repartitioning on top of the
operator
- // to increase parallelism.
- child = add_roundrobin_on_top(child,
target_partitions)?;
- }
- // When inserting hash is necessary to satisfy hash
requirement, insert hash repartition.
if hash_necessary {
+ // Hash repartition required to satisfy distribution
requirement
child =
add_hash_on_top(child, exprs.to_vec(),
target_partitions)?;
+ } else if add_roundrobin {
+ // Add round-robin repartitioning to increase
parallelism
+ child = add_roundrobin_on_top(child,
target_partitions)?;
}
}
+ Distribution::KeyPartitioned(_) => {
+ // Nothing to do: treated as satisfied upstream
+ }
Review Comment:
This arm is a no-op because we can guarantee that our data is correctly distributed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]