gene-bordegaray commented on code in PR #23184:
URL: https://github.com/apache/datafusion/pull/23184#discussion_r3486673351
##########
datafusion/physical-expr/src/partitioning.rs:
##########
@@ -406,37 +391,56 @@ impl Partitioning {
}
}
- /// Returns true when `self` and `other` describe compatible partition
maps.
+ /// Returns true when two partitionings both satisfy their own distribution
+ /// requirements and can be paired by partition index.
+ ///
+ /// Use this for multi-input operators, such as partitioned joins, where
+ /// each child has a different schema, required [`Distribution`], and
+ /// expression-equivalence context.
+ ///
+ /// ```text
+ /// # co-partitioned: each side satisfies its own requirement, and
boundaries match
+ /// left: Range(left.a ASC, [10, 20]), required KeyPartitioned(left.a)
+ /// right: Range(right.x ASC, [10, 20]), required KeyPartitioned(right.x)
+ ///
+ /// # not compatible: right side does not satisfy a hash-specific
requirement
+ /// left: Range(left.a ASC, [10, 20]), required KeyPartitioned(left.a)
+ /// right: Range(right.x ASC, [10, 20]), required HashPartitioned(right.x)
///
- /// Compatible partition maps can be used for partition-local behavior: if
- /// this returns true, partition `i` from both partitionings can be treated
- /// as covering the same partition domain. This is stricter than
- /// [`Self::satisfaction`], which only answers whether this partitioning
can
- /// satisfy a required distribution.
- pub fn compatible_with(
+ /// # not compatible: boundaries differ
+ /// left: Range(left.a ASC, [10, 20]), required KeyPartitioned(left.a)
+ /// right: Range(right.x ASC, [15, 20]), required KeyPartitioned(right.x)
+ /// ```
+ pub fn co_partitioned_with(
&self,
- other: &Self,
+ required: &Distribution,
eq_properties: &EquivalenceProperties,
+ other: &Self,
+ other_required: &Distribution,
+ other_eq_properties: &EquivalenceProperties,
) -> bool {
+ if !self
+ .satisfaction(required, eq_properties, false)
+ .is_satisfied()
+ || !other
+ .satisfaction(other_required, other_eq_properties, false)
+ .is_satisfied()
+ {
+ return false;
+ }
+
if self.partition_count() == 1 && other.partition_count() == 1 {
return true;
}
+ if self.partition_count() != other.partition_count() {
+ return false;
+ }
+
match (self, other) {
- (
- Partitioning::Hash(left_exprs, left_count),
- Partitioning::Hash(right_exprs, right_count),
- ) => {
- if left_count != right_count {
- return false;
- }
- if left_exprs.is_empty() || right_exprs.is_empty() {
- return false;
- }
- equivalent_exprs(left_exprs, right_exprs, eq_properties)
- }
+ (Partitioning::Hash(_, _), Partitioning::Hash(_, _)) => true,
Review Comment:
This is only correct because of the check that runs before it:
```rust
if !self
.satisfaction(required, eq_properties, false)
.is_satisfied()
|| !other
.satisfaction(other_required, other_eq_properties, false)
.is_satisfied()
{
return false;
}
```
since this makes sure each side satisfies its join distribution. So
`Hash(a)` vs `Hash(b, c, ...)` wouldn't reach this.
Something like this up front may be cleaner and more obvious though:
```rust
let left_satisfies = matches!(
self.satisfaction(required,
eq_properties, false),
PartitioningSatisfaction::Exact
);
let right_satisfies = matches!(
other.satisfaction(other_required,
other_eq_properties, false),
PartitioningSatisfaction::Exact
);
if !left_satisfies || !right_satisfies {
return false;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]