gene-bordegaray commented on code in PR #19304:
URL: https://github.com/apache/datafusion/pull/19304#discussion_r2621076500
##########
datafusion/physical-expr/src/partitioning.rs:
##########
@@ -339,4 +407,173 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_subset_partitioning_normal() -> Result<()> {
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int64, false),
+ Field::new("b", DataType::Int64, false),
+ Field::new("c", DataType::Int64, false),
+ ]));
+
+ let col_a: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("a", &schema)?);
+ let col_b: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("b", &schema)?);
+ let col_c: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("c", &schema)?);
+ let eq_properties = EquivalenceProperties::new(Arc::clone(&schema));
+
+ let test_cases = vec![
+ // Overlap: requirement is superset of partitions
+ (
+ "Overlap: Hash([a]) vs Hash([a, b])",
+ Partitioning::Hash(vec![Arc::clone(&col_a)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ ]),
+ PartitioningSatisfaction::Subset,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "Overlap: Hash([a]) vs Hash([a, b, c])",
+ Partitioning::Hash(vec![Arc::clone(&col_a)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ Arc::clone(&col_c),
+ ]),
+ PartitioningSatisfaction::Subset,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "Overlap: Hash([a, b]) vs Hash([a, b, c])",
+ Partitioning::Hash(vec![Arc::clone(&col_a),
Arc::clone(&col_b)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ Arc::clone(&col_c),
+ ]),
+ PartitioningSatisfaction::Subset,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ // No overlap: partitions and requirements are disjoint
+ (
+ "No overlap: Hash([a]) vs Hash([b, c])",
+ Partitioning::Hash(vec![Arc::clone(&col_a)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_b),
+ Arc::clone(&col_c),
+ ]),
+ PartitioningSatisfaction::NotSatisfied,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "No overlap: Hash([a, b]) vs Hash([c])",
+ Partitioning::Hash(vec![Arc::clone(&col_a),
Arc::clone(&col_b)], 4),
+ Distribution::HashPartitioned(vec![Arc::clone(&col_c)]),
+ PartitioningSatisfaction::NotSatisfied,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ // Partial overlap: not a strict subset
+ (
+ "Partial overlap: Hash([a, c]) vs Hash([a, b])",
+ Partitioning::Hash(vec![Arc::clone(&col_a),
Arc::clone(&col_c)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ ]),
+ PartitioningSatisfaction::NotSatisfied,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
Review Comment:
I check for Hash([]), although we should never see it. I can add a test for
coverage.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]