gene-bordegaray commented on code in PR #19304:
URL: https://github.com/apache/datafusion/pull/19304#discussion_r2632608198
##########
datafusion/physical-expr/src/partitioning.rs:
##########
@@ -339,4 +414,403 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_partitioning_satisfy_by_subset() -> Result<()> {
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int64, false),
+ Field::new("b", DataType::Int64, false),
+ Field::new("c", DataType::Int64, false),
+ ]));
+
+ let col_a: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("a", &schema)?);
+ let col_b: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("b", &schema)?);
+ let col_c: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("c", &schema)?);
+ let eq_properties = EquivalenceProperties::new(Arc::clone(&schema));
+
+ let test_cases = vec![
+ (
+ "Hash([a]) vs Hash([a, b])",
+ Partitioning::Hash(vec![Arc::clone(&col_a)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ ]),
+ PartitioningSatisfaction::Subset,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "Hash([a]) vs Hash([a, b, c])",
+ Partitioning::Hash(vec![Arc::clone(&col_a)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ Arc::clone(&col_c),
+ ]),
+ PartitioningSatisfaction::Subset,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "Hash([a, b]) vs Hash([a, b, c])",
+ Partitioning::Hash(vec![Arc::clone(&col_a),
Arc::clone(&col_b)], 4),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ Arc::clone(&col_c),
+ ]),
+ PartitioningSatisfaction::Subset,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ ];
+
+ for (desc, partition, required, expected_with_subset,
expected_without_subset) in
+ test_cases
+ {
+ let result = partition.satisfy(&required, &eq_properties, true);
+ assert_eq!(
+ result, expected_with_subset,
+ "Failed for {desc} with subset enabled"
+ );
+
+ let result = partition.satisfy(&required, &eq_properties, false);
+ assert_eq!(
+ result, expected_without_subset,
+ "Failed for {desc} with subset disabled"
+ );
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_partitioning_current_superset() -> Result<()> {
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int64, false),
+ Field::new("b", DataType::Int64, false),
+ Field::new("c", DataType::Int64, false),
+ ]));
+
+ let col_a: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("a", &schema)?);
+ let col_b: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("b", &schema)?);
+ let col_c: Arc<dyn PhysicalExpr> =
+ Arc::new(Column::new_with_schema("c", &schema)?);
+ let eq_properties = EquivalenceProperties::new(Arc::clone(&schema));
+
+ let test_cases = vec![
+ (
+ "Hash([a, b]) vs Hash([a])",
+ Partitioning::Hash(vec![Arc::clone(&col_a),
Arc::clone(&col_b)], 4),
+ Distribution::HashPartitioned(vec![Arc::clone(&col_a)]),
+ PartitioningSatisfaction::NotSatisfied,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "Hash([a, b, c]) vs Hash([a])",
+ Partitioning::Hash(
+ vec![Arc::clone(&col_a), Arc::clone(&col_b),
Arc::clone(&col_c)],
+ 4,
+ ),
+ Distribution::HashPartitioned(vec![Arc::clone(&col_a)]),
+ PartitioningSatisfaction::NotSatisfied,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ (
+ "Hash([a, b, c]) vs Hash([a, b])",
+ Partitioning::Hash(
+ vec![Arc::clone(&col_a), Arc::clone(&col_b),
Arc::clone(&col_c)],
+ 4,
+ ),
+ Distribution::HashPartitioned(vec![
+ Arc::clone(&col_a),
+ Arc::clone(&col_b),
+ ]),
+ PartitioningSatisfaction::NotSatisfied,
+ PartitioningSatisfaction::NotSatisfied,
+ ),
+ ];
+
+ for (desc, partition, required, expected_with_subset,
expected_without_subset) in
Review Comment:
I was trying to make them more modular, so that each test is dedicated to a
specific behavior of this property. I think that would make it easier to
pinpoint the problem if anything breaks.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]