alamb commented on code in PR #15074:
URL: https://github.com/apache/datafusion/pull/15074#discussion_r1986048918


##########
datafusion/core/tests/physical_optimizer/enforce_distribution.rs:
##########
@@ -442,40 +445,30 @@ impl TestConfig {
         self.config.execution.target_partitions = target_partitions;
         self
     }
-}
-
-/// Runs the repartition optimizer and asserts the plan against the expected
-/// Arguments
-/// * `EXPECTED_LINES` - Expected output plan
-/// * `PLAN` - Input plan
-/// * `CONFIG` - [`TestConfig`]
-macro_rules! assert_optimized {
-    ($EXPECTED_LINES: expr, $PLAN: expr, $CONFIG: expr) => {
-        let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| 
*s).collect();
 
-        let TestConfig {
-            config,
-            optimizers_to_run,
-        } = $CONFIG;
-
-        // NOTE: These tests verify the joint `EnforceDistribution` + 
`EnforceSorting` cascade
-        //       because they were written prior to the separation of 
`BasicEnforcement` into
-        //       `EnforceSorting` and `EnforceDistribution`.
-        // TODO: Orthogonalize the tests here just to verify 
`EnforceDistribution` and create
-        //       new tests for the cascade.
+    /// Perform a series of runs using the current [`TestConfig`],
+    /// assert the expected plan result,
+    /// and return the result plan (for potentional subsequent runs).
+    fn run(
+        &self,
+        expected_lines: &[&str],
+        plan: Arc<dyn ExecutionPlan>,
+        optimizers_to_run: Vec<Run>,

Review Comment:
   I think this could be
   
   ```suggestion
           optimizers_to_run: &[Run],
   ```
   
   I think then you could avoid having to call `into()` as much. I gave this a 
try and it seems to work well. Here is a proposed PR (that targets this one):
   - https://github.com/influxdata/arrow-datafusion/pull/62
   



##########
datafusion/core/tests/physical_optimizer/enforce_distribution.rs:
##########
@@ -2918,30 +3188,61 @@ fn parallelization_sort_preserving_merge_with_union() 
-> Result<()> {
     let plan_parquet = sort_preserving_merge_exec(sort_key.clone(), 
input_parquet);
     let plan_csv = sort_preserving_merge_exec(sort_key, input_csv);
 
+    let test_config = TestConfig::default();
+
+    // Expected Outcome:
     // should not repartition (union doesn't benefit from increased 
parallelism)
     // should not sort (as the data was already sorted)
+
+    // Test: with parquet
     let expected_parquet = &[
         "SortPreservingMergeExec: [c@2 ASC]",
         "  UnionExec",
         "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, 
c, d, e], output_ordering=[c@2 ASC], file_type=parquet",
         "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, 
c, d, e], output_ordering=[c@2 ASC], file_type=parquet",
     ];
+    test_config.run(
+        expected_parquet,
+        plan_parquet.clone(),
+        DISTRIB_DISTRIB_SORT.into(),
+    )?;
+    let expected_parquet_first_sort_enforcement = &[

Review Comment:
   This looks like you added some coverage too (which is good, I just wanted to 
verify that was the case)



##########
datafusion/core/tests/physical_optimizer/enforce_distribution.rs:
##########
@@ -647,12 +630,18 @@ fn multi_hash_joins() -> Result<()> {
                         "      DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[a, b, c, d, e], file_type=parquet",
                     ],
                 };
-                assert_optimized!(
-                    expected,
+
+                let test_config = TestConfig::default();
+                test_config.run(
+                    &expected,
                     top_join.clone(),
-                    &TestConfig::new(DoFirst::Distribution)
-                );
-                assert_optimized!(expected, top_join, 
&TestConfig::new(DoFirst::Sorting));
+                    DISTRIB_DISTRIB_SORT.into(), // same if distribution 
enforced before sort.

Review Comment:
   a minor nitpick is I don't think these extra comments add much additional 
value. It could be
   
   ```suggestion
                       DISTRIB_DISTRIB_SORT.into(), 
   ```



##########
datafusion/core/tests/physical_optimizer/enforce_distribution.rs:
##########
@@ -394,29 +394,32 @@ fn test_suite_default_config_options() -> ConfigOptions {
     config
 }
 
-/// How the optimizers are run.
 #[derive(PartialEq, Clone)]
-enum DoFirst {
-    /// Runs: (EnforceDistribution, EnforceDistribution, EnforceSorting)
+enum Run {
     Distribution,
-    /// Runs: (EnforceSorting, EnforceDistribution, EnforceDistribution)
     Sorting,
 }
 
+/// Standard sets of the series of optimizer runs:
+const DISTRIB_DISTRIB_SORT: [Run; 3] =
+    [Run::Distribution, Run::Distribution, Run::Sorting];
+const SORT_DISTRIB_DISTRIB: [Run; 3] =
+    [Run::Sorting, Run::Distribution, Run::Distribution];

Review Comment:
   Thank you -- I find this much clearer than having to look in a macro to know 
the runs are being invoked in a certain order



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to