isidentical commented on code in PR #3880:
URL: https://github.com/apache/arrow-datafusion/pull/3880#discussion_r998491734


##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -189,38 +198,55 @@ impl Optimizer {
     where
         F: FnMut(&LogicalPlan, &dyn OptimizerRule),
     {
+        let mut plan_str = format!("{}", plan.display_indent());
         let mut new_plan = plan.clone();
-        log_plan("Optimizer input", plan);
+        let mut i = 0;
+        while i < optimizer_config.max_passes {
+            log_plan(&format!("Optimizer input (pass {})", i), &new_plan);
 
-        for rule in &self.rules {
-            let result = rule.optimize(&new_plan, optimizer_config);
-            match result {
-                Ok(plan) => {
-                    new_plan = plan;
-                    observer(&new_plan, rule.as_ref());
-                    log_plan(rule.name(), &new_plan);
-                }
-                Err(ref e) => {
-                    if optimizer_config.skip_failing_rules {
-                        // Note to future readers: if you see this warning it 
signals a
-                        // bug in the DataFusion optimizer. Please consider 
filing a ticket
-                        // https://github.com/apache/arrow-datafusion
-                        warn!(
+            for rule in &self.rules {
+                let result = rule.optimize(&new_plan, optimizer_config);
+                match result {
+                    Ok(plan) => {
+                        new_plan = plan;
+                        observer(&new_plan, rule.as_ref());
+                        log_plan(rule.name(), &new_plan);
+                    }
+                    Err(ref e) => {
+                        if optimizer_config.skip_failing_rules {
+                            // Note to future readers: if you see this warning 
it signals a
+                            // bug in the DataFusion optimizer. Please 
consider filing a ticket
+                            // https://github.com/apache/arrow-datafusion
+                            warn!(
                             "Skipping optimizer rule '{}' due to unexpected 
error: {}",
                             rule.name(),
                             e
                         );
-                    } else {
-                        return Err(DataFusionError::Internal(format!(
-                            "Optimizer rule '{}' failed due to unexpected 
error: {}",
-                            rule.name(),
-                            e
-                        )));
+                        } else {
+                            return Err(DataFusionError::Internal(format!(
+                                "Optimizer rule '{}' failed due to unexpected 
error: {}",
+                                rule.name(),
+                                e
+                            )));
+                        }
                     }
                 }
             }
+            log_plan(&format!("Optimized plan (pass {})", i), &new_plan);
+
+            // TODO this is an expensive way to see if the optimizer did 
anything and
+            // it would be better to change the OptimizerRule trait to return 
an Option
+            // instead
+            let new_plan_str = format!("{}", new_plan.display_indent());
+            if plan_str == new_plan_str {
+                // plan did not change, so no need to continue trying to 
optimize
+                debug!("optimizer pass {} did not make changes", i);
+                break;
+            }

Review Comment:
   +1! This is probably a follow-up item; but instead of having the optimizer 
decide on this (by returning an option), it might also make sense to compute a 
unique plan id (bottom up) so that we can also use this to detect optimization 
cycles. 
   
   A very basic example is (assuming each letter is a unique plan id) `A -> B 
-> C -> A -> B -> [max passes times more]`, where even though the previous plan 
is different from the current one we would still need to exit the loop. Having 
a unique id would mean we can just store a set somewhere and check against `if 
known_plans.contains(new_plan.id)` and it would break the loop.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to