isidentical commented on code in PR #3880:
URL: https://github.com/apache/arrow-datafusion/pull/3880#discussion_r998491734
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -189,38 +198,55 @@ impl Optimizer {
where
F: FnMut(&LogicalPlan, &dyn OptimizerRule),
{
+ let mut plan_str = format!("{}", plan.display_indent());
let mut new_plan = plan.clone();
- log_plan("Optimizer input", plan);
+ let mut i = 0;
+ while i < optimizer_config.max_passes {
+ log_plan(&format!("Optimizer input (pass {})", i), &new_plan);
- for rule in &self.rules {
- let result = rule.optimize(&new_plan, optimizer_config);
- match result {
- Ok(plan) => {
- new_plan = plan;
- observer(&new_plan, rule.as_ref());
- log_plan(rule.name(), &new_plan);
- }
- Err(ref e) => {
- if optimizer_config.skip_failing_rules {
- // Note to future readers: if you see this warning it
signals a
- // bug in the DataFusion optimizer. Please consider
filing a ticket
- // https://github.com/apache/arrow-datafusion
- warn!(
+ for rule in &self.rules {
+ let result = rule.optimize(&new_plan, optimizer_config);
+ match result {
+ Ok(plan) => {
+ new_plan = plan;
+ observer(&new_plan, rule.as_ref());
+ log_plan(rule.name(), &new_plan);
+ }
+ Err(ref e) => {
+ if optimizer_config.skip_failing_rules {
+ // Note to future readers: if you see this warning
it signals a
+ // bug in the DataFusion optimizer. Please
consider filing a ticket
+ // https://github.com/apache/arrow-datafusion
+ warn!(
"Skipping optimizer rule '{}' due to unexpected
error: {}",
rule.name(),
e
);
- } else {
- return Err(DataFusionError::Internal(format!(
- "Optimizer rule '{}' failed due to unexpected
error: {}",
- rule.name(),
- e
- )));
+ } else {
+ return Err(DataFusionError::Internal(format!(
+ "Optimizer rule '{}' failed due to unexpected
error: {}",
+ rule.name(),
+ e
+ )));
+ }
}
}
}
+ log_plan(&format!("Optimized plan (pass {})", i), &new_plan);
+
+ // TODO this is an expensive way to see if the optimizer did
anything and
+ // it would be better to change the OptimizerRule trait to return
an Option
+ // instead
+ let new_plan_str = format!("{}", new_plan.display_indent());
+ if plan_str == new_plan_str {
+ // plan did not change, so no need to continue trying to
optimize
+ debug!("optimizer pass {} did not make changes", i);
+ break;
+ }
Review Comment:
+1! This is probably a follow-up item; but instead of having the optimizer
decide on this (by returning an option), it might also make sense to compute a
unique plan id (bottom up) so that we can also use this to detect optimization
cycles.
A very basic example is (assuming each letter is a unique plan id) `A -> B
-> C -> A -> B -> [max passes times more]`, where even though the previous plan
is different from the current one we would still need to exit the loop. Having
a unique id would mean we can just store a set somewhere and check against `if
known_plans.contains(new_plan.id)` and it would break the loop.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]