alamb commented on code in PR #4618:
URL: https://github.com/apache/arrow-datafusion/pull/4618#discussion_r1050032062
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -59,6 +59,11 @@ pub trait OptimizerRule {
/// A human readable name for this optimizer rule
fn name(&self) -> &str;
+
+ /// If a rule use default None, its should traverse recursively plan
inside itself
Review Comment:
```suggestion
/// How should the rule be applied by the optimizer? See comments on
[`ApplyOrder`] for details.
///
/// If a rule returns `None`, the default, its should traverse the plan
recursively inside itself
```
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -274,6 +284,77 @@ impl Optimizer {
debug!("Optimizer took {} ms", start_time.elapsed().as_millis());
Ok(new_plan)
}
+
+ fn optimize_node(
Review Comment:
What value does this function add? In other words, why not call
`rule.try_optimize` directly at the callsite 🤔
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -274,6 +284,77 @@ impl Optimizer {
debug!("Optimizer took {} ms", start_time.elapsed().as_millis());
Ok(new_plan)
}
+
+ fn optimize_node(
+ &self,
+ rule: &Arc<dyn OptimizerRule + Send + Sync>,
+ plan: &LogicalPlan,
+ optimizer_config: &mut OptimizerConfig,
+ ) -> Result<Option<LogicalPlan>> {
+ rule.try_optimize(plan, optimizer_config)
+ }
+
+ fn optimize_inputs(
+ &self,
+ rule: &Arc<dyn OptimizerRule + Send + Sync>,
+ plan: &LogicalPlan,
+ optimizer_config: &mut OptimizerConfig,
+ ) -> Result<Option<LogicalPlan>> {
+ let inputs = plan.inputs();
+ let result = inputs
+ .iter()
+ .map(|sub_plan| self.optimize_recursively(rule, sub_plan,
optimizer_config))
+ .collect::<Result<Vec<_>>>()?;
+ if result.is_empty() || result.iter().all(|o| o.is_none()) {
+ return Ok(None);
+ }
+
+ let new_inputs = result
+ .into_iter()
+ .enumerate()
+ .map(|(i, o)| match o {
+ Some(plan) => plan,
+ None => (*(inputs.get(i).unwrap())).clone(),
+ })
+ .collect::<Vec<_>>();
+
+ Ok(Some(plan.with_new_inputs(new_inputs.as_slice())?))
+ }
+
+ pub fn optimize_recursively(
Review Comment:
Can you please add docstrings about what this API does?
Also, I wonder if it needs to be `pub` or if it could be private 🤔
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -274,6 +284,77 @@ impl Optimizer {
debug!("Optimizer took {} ms", start_time.elapsed().as_millis());
Ok(new_plan)
}
+
+ fn optimize_node(
+ &self,
+ rule: &Arc<dyn OptimizerRule + Send + Sync>,
+ plan: &LogicalPlan,
+ optimizer_config: &mut OptimizerConfig,
+ ) -> Result<Option<LogicalPlan>> {
+ rule.try_optimize(plan, optimizer_config)
+ }
+
+ fn optimize_inputs(
+ &self,
+ rule: &Arc<dyn OptimizerRule + Send + Sync>,
+ plan: &LogicalPlan,
+ optimizer_config: &mut OptimizerConfig,
+ ) -> Result<Option<LogicalPlan>> {
+ let inputs = plan.inputs();
+ let result = inputs
+ .iter()
+ .map(|sub_plan| self.optimize_recursively(rule, sub_plan,
optimizer_config))
+ .collect::<Result<Vec<_>>>()?;
+ if result.is_empty() || result.iter().all(|o| o.is_none()) {
+ return Ok(None);
+ }
+
+ let new_inputs = result
+ .into_iter()
+ .enumerate()
+ .map(|(i, o)| match o {
+ Some(plan) => plan,
+ None => (*(inputs.get(i).unwrap())).clone(),
+ })
+ .collect::<Vec<_>>();
+
+ Ok(Some(plan.with_new_inputs(new_inputs.as_slice())?))
+ }
+
+ pub fn optimize_recursively(
+ &self,
+ rule: &Arc<dyn OptimizerRule + Send + Sync>,
+ plan: &LogicalPlan,
+ optimizer_config: &mut OptimizerConfig,
+ ) -> Result<Option<LogicalPlan>> {
+ match rule.apply_order() {
+ Some(order) => match order {
+ ApplyOrder::TopDown => {
+ let optimize_self_opt =
+ self.optimize_node(rule, plan, optimizer_config)?;
Review Comment:
I took a shot at rewriting this stuff to use Result::and_then, etc and I
think how you have written it here is clearer 👍
##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -147,6 +152,11 @@ pub struct Optimizer {
pub rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>,
}
+pub enum ApplyOrder {
Review Comment:
I think we should add docstring comments to this struct, perhaps with some
examples, to illustrate what it means and how to use it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]