alamb commented on code in PR #4645:
URL: https://github.com/apache/arrow-datafusion/pull/4645#discussion_r1050017494


##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -54,33 +55,53 @@ pub trait OptimizerRule {
     fn try_optimize(
         &self,
         plan: &LogicalPlan,
-        optimizer_config: &mut OptimizerConfig,
+        config: &dyn OptimizerConfig,
     ) -> Result<Option<LogicalPlan>> {
-        self.optimize(plan, optimizer_config).map(Some)
+        self.optimize(plan, config).map(Some)
     }
 
     /// Rewrite `plan` to an optimized form. This method will eventually be 
deprecated and
     /// replace by `try_optimize`.
     fn optimize(
         &self,
         plan: &LogicalPlan,
-        optimizer_config: &mut OptimizerConfig,
+        config: &dyn OptimizerConfig,
     ) -> Result<LogicalPlan>;
 
     /// A human readable name for this optimizer rule
     fn name(&self) -> &str;
 }
 
 /// Options to control the DataFusion Optimizer.
+pub trait OptimizerConfig {
+    /// Return the time at which the query execution started. This
+    /// time is used as the value for now()
+    fn query_execution_start_time(&self) -> DateTime<Utc>;
+
+    /// Returns false if the given rule should be skipped
+    fn rule_enabled(&self, name: &str) -> bool;
+
+    /// The optimizer will skip failing rules if this returns true
+    fn skip_failing_rules(&self) -> bool;
+
+    /// How many times to attempt to optimize the plan
+    fn max_passes(&self) -> u8;
+
+    /// Return a unique ID
+    ///
+    /// This is useful for assigning unique names to aliases
+    fn next_id(&self) -> usize;
+}
+
+/// A standalone [`OptimizerConfig`] that can be used independently
+/// of DataFusion's config management
 #[derive(Debug)]
-pub struct OptimizerConfig {
+pub struct OptimizerContext {
     /// Query execution start time that can be used to rewrite
     /// expressions such as `now()` to use a literal value instead
     query_execution_start_time: DateTime<Utc>,
     /// id generator for optimizer passes
-    // TODO this should not be on the config,
-    // it should be its own 'OptimizerState' or something)
-    next_id: usize,
+    next_id: AtomicUsize,

Review Comment:
   I agree -- cc @waynexia 



##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -209,11 +245,15 @@ impl Optimizer {
         let mut plan_str = format!("{}", plan.display_indent());
         let mut new_plan = plan.clone();
         let mut i = 0;
-        while i < optimizer_config.max_passes {
+        while i < config.max_passes() {
             log_plan(&format!("Optimizer input (pass {})", i), &new_plan);
 
             for rule in &self.rules {
-                let result = rule.try_optimize(&new_plan, optimizer_config);
+                if !config.rule_enabled(rule.name()) {
+                    continue;

Review Comment:
   I think this is an important decision to log; Can we add `debug!`?
   
   ```suggestion
                       debug!("Skipping rule {} due to optimizer config", 
rule.name())
                       continue;
   ```



##########
datafusion/core/src/execution/context.rs:
##########
@@ -1557,14 +1557,6 @@ impl SessionState {
                 .register_catalog(config.default_catalog.clone(), 
default_catalog);
         }
 
-        let optimizer_config = OptimizerConfig::new().filter_null_keys(

Review Comment:
   👍 



##########
datafusion/optimizer/src/optimizer.rs:
##########
@@ -54,33 +55,53 @@ pub trait OptimizerRule {
     fn try_optimize(
         &self,
         plan: &LogicalPlan,
-        optimizer_config: &mut OptimizerConfig,
+        config: &dyn OptimizerConfig,
     ) -> Result<Option<LogicalPlan>> {
-        self.optimize(plan, optimizer_config).map(Some)
+        self.optimize(plan, config).map(Some)
     }
 
     /// Rewrite `plan` to an optimized form. This method will eventually be 
deprecated and
     /// replace by `try_optimize`.
     fn optimize(
         &self,
         plan: &LogicalPlan,
-        optimizer_config: &mut OptimizerConfig,
+        config: &dyn OptimizerConfig,
     ) -> Result<LogicalPlan>;
 
     /// A human readable name for this optimizer rule
     fn name(&self) -> &str;
 }
 
 /// Options to control the DataFusion Optimizer.
+pub trait OptimizerConfig {
+    /// Return the time at which the query execution started. This
+    /// time is used as the value for now()
+    fn query_execution_start_time(&self) -> DateTime<Utc>;
+
+    /// Returns false if the given rule should be skipped
+    fn rule_enabled(&self, name: &str) -> bool;
+
+    /// The optimizer will skip failing rules if this returns true
+    fn skip_failing_rules(&self) -> bool;
+
+    /// How many times to attempt to optimize the plan
+    fn max_passes(&self) -> u8;
+
+    /// Return a unique ID
+    ///
+    /// This is useful for assigning unique names to aliases
+    fn next_id(&self) -> usize;
+}
+
+/// A standalone [`OptimizerConfig`] that can be used independently
+/// of DataFusion's config management
 #[derive(Debug)]
-pub struct OptimizerConfig {
+pub struct OptimizerContext {
     /// Query execution start time that can be used to rewrite
     /// expressions such as `now()` to use a literal value instead
     query_execution_start_time: DateTime<Utc>,
     /// id generator for optimizer passes
-    // TODO this should not be on the config,
-    // it should be its own 'OptimizerState' or something)
-    next_id: usize,
+    next_id: AtomicUsize,

Review Comment:
   It is also good that `OptimizerContext` doesn't implement `Clone` which will 
discourage making copies that could get out of date



##########
datafusion/core/src/execution/context.rs:
##########
@@ -1741,32 +1733,37 @@ impl SessionState {
 
     /// Optimizes the logical plan by applying optimizer rules.
     pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
-        let mut optimizer_config = OptimizerConfig::new()
-            .with_skip_failing_rules(
-                self.config
-                    .config_options
-                    .read()
-                    .get_bool(OPT_OPTIMIZER_SKIP_FAILED_RULES)
-                    .unwrap_or_default(),
-            )
-            .with_max_passes(
-                self.config
-                    .config_options
-                    .read()
-                    .get_u64(OPT_OPTIMIZER_MAX_PASSES)
-                    .unwrap_or_default() as u8,
-            )
-            .with_query_execution_start_time(
-                self.execution_props.query_execution_start_time,
-            );
+        // TODO: Implement OptimizerContext directly on DataFrame (#4631) 
(#4626)

Review Comment:
   I think having the sub crates depend on a traits that are implemented in the 
core trait sounds like a good idea to me



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to