This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 89a8576171 docs: Document that adding new optimizer rules are 
expensive (#20348)
89a8576171 is described below

commit 89a85761717cbe7523b4270dbdf7d16317133204
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 23 06:21:17 2026 -0500

    docs: Document that adding new optimizer rules are expensive (#20348)
    
    ## Which issue does this PR close?
    
    - Similarly to https://github.com/apache/datafusion/pull/20346
    
    ## Rationale for this change
    
    As part of PR reviews, it seems like it is not obvious to some
    contributors that there is a non trivial cost to adding new optimizer
    rules. Let's add that knowledge into the codebase as comments, so it may
    be less of a surprise
    
    ## What changes are included in this PR?
    
    Add comments
    ## Are these changes tested?
    N/A
    ## Are there any user-facing changes?
    No this is entirely internal comments oly
    
    ---------
    
    Co-authored-by: Adrian Garcia Badaracco 
<[email protected]>
---
 datafusion/optimizer/src/optimizer.rs          | 9 +++++++++
 datafusion/physical-optimizer/src/optimizer.rs | 6 ++++++
 2 files changed, 15 insertions(+)

diff --git a/datafusion/optimizer/src/optimizer.rs 
b/datafusion/optimizer/src/optimizer.rs
index 118ddef49b..0da4d6352a 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -236,6 +236,15 @@ impl Default for Optimizer {
 impl Optimizer {
     /// Create a new optimizer using the recommended list of rules
     pub fn new() -> Self {
+        // NOTEs:
+        // - The order of rules in this list is important, as it determines the
+        //   order in which they are applied.
+        // - Adding a new rule here is expensive as it will be applied to all
+        //   queries, and will likely increase the optimization time. Please 
extend
+        //   existing rules when possible, rather than adding a new rule.
+        //   If you do add a new rule considering having aggressive no-op paths
+        //   (e.g. if the plan doesn't contain any of the nodes you are 
looking for
+        //    return `Transformed::no`; only works if you control the 
traversal).
         let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
             Arc::new(RewriteSetComparison::new()),
             Arc::new(OptimizeUnions::new()),
diff --git a/datafusion/physical-optimizer/src/optimizer.rs 
b/datafusion/physical-optimizer/src/optimizer.rs
index ff71c9ec64..49225db03a 100644
--- a/datafusion/physical-optimizer/src/optimizer.rs
+++ b/datafusion/physical-optimizer/src/optimizer.rs
@@ -82,6 +82,12 @@ impl Default for PhysicalOptimizer {
 impl PhysicalOptimizer {
     /// Create a new optimizer using the recommended list of rules
     pub fn new() -> Self {
+        // NOTEs:
+        // - The order of rules in this list is important, as it determines the
+        //   order in which they are applied.
+        // - Adding a new rule here is expensive as it will be applied to all
+        //   queries, and will likely increase the optimization time. Please 
extend
+        //   existing rules when possible, rather than adding a new rule.
         let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
             // If there is a output requirement of the query, make sure that
             // this information is not lost across different rules during 
optimization.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to