This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 89a8576171 docs: Document that adding new optimizer rules are
expensive (#20348)
89a8576171 is described below
commit 89a85761717cbe7523b4270dbdf7d16317133204
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 23 06:21:17 2026 -0500
docs: Document that adding new optimizer rules are expensive (#20348)
## Which issue does this PR close?
- Similarly to https://github.com/apache/datafusion/pull/20346
## Rationale for this change
As part of PR reviews, it seems like it is not obvious to some
contributors that there is a non trivial cost to adding new optimizer
rules. Let's add that knowledge into the codebase as comments, so it may
be less of a surprise
## What changes are included in this PR?
Add comments
## Are these changes tested?
N/A
## Are there any user-facing changes?
No this is entirely internal comments oly
---------
Co-authored-by: Adrian Garcia Badaracco
<[email protected]>
---
datafusion/optimizer/src/optimizer.rs | 9 +++++++++
datafusion/physical-optimizer/src/optimizer.rs | 6 ++++++
2 files changed, 15 insertions(+)
diff --git a/datafusion/optimizer/src/optimizer.rs
b/datafusion/optimizer/src/optimizer.rs
index 118ddef49b..0da4d6352a 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -236,6 +236,15 @@ impl Default for Optimizer {
impl Optimizer {
/// Create a new optimizer using the recommended list of rules
pub fn new() -> Self {
+ // NOTEs:
+ // - The order of rules in this list is important, as it determines the
+ // order in which they are applied.
+ // - Adding a new rule here is expensive as it will be applied to all
+ // queries, and will likely increase the optimization time. Please
extend
+ // existing rules when possible, rather than adding a new rule.
+ // If you do add a new rule considering having aggressive no-op paths
+ // (e.g. if the plan doesn't contain any of the nodes you are
looking for
+ // return `Transformed::no`; only works if you control the
traversal).
let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
Arc::new(RewriteSetComparison::new()),
Arc::new(OptimizeUnions::new()),
diff --git a/datafusion/physical-optimizer/src/optimizer.rs
b/datafusion/physical-optimizer/src/optimizer.rs
index ff71c9ec64..49225db03a 100644
--- a/datafusion/physical-optimizer/src/optimizer.rs
+++ b/datafusion/physical-optimizer/src/optimizer.rs
@@ -82,6 +82,12 @@ impl Default for PhysicalOptimizer {
impl PhysicalOptimizer {
/// Create a new optimizer using the recommended list of rules
pub fn new() -> Self {
+ // NOTEs:
+ // - The order of rules in this list is important, as it determines the
+ // order in which they are applied.
+ // - Adding a new rule here is expensive as it will be applied to all
+ // queries, and will likely increase the optimization time. Please
extend
+ // existing rules when possible, rather than adding a new rule.
let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
// If there is a output requirement of the query, make sure that
// this information is not lost across different rules during
optimization.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]