This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 15045a8849 Introduce `Expr::is_volatile()`, adjust 
`TreeNode::exists()` (#10191)
15045a8849 is described below

commit 15045a8849e80327de68b1ec59e4a0644f05582c
Author: Peter Toth <[email protected]>
AuthorDate: Tue Apr 23 20:08:31 2024 +0200

    Introduce `Expr::is_volatile()`, adjust `TreeNode::exists()` (#10191)
---
 datafusion/common/src/tree_node.rs                   |  7 +++----
 datafusion/expr/src/expr.rs                          | 20 ++++++++++----------
 datafusion/optimizer/src/common_subexpr_eliminate.rs |  5 ++---
 datafusion/optimizer/src/push_down_filter.rs         |  5 +----
 datafusion/optimizer/src/utils.rs                    | 16 ----------------
 5 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/datafusion/common/src/tree_node.rs 
b/datafusion/common/src/tree_node.rs
index 7003f5ac7f..43026f3a92 100644
--- a/datafusion/common/src/tree_node.rs
+++ b/datafusion/common/src/tree_node.rs
@@ -405,18 +405,17 @@ pub trait TreeNode: Sized {
     /// Returns true if `f` returns true for any node in the tree.
     ///
     /// Stops recursion as soon as a matching node is found
-    fn exists<F: FnMut(&Self) -> bool>(&self, mut f: F) -> bool {
+    fn exists<F: FnMut(&Self) -> Result<bool>>(&self, mut f: F) -> 
Result<bool> {
         let mut found = false;
         self.apply(|n| {
-            Ok(if f(n) {
+            Ok(if f(n)? {
                 found = true;
                 TreeNodeRecursion::Stop
             } else {
                 TreeNodeRecursion::Continue
             })
         })
-        .unwrap();
-        found
+        .map(|_| found)
     }
 
     /// Low-level API used to implement other APIs.
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 6f76936806..ea2cfeafe6 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1271,7 +1271,16 @@ impl Expr {
 
     /// Return true when the expression contains out reference(correlated) 
expressions.
     pub fn contains_outer(&self) -> bool {
-        self.exists(|expr| matches!(expr, Expr::OuterReferenceColumn { .. }))
+        self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. 
})))
+            .unwrap()
+    }
+
+    /// Returns true if the expression is volatile, i.e. whether it can return 
different
+    /// results when evaluated multiple times with the same input.
+    pub fn is_volatile(&self) -> Result<bool> {
+        self.exists(|expr| {
+            Ok(matches!(expr, Expr::ScalarFunction(func) if 
func.func_def.is_volatile()?))
+        })
     }
 
     /// Recursively find all [`Expr::Placeholder`] expressions, and
@@ -1931,15 +1940,6 @@ fn create_names(exprs: &[Expr]) -> Result<String> {
         .join(", "))
 }
 
-/// Whether the given expression is volatile, i.e. whether it can return 
different results
-/// when evaluated multiple times with the same input.
-pub fn is_volatile(expr: &Expr) -> Result<bool> {
-    match expr {
-        Expr::ScalarFunction(func) => func.func_def.is_volatile(),
-        _ => Ok(false),
-    }
-}
-
 #[cfg(test)]
 mod test {
     use crate::expr::Cast;
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs 
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index b859dda9d5..081d9c2505 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -21,7 +21,6 @@ use std::collections::hash_map::Entry;
 use std::collections::{BTreeSet, HashMap};
 use std::sync::Arc;
 
-use crate::utils::is_volatile_expression;
 use crate::{utils, OptimizerConfig, OptimizerRule};
 
 use arrow::datatypes::{DataType, Field};
@@ -661,7 +660,7 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
     fn f_down(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
         // related to https://github.com/apache/datafusion/issues/8814
         // If the expr contain volatile expression or is a short-circuit 
expression, skip it.
-        if expr.short_circuits() || is_volatile_expression(expr)? {
+        if expr.short_circuits() || expr.is_volatile()? {
             self.visit_stack
                 .push(VisitRecord::JumpMark(self.node_count));
             return Ok(TreeNodeRecursion::Jump); // go to f_up
@@ -717,7 +716,7 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_> {
         // The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to 
generate
         // the `id_array`, which records the expr's identifier used to rewrite 
expr. So if we
         // skip an expr in `ExprIdentifierVisitor`, we should skip it here, 
too.
-        if expr.short_circuits() || is_volatile_expression(&expr)? {
+        if expr.short_circuits() || expr.is_volatile()? {
             return Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump));
         }
 
diff --git a/datafusion/optimizer/src/push_down_filter.rs 
b/datafusion/optimizer/src/push_down_filter.rs
index 950932f479..e1561ad9d6 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -18,7 +18,6 @@ use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
 use crate::optimizer::ApplyOrder;
-use crate::utils::is_volatile_expression;
 use crate::{OptimizerConfig, OptimizerRule};
 
 use datafusion_common::tree_node::{
@@ -705,9 +704,7 @@ impl OptimizerRule for PushDownFilter {
 
                             (qualified_name(qualifier, field.name()), expr)
                         })
-                        .partition(|(_, value)| {
-                            is_volatile_expression(value).unwrap_or(true)
-                        });
+                        .partition(|(_, value)| 
value.is_volatile().unwrap_or(true));
 
                 let mut push_predicates = vec![];
                 let mut keep_predicates = vec![];
diff --git a/datafusion/optimizer/src/utils.rs 
b/datafusion/optimizer/src/utils.rs
index aad89be7db..1c20501da5 100644
--- a/datafusion/optimizer/src/utils.rs
+++ b/datafusion/optimizer/src/utils.rs
@@ -21,9 +21,7 @@ use std::collections::{BTreeSet, HashMap};
 
 use crate::{OptimizerConfig, OptimizerRule};
 
-use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
 use datafusion_common::{Column, DFSchema, DFSchemaRef, Result};
-use datafusion_expr::expr::is_volatile;
 use datafusion_expr::expr_rewriter::replace_col;
 use datafusion_expr::utils as expr_utils;
 use datafusion_expr::{logical_plan::LogicalPlan, Expr, Operator};
@@ -97,20 +95,6 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) {
     trace!("{description}::\n{}\n", plan.display_indent_schema());
 }
 
-/// check whether the expression is volatile predicates
-pub(crate) fn is_volatile_expression(e: &Expr) -> Result<bool> {
-    let mut is_volatile_expr = false;
-    e.apply(|expr| {
-        Ok(if is_volatile(expr)? {
-            is_volatile_expr = true;
-            TreeNodeRecursion::Stop
-        } else {
-            TreeNodeRecursion::Continue
-        })
-    })?;
-    Ok(is_volatile_expr)
-}
-
 /// Splits a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]`
 ///
 /// See [`split_conjunction_owned`] for more details and an example.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to