This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new c2f3d6541f Support API for "pre-image" for pruning predicate 
evaluation (#19722)
c2f3d6541f is described below

commit c2f3d6541f898d13af24153f102388aa95e3167b
Author: Kosta Tarasov <[email protected]>
AuthorDate: Thu Jan 22 18:01:55 2026 -0500

    Support API for "pre-image" for pruning predicate evaluation (#19722)
    
    ## Which issue does this PR close?
    
    - closes #18320
    
    ## Rationale for this change
    
    Splitting the PR to make it more readable.
    
    ## What changes are included in this PR?
    
    Adding the udf_preimage logic without date_part implementation.
    
    ## Are these changes tested?
    
    Added unit tests for a test specific function
    
    ## Are there any user-facing changes?
    
    No
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/expr/src/lib.rs                         |   1 +
 .../mod.rs => expr/src/preimage.rs}                |  29 +-
 datafusion/expr/src/udf.rs                         |  48 +++
 .../src/simplify_expressions/expr_simplifier.rs    |  80 ++++-
 .../optimizer/src/simplify_expressions/mod.rs      |   1 +
 .../src/simplify_expressions/udf_preimage.rs       | 364 +++++++++++++++++++++
 6 files changed, 502 insertions(+), 21 deletions(-)

diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index 4fb78933d7..978e9f6275 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -77,6 +77,7 @@ pub mod statistics {
     pub use datafusion_expr_common::statistics::*;
 }
 mod predicate_bounds;
+pub mod preimage;
 pub mod ptr_eq;
 pub mod test;
 pub mod tree_node;
diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs 
b/datafusion/expr/src/preimage.rs
similarity index 57%
copy from datafusion/optimizer/src/simplify_expressions/mod.rs
copy to datafusion/expr/src/preimage.rs
index 3ab76119cc..67ca7a91bb 100644
--- a/datafusion/optimizer/src/simplify_expressions/mod.rs
+++ b/datafusion/expr/src/preimage.rs
@@ -15,24 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! [`SimplifyExpressions`] simplifies expressions in the logical plan,
-//! [`ExprSimplifier`] simplifies individual `Expr`s.
+use datafusion_expr_common::interval_arithmetic::Interval;
 
-pub mod expr_simplifier;
-mod inlist_simplifier;
-mod regex;
-pub mod simplify_exprs;
-pub mod simplify_literal;
-mod simplify_predicates;
-mod unwrap_cast;
-mod utils;
+use crate::Expr;
 
-// backwards compatibility
-pub use datafusion_expr::simplify::SimplifyContext;
-
-pub use expr_simplifier::*;
-pub use simplify_exprs::*;
-pub use simplify_predicates::simplify_predicates;
-
-// Export for test in datafusion/core/tests/optimizer_integration.rs
-pub use datafusion_expr::expr_rewriter::GuaranteeRewriter;
+/// Return from [`crate::ScalarUDFImpl::preimage`]
+pub enum PreimageResult {
+    /// No preimage exists for the specified value
+    None,
+    /// The expression always evaluates to the specified constant
+    /// given that `expr` is within the interval
+    Range { expr: Expr, interval: Box<Interval> },
+}
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 0654370ac7..870e318a62 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -19,6 +19,7 @@
 
 use crate::async_udf::AsyncScalarUDF;
 use crate::expr::schema_name_from_exprs_comma_separated_without_space;
+use crate::preimage::PreimageResult;
 use crate::simplify::{ExprSimplifyResult, SimplifyContext};
 use crate::sort_properties::{ExprProperties, SortProperties};
 use crate::udf_eq::UdfEq;
@@ -232,6 +233,18 @@ impl ScalarUDF {
         self.inner.is_nullable(args, schema)
     }
 
+    /// Return a preimage
+    ///
+    /// See [`ScalarUDFImpl::preimage`] for more details.
+    pub fn preimage(
+        &self,
+        args: &[Expr],
+        lit_expr: &Expr,
+        info: &SimplifyContext,
+    ) -> Result<PreimageResult> {
+        self.inner.preimage(args, lit_expr, info)
+    }
+
     /// Invoke the function on `args`, returning the appropriate result.
     ///
     /// See [`ScalarUDFImpl::invoke_with_args`] for details.
@@ -696,6 +709,32 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + 
Sync {
         Ok(ExprSimplifyResult::Original(args))
     }
 
+    /// Returns the [preimage] for this function and the specified scalar 
value, if any.
+    ///
+    /// A preimage is a single contiguous [`Interval`] of values where the 
function
+    /// will always return the value of `lit_expr`.
+    ///
+    /// Implementations should return intervals with an inclusive lower bound 
and
+    /// exclusive upper bound.
+    ///
+    /// This rewrite is described in the [ClickHouse Paper] and is particularly
+    /// useful for simplifying expressions involving `date_part` or equivalent 
functions. The
+    /// idea is that if you have an expression like `date_part(YEAR, k) = 
2024` and you
+    /// can find a [preimage] for `date_part(YEAR, k)`, namely the range of 
dates
+    /// covering the entire year of 2024, then you can rewrite the expression 
to
+    /// `k >= '2024-01-01' AND k < '2025-01-01'`, which is often more optimizable.
+    ///
+    /// [ClickHouse Paper]:  https://www.vldb.org/pvldb/vol17/p3731-schulze.pdf
+    /// [preimage]: 
https://en.wikipedia.org/wiki/Image_(mathematics)#Inverse_image
+    fn preimage(
+        &self,
+        _args: &[Expr],
+        _lit_expr: &Expr,
+        _info: &SimplifyContext,
+    ) -> Result<PreimageResult> {
+        Ok(PreimageResult::None)
+    }
+
     /// Returns true if some of this `exprs` subexpressions may not be 
evaluated
     /// and thus any side effects (like divide by zero) may not be encountered.
     ///
@@ -926,6 +965,15 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
         self.inner.simplify(args, info)
     }
 
+    fn preimage(
+        &self,
+        args: &[Expr],
+        lit_expr: &Expr,
+        info: &SimplifyContext,
+    ) -> Result<PreimageResult> {
+        self.inner.preimage(args, lit_expr, info)
+    }
+
     fn conditional_arguments<'a>(
         &self,
         args: &'a [Expr],
diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs 
b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
index b9ef69dd08..7bbb7e79d1 100644
--- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
+++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
@@ -39,7 +39,7 @@ use datafusion_common::{
 };
 use datafusion_expr::{
     BinaryExpr, Case, ColumnarValue, Expr, Like, Operator, Volatility, and,
-    binary::BinaryTypeCoercer, lit, or,
+    binary::BinaryTypeCoercer, lit, or, preimage::PreimageResult,
 };
 use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult};
 use datafusion_expr::{expr::ScalarFunction, 
interval_arithmetic::NullableInterval};
@@ -51,7 +51,6 @@ use datafusion_physical_expr::{create_physical_expr, 
execution_props::ExecutionP
 
 use super::inlist_simplifier::ShortenInListSimplifier;
 use super::utils::*;
-use crate::analyzer::type_coercion::TypeCoercionRewriter;
 use crate::simplify_expressions::SimplifyContext;
 use crate::simplify_expressions::regex::simplify_regex_expr;
 use crate::simplify_expressions::unwrap_cast::{
@@ -59,6 +58,10 @@ use crate::simplify_expressions::unwrap_cast::{
     is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
     unwrap_cast_in_comparison_for_binary,
 };
+use crate::{
+    analyzer::type_coercion::TypeCoercionRewriter,
+    simplify_expressions::udf_preimage::rewrite_with_preimage,
+};
 use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map;
 use datafusion_expr_common::casts::try_cast_literal_to_type;
 use indexmap::IndexSet;
@@ -1969,12 +1972,85 @@ impl TreeNodeRewriter for Simplifier<'_> {
                 }))
             }
 
+            // =======================================
+            // preimage_in_comparison
+            // =======================================
+            //
+            // For case:
+            // date_part('YEAR', expr) op literal
+            //
+            // For details see datafusion_expr::ScalarUDFImpl::preimage
+            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
+                use datafusion_expr::Operator::*;
+                let is_preimage_op = matches!(
+                    op,
+                    Eq | NotEq
+                        | Lt
+                        | LtEq
+                        | Gt
+                        | GtEq
+                        | IsDistinctFrom
+                        | IsNotDistinctFrom
+                );
+                if !is_preimage_op || is_null(&right) {
+                    return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
+                        left,
+                        op,
+                        right,
+                    })));
+                }
+
+                if let PreimageResult::Range { interval, expr } =
+                    get_preimage(left.as_ref(), right.as_ref(), info)?
+                {
+                    rewrite_with_preimage(*interval, op, expr)?
+                } else if let Some(swapped) = op.swap() {
+                    if let PreimageResult::Range { interval, expr } =
+                        get_preimage(right.as_ref(), left.as_ref(), info)?
+                    {
+                        rewrite_with_preimage(*interval, swapped, expr)?
+                    } else {
+                        Transformed::no(Expr::BinaryExpr(BinaryExpr { left, 
op, right }))
+                    }
+                } else {
+                    Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, 
right }))
+                }
+            }
+
             // no additional rewrites possible
             expr => Transformed::no(expr),
         })
     }
 }
 
+fn get_preimage(
+    left_expr: &Expr,
+    right_expr: &Expr,
+    info: &SimplifyContext,
+) -> Result<PreimageResult> {
+    let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else {
+        return Ok(PreimageResult::None);
+    };
+    if !is_literal_or_literal_cast(right_expr) {
+        return Ok(PreimageResult::None);
+    }
+    if func.signature().volatility != Volatility::Immutable {
+        return Ok(PreimageResult::None);
+    }
+    func.preimage(args, right_expr, info)
+}
+
+fn is_literal_or_literal_cast(expr: &Expr) -> bool {
+    match expr {
+        Expr::Literal(_, _) => true,
+        Expr::Cast(Cast { expr, .. }) => matches!(expr.as_ref(), 
Expr::Literal(_, _)),
+        Expr::TryCast(TryCast { expr, .. }) => {
+            matches!(expr.as_ref(), Expr::Literal(_, _))
+        }
+        _ => false,
+    }
+}
+
 fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> {
     match expr {
         Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)),
diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs 
b/datafusion/optimizer/src/simplify_expressions/mod.rs
index 3ab76119cc..b85b000821 100644
--- a/datafusion/optimizer/src/simplify_expressions/mod.rs
+++ b/datafusion/optimizer/src/simplify_expressions/mod.rs
@@ -24,6 +24,7 @@ mod regex;
 pub mod simplify_exprs;
 pub mod simplify_literal;
 mod simplify_predicates;
+mod udf_preimage;
 mod unwrap_cast;
 mod utils;
 
diff --git a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs 
b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs
new file mode 100644
index 0000000000..e0837196ca
--- /dev/null
+++ b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs
@@ -0,0 +1,364 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::{Result, internal_err, tree_node::Transformed};
+use datafusion_expr::{Expr, Operator, and, lit, or};
+use datafusion_expr_common::interval_arithmetic::Interval;
+
+/// Rewrites a binary expression using its "preimage"
+///
+/// Specifically it rewrites expressions of the form `<expr> OP x` (e.g. 
`<expr> =
+/// x`) where `<expr>` is known to have a pre-image (aka the entire single
+/// range for which it is valid) and `x` is not `NULL`
+///
+/// For details see [`datafusion_expr::ScalarUDFImpl::preimage`]
+///
+pub(super) fn rewrite_with_preimage(
+    preimage_interval: Interval,
+    op: Operator,
+    expr: Expr,
+) -> Result<Transformed<Expr>> {
+    let (lower, upper) = preimage_interval.into_bounds();
+    let (lower, upper) = (lit(lower), lit(upper));
+
+    let rewritten_expr = match op {
+        // <expr> < x   ==>  <expr> < lower
+        Operator::Lt => expr.lt(lower),
+        // <expr> >= x  ==>  <expr> >= lower
+        Operator::GtEq => expr.gt_eq(lower),
+        // <expr> > x ==> <expr> >= upper
+        Operator::Gt => expr.gt_eq(upper),
+        // <expr> <= x ==> <expr> < upper
+        Operator::LtEq => expr.lt(upper),
+        // <expr> = x ==> (<expr> >= lower) and (<expr> < upper)
+        Operator::Eq => and(expr.clone().gt_eq(lower), expr.lt(upper)),
+        // <expr> != x ==> (<expr> < lower) or (<expr> >= upper)
+        Operator::NotEq => or(expr.clone().lt(lower), expr.gt_eq(upper)),
+        // <expr> is not distinct from x ==> (<expr> is NULL and x is NULL) or 
((<expr> >= lower) and (<expr> < upper))
+        // but since x is always not NULL => (<expr> is not NULL) and (<expr> 
>= lower) and (<expr> < upper)
+        Operator::IsNotDistinctFrom => expr
+            .clone()
+            .is_not_null()
+            .and(expr.clone().gt_eq(lower))
+            .and(expr.lt(upper)),
+        // <expr> is distinct from x ==> (<expr> < lower) or (<expr> >= upper) 
or (<expr> is NULL and x is not NULL) or (<expr> is not NULL and x is NULL)
+        // but given that x is always not NULL => (<expr> < lower) or (<expr> 
>= upper) or (<expr> is NULL)
+        Operator::IsDistinctFrom => expr
+            .clone()
+            .lt(lower)
+            .or(expr.clone().gt_eq(upper))
+            .or(expr.is_null()),
+        _ => return internal_err!("Expect comparison operators"),
+    };
+    Ok(Transformed::yes(rewritten_expr))
+}
+
+#[cfg(test)]
+mod test {
+    use std::any::Any;
+    use std::sync::Arc;
+
+    use arrow::datatypes::{DataType, Field};
+    use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue};
+    use datafusion_expr::{
+        ColumnarValue, Expr, Operator, ScalarFunctionArgs, ScalarUDF, 
ScalarUDFImpl,
+        Signature, Volatility, and, binary_expr, col, lit, 
preimage::PreimageResult,
+        simplify::SimplifyContext,
+    };
+
+    use super::Interval;
+    use crate::simplify_expressions::ExprSimplifier;
+
+    fn is_distinct_from(left: Expr, right: Expr) -> Expr {
+        binary_expr(left, Operator::IsDistinctFrom, right)
+    }
+
+    fn is_not_distinct_from(left: Expr, right: Expr) -> Expr {
+        binary_expr(left, Operator::IsNotDistinctFrom, right)
+    }
+
+    #[derive(Debug, PartialEq, Eq, Hash)]
+    struct PreimageUdf {
+        /// Defaults to an exact signature with one Int32 argument and 
Immutable volatility
+        signature: Signature,
+        /// If true, returns a preimage; otherwise, returns None
+        enabled: bool,
+    }
+
+    impl PreimageUdf {
+        fn new() -> Self {
+            Self {
+                signature: Signature::exact(vec![DataType::Int32], 
Volatility::Immutable),
+                enabled: true,
+            }
+        }
+
+        /// Set the enabled flag
+        fn with_enabled(mut self, enabled: bool) -> Self {
+            self.enabled = enabled;
+            self
+        }
+
+        /// Set the volatility
+        fn with_volatility(mut self, volatility: Volatility) -> Self {
+            self.signature.volatility = volatility;
+            self
+        }
+    }
+
+    impl ScalarUDFImpl for PreimageUdf {
+        fn as_any(&self) -> &dyn Any {
+            self
+        }
+
+        fn name(&self) -> &str {
+            "preimage_func"
+        }
+
+        fn signature(&self) -> &Signature {
+            &self.signature
+        }
+
+        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+            Ok(DataType::Int32)
+        }
+
+        fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
+            Ok(ColumnarValue::Scalar(ScalarValue::Int32(Some(500))))
+        }
+
+        fn preimage(
+            &self,
+            args: &[Expr],
+            lit_expr: &Expr,
+            _info: &SimplifyContext,
+        ) -> Result<PreimageResult> {
+            if !self.enabled {
+                return Ok(PreimageResult::None);
+            }
+            if args.len() != 1 {
+                return Ok(PreimageResult::None);
+            }
+
+            let expr = args.first().cloned().expect("Should be column 
expression");
+            match lit_expr {
+                Expr::Literal(ScalarValue::Int32(Some(500)), _) => {
+                    Ok(PreimageResult::Range {
+                        expr,
+                        interval: Box::new(Interval::try_new(
+                            ScalarValue::Int32(Some(100)),
+                            ScalarValue::Int32(Some(200)),
+                        )?),
+                    })
+                }
+                _ => Ok(PreimageResult::None),
+            }
+        }
+    }
+
+    fn optimize_test(expr: Expr, schema: &DFSchemaRef) -> Expr {
+        let simplify_context = 
SimplifyContext::default().with_schema(Arc::clone(schema));
+        ExprSimplifier::new(simplify_context)
+            .simplify(expr)
+            .unwrap()
+    }
+
+    fn preimage_udf_expr() -> Expr {
+        ScalarUDF::new_from_impl(PreimageUdf::new()).call(vec![col("x")])
+    }
+
+    fn non_immutable_udf_expr() -> Expr {
+        
ScalarUDF::new_from_impl(PreimageUdf::new().with_volatility(Volatility::Volatile))
+            .call(vec![col("x")])
+    }
+
+    fn no_preimage_udf_expr() -> Expr {
+        ScalarUDF::new_from_impl(PreimageUdf::new().with_enabled(false))
+            .call(vec![col("x")])
+    }
+
+    fn test_schema() -> DFSchemaRef {
+        Arc::new(
+            DFSchema::from_unqualified_fields(
+                vec![Field::new("x", DataType::Int32, true)].into(),
+                Default::default(),
+            )
+            .unwrap(),
+        )
+    }
+
+    fn test_schema_xy() -> DFSchemaRef {
+        Arc::new(
+            DFSchema::from_unqualified_fields(
+                vec![
+                    Field::new("x", DataType::Int32, false),
+                    Field::new("y", DataType::Int32, false),
+                ]
+                .into(),
+                Default::default(),
+            )
+            .unwrap(),
+        )
+    }
+
+    #[test]
+    fn test_preimage_eq_rewrite() {
+        // Equality rewrite when preimage and column expression are available.
+        let schema = test_schema();
+        let expr = preimage_udf_expr().eq(lit(500));
+        let expected = and(col("x").gt_eq(lit(100)), col("x").lt(lit(200)));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_noteq_rewrite() {
+        // Inequality rewrite expands to disjoint ranges.
+        let schema = test_schema();
+        let expr = preimage_udf_expr().not_eq(lit(500));
+        let expected = col("x").lt(lit(100)).or(col("x").gt_eq(lit(200)));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_eq_rewrite_swapped() {
+        // Equality rewrite works when the literal appears on the left.
+        let schema = test_schema();
+        let expr = lit(500).eq(preimage_udf_expr());
+        let expected = and(col("x").gt_eq(lit(100)), col("x").lt(lit(200)));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_lt_rewrite() {
+        // Less-than comparison rewrites to the lower bound.
+        let schema = test_schema();
+        let expr = preimage_udf_expr().lt(lit(500));
+        let expected = col("x").lt(lit(100));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_lteq_rewrite() {
+        // Less-than-or-equal comparison rewrites to the upper bound.
+        let schema = test_schema();
+        let expr = preimage_udf_expr().lt_eq(lit(500));
+        let expected = col("x").lt(lit(200));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_gt_rewrite() {
+        // Greater-than comparison rewrites to the upper bound (inclusive).
+        let schema = test_schema();
+        let expr = preimage_udf_expr().gt(lit(500));
+        let expected = col("x").gt_eq(lit(200));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_gteq_rewrite() {
+        // Greater-than-or-equal comparison rewrites to the lower bound.
+        let schema = test_schema();
+        let expr = preimage_udf_expr().gt_eq(lit(500));
+        let expected = col("x").gt_eq(lit(100));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_is_not_distinct_from_rewrite() {
+        // IS NOT DISTINCT FROM rewrites to equality plus expression not-null 
check
+        // for non-null literal RHS.
+        let schema = test_schema();
+        let expr = is_not_distinct_from(preimage_udf_expr(), lit(500));
+        let expected = col("x")
+            .is_not_null()
+            .and(col("x").gt_eq(lit(100)))
+            .and(col("x").lt(lit(200)));
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_is_distinct_from_rewrite() {
+        // IS DISTINCT FROM adds an explicit NULL branch for the column.
+        let schema = test_schema();
+        let expr = is_distinct_from(preimage_udf_expr(), lit(500));
+        let expected = col("x")
+            .lt(lit(100))
+            .or(col("x").gt_eq(lit(200)))
+            .or(col("x").is_null());
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_non_literal_rhs_no_rewrite() {
+        // Non-literal RHS should not be rewritten.
+        let schema = test_schema_xy();
+        let expr = preimage_udf_expr().eq(col("y"));
+        let expected = expr.clone();
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_null_literal_no_rewrite_distinct_ops() {
+        // NULL literal RHS should not be rewritten for DISTINCTness operators:
+        // - `expr IS DISTINCT FROM NULL`  <=> `NOT (expr IS NULL)`
+        // - `expr IS NOT DISTINCT FROM NULL` <=> `expr IS NULL`
+        //
+        // For normal comparisons (=, !=, <, <=, >, >=), `expr OP NULL` 
evaluates to NULL
+        // under SQL tri-state logic, and DataFusion's simplifier 
constant-folds it.
+        // 
https://docs.rs/datafusion/latest/datafusion/physical_optimizer/pruning/struct.PruningPredicate.html#boolean-tri-state-logic
+
+        let schema = test_schema();
+
+        let expr = is_distinct_from(preimage_udf_expr(), 
lit(ScalarValue::Int32(None)));
+        assert_eq!(optimize_test(expr.clone(), &schema), expr);
+
+        let expr =
+            is_not_distinct_from(preimage_udf_expr(), 
lit(ScalarValue::Int32(None)));
+        assert_eq!(optimize_test(expr.clone(), &schema), expr);
+    }
+
+    #[test]
+    fn test_preimage_non_immutable_no_rewrite() {
+        // Non-immutable UDFs should not participate in preimage rewrites.
+        let schema = test_schema();
+        let expr = non_immutable_udf_expr().eq(lit(500));
+        let expected = expr.clone();
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+
+    #[test]
+    fn test_preimage_no_preimage_no_rewrite() {
+        // If the UDF provides no preimage, the expression should remain 
unchanged.
+        let schema = test_schema();
+        let expr = no_preimage_udf_expr().eq(lit(500));
+        let expected = expr.clone();
+
+        assert_eq!(optimize_test(expr, &schema), expected);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to