This is an automated email from the ASF dual-hosted git repository.

blaginin pushed a commit to branch annarose/dict-coercion
in repository https://gitbox.apache.org/repos/asf/datafusion-sandbox.git

commit 35e78ca10a1631876c945faa48583f4a21bfdc82
Author: Kosta Tarasov <[email protected]>
AuthorDate: Tue Feb 3 11:55:53 2026 -0500

    Optimize the evaluation of date_part(<col>) == <constant> when pushed down 
(#19733)
    
    ## Which issue does this PR close?
    
    - closes #19889.
    
    ## Rationale for this change
    
    Check issue.
    
    ## What changes are included in this PR?
    
    Added `preimage` impl for `date_part` udf.
    
    Added sqllogictests for the impl.
    
    ## Are these changes tested?
    
    Yes, sqllogictests.
    
    ## Are there any user-facing changes?
    
    No
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/src/datetime/date_part.rs     | 122 ++++-
 .../sqllogictest/test_files/datetime/date_part.slt | 539 ++++++++++++++++++++-
 2 files changed, 657 insertions(+), 4 deletions(-)

diff --git a/datafusion/functions/src/datetime/date_part.rs 
b/datafusion/functions/src/datetime/date_part.rs
index e9ae31886..e3080c9d1 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -19,6 +19,7 @@ use std::any::Any;
 use std::str::FromStr;
 use std::sync::Arc;
 
+use arrow::array::timezone::Tz;
 use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
 use arrow::compute::kernels::cast_utils::IntervalUnit;
 use arrow::compute::{DatePart, binary, date_part};
@@ -27,8 +28,10 @@ use arrow::datatypes::DataType::{
 };
 use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
 use arrow::datatypes::{
-    DataType, Field, FieldRef, IntervalUnit as ArrowIntervalUnit, TimeUnit,
+    DataType, Date32Type, Date64Type, Field, FieldRef, IntervalUnit as 
ArrowIntervalUnit,
+    TimeUnit,
 };
+use chrono::{Datelike, NaiveDate, TimeZone, Utc};
 use datafusion_common::types::{NativeType, logical_date};
 
 use datafusion_common::{
@@ -44,9 +47,11 @@ use datafusion_common::{
     types::logical_string,
     utils::take_function_args,
 };
+use datafusion_expr::preimage::PreimageResult;
+use datafusion_expr::simplify::SimplifyContext;
 use datafusion_expr::{
-    ColumnarValue, Documentation, ReturnFieldArgs, ScalarUDFImpl, Signature,
-    TypeSignature, Volatility,
+    ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarUDFImpl, 
Signature,
+    TypeSignature, Volatility, interval_arithmetic,
 };
 use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
 use datafusion_macros::user_doc;
@@ -237,6 +242,71 @@ impl ScalarUDFImpl for DatePartFunc {
         })
     }
 
+    // Only casting the year is supported since pruning other IntervalUnit is 
not possible
+    // date_part(col, YEAR) = 2024 => col >= '2024-01-01' and col < 
'2025-01-01'
+    // But for anything less than YEAR simplifying is not possible without 
specifying the bigger interval
+    // date_part(col, MONTH) = 1 => col = '2023-01-01' or col = '2024-01-01' 
or ... or col = '3000-01-01'
+    fn preimage(
+        &self,
+        args: &[Expr],
+        lit_expr: &Expr,
+        info: &SimplifyContext,
+    ) -> Result<PreimageResult> {
+        let [part, col_expr] = take_function_args(self.name(), args)?;
+
+        // Get the interval unit from the part argument
+        let interval_unit = part
+            .as_literal()
+            .and_then(|sv| sv.try_as_str().flatten())
+            .map(part_normalization)
+            .and_then(|s| IntervalUnit::from_str(s).ok());
+
+        // only support extracting year
+        match interval_unit {
+            Some(IntervalUnit::Year) => (),
+            _ => return Ok(PreimageResult::None),
+        }
+
+        // Check if the argument is a literal (e.g. date_part(YEAR, col) = 
2024)
+        let Some(argument_literal) = lit_expr.as_literal() else {
+            return Ok(PreimageResult::None);
+        };
+
+        // Extract i32 year from Scalar value
+        let year = match argument_literal {
+            ScalarValue::Int32(Some(y)) => *y,
+            _ => return Ok(PreimageResult::None),
+        };
+
+        // Can only extract year from Date32/64 and Timestamp column
+        let target_type = match info.get_data_type(col_expr)? {
+            Date32 | Date64 | Timestamp(_, _) => 
&info.get_data_type(col_expr)?,
+            _ => return Ok(PreimageResult::None),
+        };
+
+        // Compute the Interval bounds
+        let Some(start_time) = NaiveDate::from_ymd_opt(year, 1, 1) else {
+            return Ok(PreimageResult::None);
+        };
+        let Some(end_time) = start_time.with_year(year + 1) else {
+            return Ok(PreimageResult::None);
+        };
+
+        // Convert to ScalarValues
+        let (Some(lower), Some(upper)) = (
+            date_to_scalar(start_time, target_type),
+            date_to_scalar(end_time, target_type),
+        ) else {
+            return Ok(PreimageResult::None);
+        };
+        let interval = Box::new(interval_arithmetic::Interval::try_new(lower, 
upper)?);
+
+        Ok(PreimageResult::Range {
+            expr: col_expr.clone(),
+            interval,
+        })
+    }
+
     fn aliases(&self) -> &[String] {
         &self.aliases
     }
@@ -251,6 +321,52 @@ fn is_epoch(part: &str) -> bool {
     matches!(part.to_lowercase().as_str(), "epoch")
 }
 
+fn date_to_scalar(date: NaiveDate, target_type: &DataType) -> 
Option<ScalarValue> {
+    Some(match target_type {
+        Date32 => ScalarValue::Date32(Some(Date32Type::from_naive_date(date))),
+        Date64 => ScalarValue::Date64(Some(Date64Type::from_naive_date(date))),
+
+        Timestamp(unit, tz_opt) => {
+            let naive_midnight = date.and_hms_opt(0, 0, 0)?;
+
+            let utc_dt = if let Some(tz_str) = tz_opt {
+                let tz: Tz = tz_str.parse().ok()?;
+
+                let local = tz.from_local_datetime(&naive_midnight);
+
+                let local_dt = match local {
+                    chrono::offset::LocalResult::Single(dt) => dt,
+                    chrono::offset::LocalResult::Ambiguous(dt1, _dt2) => dt1,
+                    chrono::offset::LocalResult::None => local.earliest()?,
+                };
+
+                local_dt.with_timezone(&Utc)
+            } else {
+                Utc.from_utc_datetime(&naive_midnight)
+            };
+
+            match unit {
+                Second => {
+                    ScalarValue::TimestampSecond(Some(utc_dt.timestamp()), 
tz_opt.clone())
+                }
+                Millisecond => ScalarValue::TimestampMillisecond(
+                    Some(utc_dt.timestamp_millis()),
+                    tz_opt.clone(),
+                ),
+                Microsecond => ScalarValue::TimestampMicrosecond(
+                    Some(utc_dt.timestamp_micros()),
+                    tz_opt.clone(),
+                ),
+                Nanosecond => ScalarValue::TimestampNanosecond(
+                    Some(utc_dt.timestamp_nanos_opt()?),
+                    tz_opt.clone(),
+                ),
+            }
+        }
+        _ => return None,
+    })
+}
+
 // Try to remove quote if exist, if the quote is invalid, return original 
string and let the downstream function handle the error
 fn part_normalization(part: &str) -> &str {
     part.strip_prefix(|c| c == '\'' || c == '\"')
diff --git a/datafusion/sqllogictest/test_files/datetime/date_part.slt 
b/datafusion/sqllogictest/test_files/datetime/date_part.slt
index 019a988a9..bffcf76bb 100644
--- a/datafusion/sqllogictest/test_files/datetime/date_part.slt
+++ b/datafusion/sqllogictest/test_files/datetime/date_part.slt
@@ -19,7 +19,7 @@
 # for the same function).
 
 
-## Begin tests fo rdate_part with columns and timestamp's with timezones
+## Begin tests for date_part with columns and timestamp's with timezones
 
 # Source data table has
 # timestamps with millisecond (very common timestamp precision) and nanosecond 
(maximum precision) timestamps
@@ -1194,3 +1194,540 @@ query I
 SELECT EXTRACT('isodow' FROM to_timestamp('2020-09-08T12:00:00+00:00'))
 ----
 1
+
+## Preimage tests
+
+statement ok
+create table t1(c DATE) as VALUES (NULL), ('1990-01-01'), ('2024-01-01'), 
('2030-01-01');
+
+# Simple optimizations, col on LHS
+
+query D
+select c from t1 where extract(year from c) = 2024;
+----
+2024-01-01
+
+query D
+select c from t1 where extract(year from c) <> 2024;
+----
+1990-01-01
+2030-01-01 
+
+query D
+select c from t1 where extract(year from c) > 2024;
+----
+2030-01-01
+
+query D
+select c from t1 where extract(year from c) < 2024;
+----
+1990-01-01
+
+query D
+select c from t1 where extract(year from c) >= 2024;
+----
+2024-01-01
+2030-01-01
+
+query D
+select c from t1 where extract(year from c) <= 2024;
+----
+1990-01-01
+2024-01-01
+
+query D
+select c from t1 where extract(year from c) is not distinct from 2024
+----
+2024-01-01
+
+query D
+select c from t1 where extract(year from c) is distinct from 2024
+----
+NULL
+1990-01-01
+2030-01-01
+
+# Check that date_part is not in the explain statements
+
+query TT
+explain select c from t1 where extract (year from c) = 2024
+----
+logical_plan
+01)Filter: t1.c >= Date32("2024-01-01") AND t1.c < Date32("2025-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 >= 2024-01-01 AND c@0 < 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) <> 2024
+----
+logical_plan
+01)Filter: t1.c < Date32("2024-01-01") OR t1.c >= Date32("2025-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 < 2024-01-01 OR c@0 >= 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) > 2024
+----
+logical_plan
+01)Filter: t1.c >= Date32("2025-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 >= 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) < 2024
+----
+logical_plan
+01)Filter: t1.c < Date32("2024-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 < 2024-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) >= 2024
+----
+logical_plan
+01)Filter: t1.c >= Date32("2024-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 >= 2024-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) <= 2024
+----
+logical_plan
+01)Filter: t1.c < Date32("2025-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 < 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) is not distinct from 2024
+----
+logical_plan
+01)Filter: t1.c IS NOT NULL AND t1.c >= Date32("2024-01-01") AND t1.c < 
Date32("2025-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 IS NOT NULL AND c@0 >= 2024-01-01 AND c@0 < 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (year from c) is distinct from 2024
+----
+logical_plan
+01)Filter: t1.c < Date32("2024-01-01") OR t1.c >= Date32("2025-01-01") OR t1.c 
IS NULL
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 < 2024-01-01 OR c@0 >= 2025-01-01 OR c@0 IS NULL
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+# Simple optimizations, column on RHS
+
+query D
+select c from t1 where 2024 = extract(year from c);
+----
+2024-01-01
+
+query D
+select c from t1 where 2024 <> extract(year from c);
+----
+1990-01-01
+2030-01-01
+
+query D
+select c from t1 where 2024 < extract(year from c);
+----
+2030-01-01
+
+query D
+select c from t1 where 2024 > extract(year from c);
+----
+1990-01-01
+
+query D
+select c from t1 where 2024 <= extract(year from c);
+----
+2024-01-01
+2030-01-01
+
+query D
+select c from t1 where 2024 >= extract(year from c);
+----
+1990-01-01
+2024-01-01
+
+query D
+select c from t1 where 2024 is not distinct from extract(year from c);
+----
+2024-01-01
+
+query D
+select c from t1 where 2024 is distinct from extract(year from c);
+----
+NULL
+1990-01-01
+2030-01-01
+
+# Check explain statements for optimizations for other interval types
+
+query TT
+explain select c from t1 where extract (quarter from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("QUARTER"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(QUARTER, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (month from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("MONTH"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(MONTH, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (week from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("WEEK"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(WEEK, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (day from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("DAY"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(DAY, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (hour from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("HOUR"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(HOUR, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (minute from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("MINUTE"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(MINUTE, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (second from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("SECOND"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(SECOND, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (millisecond from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("MILLISECOND"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(MILLISECOND, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (microsecond from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("MICROSECOND"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(MICROSECOND, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (nanosecond from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(NANOSECOND, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (dow from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("DOW"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(DOW, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (doy from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("DOY"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(DOY, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (epoch from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("EPOCH"), t1.c) = Float64(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(EPOCH, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c from t1 where extract (isodow from c) = 2024
+----
+logical_plan
+01)Filter: date_part(Utf8("ISODOW"), t1.c) = Int32(2024)
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: date_part(ISODOW, c@0) = 2024
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+# Simple optimize different datatypes
+
+statement ok
+create table t2(
+    c1_date32 DATE,
+    c2_ts_sec timestamp,
+    c3_ts_mili timestamp,
+    c4_ts_micro timestamp,
+    c5_ts_nano timestamp
+) as VALUES
+    (NULL,
+     NULL,
+     NULL,
+     NULL,
+     NULL),
+    ('1990-05-20',
+     '1990-05-20T00:00:10'::timestamp,
+     '1990-05-20T00:00:10.987'::timestamp,
+     '1990-05-20T00:00:10.987654'::timestamp,
+     '1990-05-20T00:00:10.987654321'::timestamp),
+    ('2024-01-01',
+     '2024-01-01T00:00:00'::timestamp,
+     '2024-01-01T00:00:00.123'::timestamp,
+     '2024-01-01T00:00:00.123456'::timestamp,
+     '2024-01-01T00:00:00.123456789'::timestamp),
+    ('2030-12-31',
+     '2030-12-31T23:59:59'::timestamp,
+     '2030-12-31T23:59:59.001'::timestamp,
+     '2030-12-31T23:59:59.001234'::timestamp,
+     '2030-12-31T23:59:59.001234567'::timestamp)
+;
+
+query D
+select c1_date32 from t2 where extract(year from c1_date32) = 2024;
+----
+2024-01-01
+
+query D
+select c1_date32 from t2 where extract(year from c1_date32) <> 2024;
+----
+1990-05-20
+2030-12-31 
+
+query P
+select c2_ts_sec from t2 where extract(year from c2_ts_sec) > 2024;
+----
+2030-12-31T23:59:59
+
+query P
+select c3_ts_mili from t2 where extract(year from c3_ts_mili) < 2024;
+----
+1990-05-20T00:00:10.987
+
+query P
+select c4_ts_micro from t2 where extract(year from c4_ts_micro) >= 2024;
+----
+2024-01-01T00:00:00.123456
+2030-12-31T23:59:59.001234
+
+query P
+select c5_ts_nano from t2 where extract(year from c5_ts_nano) <= 2024;
+----
+1990-05-20T00:00:10.987654321
+2024-01-01T00:00:00.123456789
+
+query D
+select c1_date32 from t2 where extract(year from c1_date32) is not distinct 
from 2024
+----
+2024-01-01
+
+query D
+select c1_date32 from t2 where extract(year from c1_date32) is distinct from 
2024
+----
+NULL
+1990-05-20
+2030-12-31
+
+# Check that date_part is not in the explain statements for other datatypes
+
+query TT
+explain select c1_date32 from t2 where extract (year from c1_date32) = 2024
+----
+logical_plan
+01)Filter: t2.c1_date32 >= Date32("2024-01-01") AND t2.c1_date32 < 
Date32("2025-01-01")
+02)--TableScan: t2 projection=[c1_date32]
+physical_plan
+01)FilterExec: c1_date32@0 >= 2024-01-01 AND c1_date32@0 < 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c1_date32 from t2 where extract (year from c1_date32) <> 2024
+----
+logical_plan
+01)Filter: t2.c1_date32 < Date32("2024-01-01") OR t2.c1_date32 >= 
Date32("2025-01-01")
+02)--TableScan: t2 projection=[c1_date32]
+physical_plan
+01)FilterExec: c1_date32@0 < 2024-01-01 OR c1_date32@0 >= 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c2_ts_sec from t2 where extract (year from c2_ts_sec) > 2024
+----
+logical_plan
+01)Filter: t2.c2_ts_sec >= TimestampNanosecond(1735689600000000000, None)
+02)--TableScan: t2 projection=[c2_ts_sec]
+physical_plan
+01)FilterExec: c2_ts_sec@0 >= 1735689600000000000
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c3_ts_mili from t2 where extract (year from c3_ts_mili) < 2024
+----
+logical_plan
+01)Filter: t2.c3_ts_mili < TimestampNanosecond(1704067200000000000, None)
+02)--TableScan: t2 projection=[c3_ts_mili]
+physical_plan
+01)FilterExec: c3_ts_mili@0 < 1704067200000000000
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c4_ts_micro from t2 where extract (year from c4_ts_micro) >= 
2024
+----
+logical_plan
+01)Filter: t2.c4_ts_micro >= TimestampNanosecond(1704067200000000000, None)
+02)--TableScan: t2 projection=[c4_ts_micro]
+physical_plan
+01)FilterExec: c4_ts_micro@0 >= 1704067200000000000
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c5_ts_nano from t2 where extract (year from c5_ts_nano) <= 2024
+----
+logical_plan
+01)Filter: t2.c5_ts_nano < TimestampNanosecond(1735689600000000000, None)
+02)--TableScan: t2 projection=[c5_ts_nano]
+physical_plan
+01)FilterExec: c5_ts_nano@0 < 1735689600000000000
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c1_date32 from t2 where extract (year from c1_date32) is not 
distinct from 2024
+----
+logical_plan
+01)Filter: t2.c1_date32 IS NOT NULL AND t2.c1_date32 >= Date32("2024-01-01") 
AND t2.c1_date32 < Date32("2025-01-01")
+02)--TableScan: t2 projection=[c1_date32]
+physical_plan
+01)FilterExec: c1_date32@0 IS NOT NULL AND c1_date32@0 >= 2024-01-01 AND 
c1_date32@0 < 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+query TT
+explain select c1_date32 from t2 where extract (year from c1_date32) is 
distinct from 2024
+----
+logical_plan
+01)Filter: t2.c1_date32 < Date32("2024-01-01") OR t2.c1_date32 >= 
Date32("2025-01-01") OR t2.c1_date32 IS NULL
+02)--TableScan: t2 projection=[c1_date32]
+physical_plan
+01)FilterExec: c1_date32@0 < 2024-01-01 OR c1_date32@0 >= 2025-01-01 OR 
c1_date32@0 IS NULL
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+# Preimage with timestamp with America/New_York timezone
+
+statement ok
+SET datafusion.execution.time_zone = 'America/New_York';
+
+statement ok
+create table t3(
+    c1_ts_tz timestamptz
+) as VALUES
+    (NULL),
+    ('2024-01-01T04:59:59Z'::timestamptz), -- local 2023-12-31 23:59:59 -05
+    ('2024-01-01T05:00:00Z'::timestamptz), -- local 2024-01-01 00:00:00 -05
+    ('2025-01-01T04:59:59Z'::timestamptz), -- local 2024-12-31 23:59:59 -05
+    ('2025-01-01T05:00:00Z'::timestamptz)  -- local 2025-01-01 00:00:00 -05
+;
+
+query P
+select c1_ts_tz
+from t3
+where extract(year from c1_ts_tz) = 2024
+order by c1_ts_tz
+----
+2024-01-01T00:00:00-05:00
+2024-12-31T23:59:59-05:00
+
+query TT
+explain select c1_ts_tz from t3 where extract(year from c1_ts_tz) = 2024
+----
+logical_plan
+01)Filter: t3.c1_ts_tz >= TimestampNanosecond(1704085200000000000, 
Some("America/New_York")) AND t3.c1_ts_tz < 
TimestampNanosecond(1735707600000000000, Some("America/New_York"))
+02)--TableScan: t3 projection=[c1_ts_tz]
+physical_plan
+01)FilterExec: c1_ts_tz@0 >= 1704085200000000000 AND c1_ts_tz@0 < 
1735707600000000000
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
+
+statement ok
+RESET datafusion.execution.time_zone;
+
+# Test non-Int32 rhs argument
+
+query D
+select c from t1 where extract(year from c) = cast(2024 as bigint);
+----
+2024-01-01
+
+query TT
+explain select c from t1 where extract (year from c) = cast(2024 as bigint)
+----
+logical_plan
+01)Filter: t1.c >= Date32("2024-01-01") AND t1.c < Date32("2025-01-01")
+02)--TableScan: t1 projection=[c]
+physical_plan
+01)FilterExec: c@0 >= 2024-01-01 AND c@0 < 2025-01-01
+02)--DataSourceExec: partitions=1, partition_sizes=[1]
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to