This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 4ed808a83c feat(spark): add trunc, date_trunc and time_trunc functions 
(#19829)
4ed808a83c is described below

commit 4ed808a83c4a4d73edc7d50dc53ab737977339ef
Author: cht42 <[email protected]>
AuthorDate: Mon Jan 19 08:13:57 2026 +0400

    feat(spark): add trunc, date_trunc and time_trunc functions (#19829)
    
    ## Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax. For example
    `Closes #123` indicates that this PR will close issue #123.
    -->
    
    - Closes #19828.
    - Part of #15914
    
    ## Rationale for this change
    
    implement spark:
    - https://spark.apache.org/docs/latest/api/sql/index.html#trunc
    - https://spark.apache.org/docs/latest/api/sql/index.html#date_trunc
    - https://spark.apache.org/docs/latest/api/sql/index.html#time_trunc
    
    ## What changes are included in this PR?
    
    Add spark compatible wrappers around datafusion date_trunc function to
    handle spark specificities.
    
    ## Are these changes tested?
    
    Yes in SLT
    
    ## Are there any user-facing changes?
    
    Yes
---
 datafusion/functions/src/datetime/date_trunc.rs    |  20 ++-
 .../spark/src/function/datetime/date_trunc.rs      | 172 ++++++++++++++++++++
 datafusion/spark/src/function/datetime/mod.rs      |  33 +++-
 .../spark/src/function/datetime/time_trunc.rs      | 122 ++++++++++++++
 datafusion/spark/src/function/datetime/trunc.rs    | 143 +++++++++++++++++
 .../test_files/spark/datetime/date_trunc.slt       | 177 +++++++++++++++++----
 .../test_files/spark/datetime/time_trunc.slt       |  74 +++++++++
 .../test_files/spark/datetime/trunc.slt            | 101 +++++++++---
 8 files changed, 782 insertions(+), 60 deletions(-)

diff --git a/datafusion/functions/src/datetime/date_trunc.rs 
b/datafusion/functions/src/datetime/date_trunc.rs
index 8c8a4a1c1b..951ce7e882 100644
--- a/datafusion/functions/src/datetime/date_trunc.rs
+++ b/datafusion/functions/src/datetime/date_trunc.rs
@@ -34,6 +34,7 @@ use arrow::array::types::{
 use arrow::array::{Array, ArrayRef, PrimitiveArray};
 use arrow::datatypes::DataType::{self, Time32, Time64, Timestamp};
 use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, 
Second};
+use arrow::datatypes::{Field, FieldRef};
 use datafusion_common::cast::as_primitive_array;
 use datafusion_common::types::{NativeType, logical_date, logical_string};
 use datafusion_common::{
@@ -41,7 +42,8 @@ use datafusion_common::{
 };
 use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
 use datafusion_expr::{
-    ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, 
Volatility,
+    ColumnarValue, Documentation, ReturnFieldArgs, ScalarUDFImpl, Signature,
+    TypeSignature, Volatility,
 };
 use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
 use datafusion_macros::user_doc;
@@ -221,6 +223,7 @@ impl ScalarUDFImpl for DateTruncFunc {
         &self.signature
     }
 
+    // keep return_type implementation for information schema generation
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         if arg_types[1].is_null() {
             Ok(Timestamp(Nanosecond, None))
@@ -229,6 +232,21 @@ impl ScalarUDFImpl for DateTruncFunc {
         }
     }
 
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
+        let data_types = args
+            .arg_fields
+            .iter()
+            .map(|f| f.data_type())
+            .cloned()
+            .collect::<Vec<_>>();
+        let return_type = self.return_type(&data_types)?;
+        Ok(Arc::new(Field::new(
+            self.name(),
+            return_type,
+            args.arg_fields[1].is_nullable(),
+        )))
+    }
+
     fn invoke_with_args(
         &self,
         args: datafusion_expr::ScalarFunctionArgs,
diff --git a/datafusion/spark/src/function/datetime/date_trunc.rs 
b/datafusion/spark/src/function/datetime/date_trunc.rs
new file mode 100644
index 0000000000..2199c90703
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/date_trunc.rs
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit};
+use datafusion_common::types::{NativeType, logical_string};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, ScalarValue, internal_err, plan_err};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
+use datafusion_expr::{
+    Coercion, ColumnarValue, Expr, ExprSchemable, ReturnFieldArgs, 
ScalarFunctionArgs,
+    ScalarUDFImpl, Signature, TypeSignatureClass, Volatility,
+};
+
+/// Spark date_trunc supports extra format aliases.
+/// It also handles timestamps with timezones by converting to session 
timezone first.
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#date_trunc>
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkDateTrunc {
+    signature: Signature,
+}
+
+impl Default for SparkDateTrunc {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkDateTrunc {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::coercible(
+                vec![
+                    
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                    Coercion::new_implicit(
+                        TypeSignatureClass::Timestamp,
+                        vec![TypeSignatureClass::Native(logical_string())],
+                        NativeType::Timestamp(TimeUnit::Microsecond, None),
+                    ),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkDateTrunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "date_trunc"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
+        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
+
+        Ok(Arc::new(Field::new(
+            self.name(),
+            args.arg_fields[1].data_type().clone(),
+            nullable,
+        )))
+    }
+
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
+        internal_err!(
+            "spark date_trunc should have been simplified to standard 
date_trunc"
+        )
+    }
+
+    fn simplify(
+        &self,
+        args: Vec<Expr>,
+        info: &SimplifyContext,
+    ) -> Result<ExprSimplifyResult> {
+        let [fmt_expr, ts_expr] = take_function_args(self.name(), args)?;
+
+        let fmt = match fmt_expr.as_literal() {
+            Some(ScalarValue::Utf8(Some(v)))
+            | Some(ScalarValue::Utf8View(Some(v)))
+            | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(),
+            _ => {
+                return plan_err!(
+                    "First argument of `DATE_TRUNC` must be non-null scalar 
Utf8"
+                );
+            }
+        };
+
+        // Map Spark-specific fmt aliases to datafusion ones
+        let fmt = match fmt.as_str() {
+            "yy" | "yyyy" => "year",
+            "mm" | "mon" => "month",
+            "dd" => "day",
+            other => other,
+        };
+
+        let session_tz = info.config_options().execution.time_zone.clone();
+        let ts_type = ts_expr.get_type(info.schema())?;
+
+        // Spark interprets timestamps in the session timezone before 
truncating,
+        // then returns a timestamp at microsecond precision.
+        // See: 
https://github.com/apache/spark/blob/f310f4fcc95580a6824bc7d22b76006f79b8804a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala#L492
+        //
+        // For sub-second truncations (second, millisecond, microsecond), 
timezone
+        // adjustment is unnecessary since timezone offsets are whole seconds.
+        let ts_expr = match (&ts_type, fmt) {
+            // Sub-second truncations don't need timezone adjustment
+            (_, "second" | "millisecond" | "microsecond") => ts_expr,
+
+            // convert to session timezone, strip timezone and convert back to 
original timezone
+            (DataType::Timestamp(unit, tz), _) => {
+                let ts_expr = match &session_tz {
+                    Some(session_tz) => ts_expr.cast_to(
+                        &DataType::Timestamp(
+                            TimeUnit::Microsecond,
+                            Some(Arc::from(session_tz.as_str())),
+                        ),
+                        info.schema(),
+                    )?,
+                    None => ts_expr,
+                };
+                Expr::ScalarFunction(ScalarFunction::new_udf(
+                    datafusion_functions::datetime::to_local_time(),
+                    vec![ts_expr],
+                ))
+                .cast_to(&DataType::Timestamp(*unit, tz.clone()), 
info.schema())?
+            }
+
+            _ => {
+                return plan_err!(
+                    "Second argument of `DATE_TRUNC` must be Timestamp, got 
{}",
+                    ts_type
+                );
+            }
+        };
+
+        let fmt_expr = Expr::Literal(ScalarValue::new_utf8(fmt), None);
+
+        Ok(ExprSimplifyResult::Simplified(Expr::ScalarFunction(
+            ScalarFunction::new_udf(
+                datafusion_functions::datetime::date_trunc(),
+                vec![fmt_expr, ts_expr],
+            ),
+        )))
+    }
+}
diff --git a/datafusion/spark/src/function/datetime/mod.rs 
b/datafusion/spark/src/function/datetime/mod.rs
index 99618320e1..92d7eab32c 100644
--- a/datafusion/spark/src/function/datetime/mod.rs
+++ b/datafusion/spark/src/function/datetime/mod.rs
@@ -18,18 +18,23 @@
 pub mod date_add;
 pub mod date_part;
 pub mod date_sub;
+pub mod date_trunc;
 pub mod extract;
 pub mod last_day;
 pub mod make_dt_interval;
 pub mod make_interval;
 pub mod next_day;
+pub mod time_trunc;
+pub mod trunc;
 
 use datafusion_expr::ScalarUDF;
 use datafusion_functions::make_udf_function;
 use std::sync::Arc;
 
 make_udf_function!(date_add::SparkDateAdd, date_add);
+make_udf_function!(date_part::SparkDatePart, date_part);
 make_udf_function!(date_sub::SparkDateSub, date_sub);
+make_udf_function!(date_trunc::SparkDateTrunc, date_trunc);
 make_udf_function!(extract::SparkHour, hour);
 make_udf_function!(extract::SparkMinute, minute);
 make_udf_function!(extract::SparkSecond, second);
@@ -37,7 +42,8 @@ make_udf_function!(last_day::SparkLastDay, last_day);
 make_udf_function!(make_dt_interval::SparkMakeDtInterval, make_dt_interval);
 make_udf_function!(make_interval::SparkMakeInterval, make_interval);
 make_udf_function!(next_day::SparkNextDay, next_day);
-make_udf_function!(date_part::SparkDatePart, date_part);
+make_udf_function!(time_trunc::SparkTimeTrunc, time_trunc);
+make_udf_function!(trunc::SparkTrunc, trunc);
 
 pub mod expr_fn {
     use datafusion_functions::export_functions;
@@ -85,24 +91,43 @@ pub mod expr_fn {
         "Returns the first date which is later than start_date and named as 
indicated. The function returns NULL if at least one of the input parameters is 
NULL.",
         arg1 arg2
     ));
+    export_functions!((
+        date_trunc,
+        "Truncates a timestamp `ts` to the unit specified by the format 
`fmt`.",
+        fmt ts
+    ));
+    export_functions!((
+        time_trunc,
+        "Truncates a time `t` to the unit specified by the format `fmt`.",
+        fmt t
+    ));
+    export_functions!((
+        trunc,
+        "Truncates a date `dt` to the unit specified by the format `fmt`.",
+        dt fmt
+    ));
     export_functions!((
         date_part,
         "Extracts a part of the date or time from a date, time, or timestamp 
expression.",
         arg1 arg2
+
     ));
 }
 
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
     vec![
         date_add(),
+        date_part(),
         date_sub(),
+        date_trunc(),
         hour(),
-        minute(),
-        second(),
         last_day(),
         make_dt_interval(),
         make_interval(),
+        minute(),
         next_day(),
-        date_part(),
+        second(),
+        time_trunc(),
+        trunc(),
     ]
 }
diff --git a/datafusion/spark/src/function/datetime/time_trunc.rs 
b/datafusion/spark/src/function/datetime/time_trunc.rs
new file mode 100644
index 0000000000..718502a05e
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/time_trunc.rs
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion_common::types::logical_string;
+use datafusion_common::{Result, ScalarValue, internal_err, plan_err};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
+use datafusion_expr::{
+    Coercion, ColumnarValue, Expr, ReturnFieldArgs, ScalarFunctionArgs, 
ScalarUDFImpl,
+    Signature, TypeSignatureClass, Volatility,
+};
+
+/// Spark time_trunc function only handles time inputs.
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#time_trunc>
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkTimeTrunc {
+    signature: Signature,
+}
+
+impl Default for SparkTimeTrunc {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkTimeTrunc {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::coercible(
+                vec![
+                    
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                    Coercion::new_exact(TypeSignatureClass::Time),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkTimeTrunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "time_trunc"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
+        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
+
+        Ok(Arc::new(Field::new(
+            self.name(),
+            args.arg_fields[1].data_type().clone(),
+            nullable,
+        )))
+    }
+
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
+        internal_err!(
+            "spark time_trunc should have been simplified to standard 
date_trunc"
+        )
+    }
+
+    fn simplify(
+        &self,
+        args: Vec<Expr>,
+        _info: &SimplifyContext,
+    ) -> Result<ExprSimplifyResult> {
+        let fmt_expr = &args[0];
+
+        let fmt = match fmt_expr.as_literal() {
+            Some(ScalarValue::Utf8(Some(v)))
+            | Some(ScalarValue::Utf8View(Some(v)))
+            | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(),
+            _ => {
+                return plan_err!(
+                    "First argument of `TIME_TRUNC` must be non-null scalar 
Utf8"
+                );
+            }
+        };
+
+        if !matches!(
+            fmt.as_str(),
+            "hour" | "minute" | "second" | "millisecond" | "microsecond"
+        ) {
+            return plan_err!(
+                "The format argument of `TIME_TRUNC` must be one of: hour, 
minute, second, millisecond, microsecond"
+            );
+        }
+
+        Ok(ExprSimplifyResult::Simplified(Expr::ScalarFunction(
+            
ScalarFunction::new_udf(datafusion_functions::datetime::date_trunc(), args),
+        )))
+    }
+}
diff --git a/datafusion/spark/src/function/datetime/trunc.rs 
b/datafusion/spark/src/function/datetime/trunc.rs
new file mode 100644
index 0000000000..b584cc9a70
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/trunc.rs
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit};
+use datafusion_common::types::{NativeType, logical_date, logical_string};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, ScalarValue, internal_err, plan_err};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
+use datafusion_expr::{
+    Coercion, ColumnarValue, Expr, ExprSchemable, ReturnFieldArgs, 
ScalarFunctionArgs,
+    ScalarUDFImpl, Signature, TypeSignatureClass, Volatility,
+};
+
+/// Spark trunc supports date inputs only and extra format aliases.
+/// Also spark trunc's argument order is (date, format).
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#trunc>
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkTrunc {
+    signature: Signature,
+}
+
+impl Default for SparkTrunc {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkTrunc {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::coercible(
+                vec![
+                    Coercion::new_implicit(
+                        TypeSignatureClass::Native(logical_date()),
+                        vec![TypeSignatureClass::Native(logical_string())],
+                        NativeType::Date,
+                    ),
+                    
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkTrunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "trunc"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("return_field_from_args should be used instead")
+    }
+
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
+        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
+
+        Ok(Arc::new(Field::new(
+            self.name(),
+            args.arg_fields[0].data_type().clone(),
+            nullable,
+        )))
+    }
+
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
+        internal_err!("spark trunc should have been simplified to standard 
date_trunc")
+    }
+
+    fn simplify(
+        &self,
+        args: Vec<Expr>,
+        info: &SimplifyContext,
+    ) -> Result<ExprSimplifyResult> {
+        let [dt_expr, fmt_expr] = take_function_args(self.name(), args)?;
+
+        let fmt = match fmt_expr.as_literal() {
+            Some(ScalarValue::Utf8(Some(v)))
+            | Some(ScalarValue::Utf8View(Some(v)))
+            | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(),
+            _ => {
+                return plan_err!(
+                    "Second argument of `TRUNC` must be non-null scalar Utf8"
+                );
+            }
+        };
+
+        // Map Spark-specific fmt aliases to datafusion ones
+        let fmt = match fmt.as_str() {
+            "yy" | "yyyy" => "year",
+            "mm" | "mon" => "month",
+            "year" | "month" | "day" | "week" | "quarter" => fmt.as_str(),
+            _ => {
+                return plan_err!(
+                    "The format argument of `TRUNC` must be one of: year, yy, 
yyyy, month, mm, mon, day, week, quarter."
+                );
+            }
+        };
+        let return_type = dt_expr.get_type(info.schema())?;
+
+        let fmt_expr = Expr::Literal(ScalarValue::new_utf8(fmt), None);
+
+        // Spark uses Dates so we need to cast to timestamp and back to work 
with datafusion's date_trunc
+        Ok(ExprSimplifyResult::Simplified(
+            Expr::ScalarFunction(ScalarFunction::new_udf(
+                datafusion_functions::datetime::date_trunc(),
+                vec![
+                    fmt_expr,
+                    dt_expr.cast_to(
+                        &DataType::Timestamp(TimeUnit::Nanosecond, None),
+                        info.schema(),
+                    )?,
+                ],
+            ))
+            .cast_to(&return_type, info.schema())?,
+        ))
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt 
b/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt
index 8a15254e67..7fc1583bb9 100644
--- a/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt
+++ b/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt
@@ -15,33 +15,150 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This file was originally created by a porting script from:
-#   
https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
-# This file is part of the implementation of the datafusion-spark function 
library.
-# For more information, please see:
-#   https://github.com/apache/datafusion/issues/15914
-
-## Original Query: SELECT date_trunc('DD', '2015-03-05T09:32:05.359');
-## PySpark 3.5.5 Result: {'date_trunc(DD, 2015-03-05T09:32:05.359)': 
datetime.datetime(2015, 3, 5, 0, 0), 'typeof(date_trunc(DD, 
2015-03-05T09:32:05.359))': 'timestamp', 'typeof(DD)': 'string', 
'typeof(2015-03-05T09:32:05.359)': 'string'}
-#query
-#SELECT date_trunc('DD'::string, '2015-03-05T09:32:05.359'::string);
-
-## Original Query: SELECT date_trunc('HOUR', '2015-03-05T09:32:05.359');
-## PySpark 3.5.5 Result: {'date_trunc(HOUR, 2015-03-05T09:32:05.359)': 
datetime.datetime(2015, 3, 5, 9, 0), 'typeof(date_trunc(HOUR, 
2015-03-05T09:32:05.359))': 'timestamp', 'typeof(HOUR)': 'string', 
'typeof(2015-03-05T09:32:05.359)': 'string'}
-#query
-#SELECT date_trunc('HOUR'::string, '2015-03-05T09:32:05.359'::string);
-
-## Original Query: SELECT date_trunc('MILLISECOND', 
'2015-03-05T09:32:05.123456');
-## PySpark 3.5.5 Result: {'date_trunc(MILLISECOND, 
2015-03-05T09:32:05.123456)': datetime.datetime(2015, 3, 5, 9, 32, 5, 123000), 
'typeof(date_trunc(MILLISECOND, 2015-03-05T09:32:05.123456))': 'timestamp', 
'typeof(MILLISECOND)': 'string', 'typeof(2015-03-05T09:32:05.123456)': 'string'}
-#query
-#SELECT date_trunc('MILLISECOND'::string, 
'2015-03-05T09:32:05.123456'::string);
-
-## Original Query: SELECT date_trunc('MM', '2015-03-05T09:32:05.359');
-## PySpark 3.5.5 Result: {'date_trunc(MM, 2015-03-05T09:32:05.359)': 
datetime.datetime(2015, 3, 1, 0, 0), 'typeof(date_trunc(MM, 
2015-03-05T09:32:05.359))': 'timestamp', 'typeof(MM)': 'string', 
'typeof(2015-03-05T09:32:05.359)': 'string'}
-#query
-#SELECT date_trunc('MM'::string, '2015-03-05T09:32:05.359'::string);
-
-## Original Query: SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359');
-## PySpark 3.5.5 Result: {'date_trunc(YEAR, 2015-03-05T09:32:05.359)': 
datetime.datetime(2015, 1, 1, 0, 0), 'typeof(date_trunc(YEAR, 
2015-03-05T09:32:05.359))': 'timestamp', 'typeof(YEAR)': 'string', 
'typeof(2015-03-05T09:32:05.359)': 'string'}
-#query
-#SELECT date_trunc('YEAR'::string, '2015-03-05T09:32:05.359'::string);
+# YEAR - truncate to first date of year, time zeroed
+query P
+SELECT date_trunc('YEAR', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-01-01T00:00:00
+
+query P
+SELECT date_trunc('YYYY', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-01-01T00:00:00
+
+query P
+SELECT date_trunc('YY', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-01-01T00:00:00
+
+# QUARTER - truncate to first date of quarter, time zeroed
+query P
+SELECT date_trunc('QUARTER', '2015-05-05T09:32:05.123456'::timestamp);
+----
+2015-04-01T00:00:00
+
+# MONTH - truncate to first date of month, time zeroed
+query P
+SELECT date_trunc('MONTH', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-01T00:00:00
+
+query P
+SELECT date_trunc('MM', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-01T00:00:00
+
+query P
+SELECT date_trunc('MON', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-01T00:00:00
+
+# WEEK - truncate to Monday of the week, time zeroed
+query P
+SELECT date_trunc('WEEK', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-02T00:00:00
+
+# DAY - zero out time part
+query P
+SELECT date_trunc('DAY', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T00:00:00
+
+query P
+SELECT date_trunc('DD', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T00:00:00
+
+# HOUR - zero out minute and second with fraction
+query P
+SELECT date_trunc('HOUR', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T09:00:00
+
+# MINUTE - zero out second with fraction
+query P
+SELECT date_trunc('MINUTE', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T09:32:00
+
+# SECOND - zero out fraction
+query P
+SELECT date_trunc('SECOND', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T09:32:05
+
+# MILLISECOND - zero out microseconds
+query P
+SELECT date_trunc('MILLISECOND', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T09:32:05.123
+
+# MICROSECOND - everything remains
+query P
+SELECT date_trunc('MICROSECOND', '2015-03-05T09:32:05.123456'::timestamp);
+----
+2015-03-05T09:32:05.123456
+
+query P
+SELECT date_trunc('YEAR', column1)
+FROM VALUES
+('2015-03-05T09:32:05.123456'::timestamp),
+('2020-11-15T22:45:30.654321'::timestamp),
+('1999-07-20T14:20:10.000001'::timestamp),
+(NULL::timestamp);
+----
+2015-01-01T00:00:00
+2020-01-01T00:00:00
+1999-01-01T00:00:00
+NULL
+
+# String input
+query P
+SELECT date_trunc('YEAR', '2015-03-05T09:32:05.123456');
+----
+2015-01-01T00:00:00
+
+# Null handling
+query error DataFusion error: Optimizer rule 'simplify_expressions' 
failed\ncaused by\nError during planning: First argument of `DATE_TRUNC` must 
be non-null scalar Utf8
+SELECT date_trunc(NULL, '2015-03-05T09:32:05.123456');
+
+query P
+SELECT date_trunc('YEAR', NULL::timestamp);
+----
+NULL
+
+# incorrect format
+query error DataFusion error: Execution error: Unsupported date_trunc 
granularity: 'test'. Supported values are: microsecond, millisecond, second, 
minute, hour, day, week, month, quarter, year
+SELECT date_trunc('test', '2015-03-05T09:32:05.123456');
+
+# Timezone handling - Spark-compatible behavior
+# Spark converts timestamps to session timezone before truncating for coarse 
granularities
+
+query P
+SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 
'Timestamp(Microsecond, Some("UTC"))'));
+----
+2024-07-15T00:00:00Z
+
+query P
+SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 
'Timestamp(Microsecond, None)'));
+----
+2024-07-15T00:00:00
+
+statement ok
+SET datafusion.execution.time_zone = 'America/New_York';
+
+# This timestamp is 03:30 UTC = 23:30 EDT (previous day) on July 14
+# With session timezone, truncation happens in America/New_York timezone
+query P
+SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 
'Timestamp(Microsecond, Some("UTC"))'));
+----
+2024-07-14T00:00:00Z
+
+query P
+SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 
'Timestamp(Microsecond, None)'));
+----
+2024-07-15T00:00:00
+
+statement ok
+RESET datafusion.execution.time_zone;
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt 
b/datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt
new file mode 100644
index 0000000000..f00c40f0a9
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# HOUR - zero out minute and second with fraction
+query D
+SELECT time_trunc('HOUR', '09:32:05.123456'::time);
+----
+09:00:00
+
+# MINUTE - zero out second with fraction
+query D
+SELECT time_trunc('MINUTE', '09:32:05.123456'::time);
+----
+09:32:00
+
+# SECOND - zero out fraction
+query D
+SELECT time_trunc('SECOND', '09:32:05.123456'::time);
+----
+09:32:05
+
+# MILLISECOND - zero out microseconds
+query D
+SELECT time_trunc('MILLISECOND', '09:32:05.123456'::time);
+----
+09:32:05.123
+
+# MICROSECOND - everything remains
+query D
+SELECT time_trunc('MICROSECOND', '09:32:05.123456'::time);
+----
+09:32:05.123456
+
+query D
+SELECT time_trunc('HOUR', column1)
+FROM VALUES
+('09:32:05.123456'::time),
+('22:45:30.654321'::time),
+('14:20:10.000001'::time),
+(NULL::time);
+----
+09:00:00
+22:00:00
+14:00:00
+NULL
+
+
+# Null handling
+query error DataFusion error: Optimizer rule 'simplify_expressions' 
failed\ncaused by\nError during planning: First argument of `TIME_TRUNC` must 
be non-null scalar Utf8
+SELECT time_trunc(NULL, '09:32:05.123456'::time);
+
+query D
+SELECT time_trunc('HOUR', NULL::time);
+----
+NULL
+
+# incorrect format
+query error DataFusion error: Optimizer rule 'simplify_expressions' 
failed\ncaused by\nError during planning: The format argument of `TIME_TRUNC` 
must be one of: hour, minute, second, millisecond, microsecond
+SELECT time_trunc('test', '09:32:05.123456'::time);
+
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt 
b/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt
index a502e2f7f7..f6bf6b5751 100644
--- a/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt
+++ b/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt
@@ -15,28 +15,79 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This file was originally created by a porting script from:
-#   
https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
-# This file is part of the implementation of the datafusion-spark function 
library.
-# For more information, please see:
-#   https://github.com/apache/datafusion/issues/15914
-
-## Original Query: SELECT trunc('2009-02-12', 'MM');
-## PySpark 3.5.5 Result: {'trunc(2009-02-12, MM)': datetime.date(2009, 2, 1), 
'typeof(trunc(2009-02-12, MM))': 'date', 'typeof(2009-02-12)': 'string', 
'typeof(MM)': 'string'}
-#query
-#SELECT trunc('2009-02-12'::string, 'MM'::string);
-
-## Original Query: SELECT trunc('2015-10-27', 'YEAR');
-## PySpark 3.5.5 Result: {'trunc(2015-10-27, YEAR)': datetime.date(2015, 1, 
1), 'typeof(trunc(2015-10-27, YEAR))': 'date', 'typeof(2015-10-27)': 'string', 
'typeof(YEAR)': 'string'}
-#query
-#SELECT trunc('2015-10-27'::string, 'YEAR'::string);
-
-## Original Query: SELECT trunc('2019-08-04', 'quarter');
-## PySpark 3.5.5 Result: {'trunc(2019-08-04, quarter)': datetime.date(2019, 7, 
1), 'typeof(trunc(2019-08-04, quarter))': 'date', 'typeof(2019-08-04)': 
'string', 'typeof(quarter)': 'string'}
-#query
-#SELECT trunc('2019-08-04'::string, 'quarter'::string);
-
-## Original Query: SELECT trunc('2019-08-04', 'week');
-## PySpark 3.5.5 Result: {'trunc(2019-08-04, week)': datetime.date(2019, 7, 
29), 'typeof(trunc(2019-08-04, week))': 'date', 'typeof(2019-08-04)': 'string', 
'typeof(week)': 'string'}
-#query
-#SELECT trunc('2019-08-04'::string, 'week'::string);
+# YEAR - truncate to first date of year
+query D
+SELECT trunc('2009-02-12'::date, 'YEAR'::string);
+----
+2009-01-01
+
+query D
+SELECT trunc('2009-02-12'::date, 'YYYY'::string);
+----
+2009-01-01
+
+query D
+SELECT trunc('2009-02-12'::date, 'YY'::string);
+----
+2009-01-01
+
+# QUARTER - truncate to first date of quarter
+query D
+SELECT trunc('2009-02-12'::date, 'QUARTER'::string);
+----
+2009-01-01
+
+# MONTH - truncate to first date of month
+query D
+SELECT trunc('2009-02-12'::date, 'MONTH'::string);
+----
+2009-02-01
+
+query D
+SELECT trunc('2009-02-12'::date, 'MM'::string);
+----
+2009-02-01
+
+query D
+SELECT trunc('2009-02-12'::date, 'MON'::string);
+----
+2009-02-01
+
+# WEEK - truncate to Monday of the week
+query D
+SELECT trunc('2009-02-12'::date, 'WEEK'::string);
+----
+2009-02-09
+
+# string input
+query D
+SELECT trunc('2009-02-12'::string, 'YEAR'::string);
+----
+2009-01-01
+
+query D
+SELECT trunc(column1, 'YEAR'::string)
+FROM VALUES
+('2009-02-12'::date),
+('2000-02-12'::date),
+('2042-02-12'::date),
+(NULL::date);
+----
+2009-01-01
+2000-01-01
+2042-01-01
+NULL
+
+# Null handling
+query D
+SELECT trunc(NULL::date, 'YEAR'::string);
+----
+NULL
+
+query error DataFusion error: Optimizer rule 'simplify_expressions' 
failed\ncaused by\nError during planning: Second argument of `TRUNC` must be 
non-null scalar Utf8
+SELECT trunc('2009-02-12'::date, NULL::string);
+
+# incorrect format
+query error DataFusion error: Optimizer rule 'simplify_expressions' 
failed\ncaused by\nError during planning: The format argument of `TRUNC` must 
be one of: year, yy, yyyy, month, mm, mon, day, week, quarter.
+SELECT trunc('2009-02-12'::date, 'test'::string);
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to