This is an automated email from the ASF dual-hosted git repository.
andygrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 33f6bcb6eb fix: honor ANSI mode for make_date/next_day and stop next_day trimming (#4566)
33f6bcb6eb is described below
commit 33f6bcb6ebcb06bad8f96e81169fa48ae20b8d64
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jun 4 10:39:16 2026 -0600
fix: honor ANSI mode for make_date/next_day and stop next_day trimming (#4566)
---
native/core/src/execution/jni_api.rs | 2 -
native/spark-expr/src/comet_scalar_funcs.rs | 11 +-
native/spark-expr/src/datetime_funcs/make_date.rs | 47 ++++-
native/spark-expr/src/datetime_funcs/mod.rs | 2 +
native/spark-expr/src/datetime_funcs/next_day.rs | 201 +++++++++++++++++++++
native/spark-expr/src/lib.rs | 2 +-
.../scala/org/apache/comet/serde/datetime.scala | 36 +++-
.../expressions/datetime/make_date_ansi.sql | 40 ++++
.../sql-tests/expressions/datetime/next_day.sql | 5 +
.../expressions/datetime/next_day_ansi.sql | 35 ++++
10 files changed, 371 insertions(+), 10 deletions(-)
diff --git a/native/core/src/execution/jni_api.rs
b/native/core/src/execution/jni_api.rs
index ac223a462b..c0c6319d7a 100644
--- a/native/core/src/execution/jni_api.rs
+++ b/native/core/src/execution/jni_api.rs
@@ -52,7 +52,6 @@ use
datafusion_spark::function::datetime::date_add::SparkDateAdd;
use datafusion_spark::function::datetime::date_sub::SparkDateSub;
use
datafusion_spark::function::datetime::from_utc_timestamp::SparkFromUtcTimestamp;
use datafusion_spark::function::datetime::last_day::SparkLastDay;
-use datafusion_spark::function::datetime::next_day::SparkNextDay;
use
datafusion_spark::function::datetime::to_utc_timestamp::SparkToUtcTimestamp;
use datafusion_spark::function::hash::crc32::SparkCrc32;
use datafusion_spark::function::hash::sha1::SparkSha1;
@@ -583,7 +582,6 @@ fn register_datafusion_spark_function(session_ctx:
&SessionContext) {
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkDateSub::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkFromUtcTimestamp::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkLastDay::default()));
-
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkNextDay::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkToUtcTimestamp::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha1::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkConcat::default()));
diff --git a/native/spark-expr/src/comet_scalar_funcs.rs
b/native/spark-expr/src/comet_scalar_funcs.rs
index 61e86b3a5e..6a432ea71b 100644
--- a/native/spark-expr/src/comet_scalar_funcs.rs
+++ b/native/spark-expr/src/comet_scalar_funcs.rs
@@ -27,7 +27,7 @@ use crate::{
spark_round, spark_rpad, spark_to_time, spark_unhex, spark_unscaled_value,
EvalMode,
SparkArrayCompact, SparkArrayPositionFunc, SparkArraySlice,
SparkArraysOverlap, SparkContains,
SparkDateDiff, SparkDateFromUnixDate, SparkDateTrunc, SparkMakeDate,
SparkMakeTime,
- SparkSecondsToTimestamp, SparkSizeFunc,
+ SparkNextDay, SparkSecondsToTimestamp, SparkSizeFunc,
};
use arrow::datatypes::DataType;
use datafusion::common::{DataFusionError, Result as DataFusionResult};
@@ -208,6 +208,14 @@ pub fn create_comet_physical_fun_with_eval_mode(
"to_time" => {
make_comet_scalar_udf!("to_time", spark_to_time, without
data_type, fail_on_error)
}
+ // make_date and next_day must be constructed with the ANSI flag (fail_on_error) so they
+ // throw on invalid input under ANSI rather than returning NULL.
+ "make_date" => Ok(Arc::new(ScalarUDF::new_from_impl(SparkMakeDate::new(
+ fail_on_error,
+ )))),
+ "next_day" => Ok(Arc::new(ScalarUDF::new_from_impl(SparkNextDay::new(
+ fail_on_error,
+ )))),
_ => registry.udf(fun_name).map_err(|e| {
DataFusionError::Execution(format!(
"Function {fun_name} not found in the registry: {e}",
@@ -228,6 +236,7 @@ fn all_scalar_functions() -> Vec<Arc<ScalarUDF>> {
Arc::new(ScalarUDF::new_from_impl(SparkDateTrunc::default())),
Arc::new(ScalarUDF::new_from_impl(SparkMakeDate::default())),
Arc::new(ScalarUDF::new_from_impl(SparkMakeTime::default())),
+ Arc::new(ScalarUDF::new_from_impl(SparkNextDay::default())),
Arc::new(ScalarUDF::new_from_impl(SparkSecondsToTimestamp::default())),
Arc::new(ScalarUDF::new_from_impl(SparkSizeFunc::default())),
]
diff --git a/native/spark-expr/src/datetime_funcs/make_date.rs
b/native/spark-expr/src/datetime_funcs/make_date.rs
index 58e4108580..4835680c3a 100644
--- a/native/spark-expr/src/datetime_funcs/make_date.rs
+++ b/native/spark-expr/src/datetime_funcs/make_date.rs
@@ -28,27 +28,61 @@ use std::sync::Arc;
/// Spark-compatible make_date function.
/// Creates a date from year, month, and day columns.
-/// Returns NULL for invalid dates (e.g., Feb 30, month 13, etc.) instead of throwing an error.
+/// For an invalid `(year, month, day)` triple Spark returns NULL when `spark.sql.ansi.enabled` is
+/// false, and throws otherwise. The ANSI flag is carried here as `fail_on_error`.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkMakeDate {
signature: Signature,
+ fail_on_error: bool,
}
impl SparkMakeDate {
- pub fn new() -> Self {
+ pub fn new(fail_on_error: bool) -> Self {
Self {
// Accept any numeric type - we'll cast to Int32 internally
signature: Signature::any(3, Volatility::Immutable),
+ fail_on_error,
}
}
}
impl Default for SparkMakeDate {
fn default() -> Self {
- Self::new()
+ Self::new(false)
}
}
+/// Build the error message Spark surfaces for an invalid date under ANSI mode. Spark wraps the
+/// `java.time.DateTimeException` raised by `LocalDate.of` (via `ansiDateTimeArgumentOutOfRange` /
+/// `ansiDateTimeError`), so we reproduce `java.time`'s messages and validation order: month range,
+/// then day range, then the day-vs-month check.
+fn invalid_date_message(year: i32, month: i32, day: i32) -> String {
+ const MONTH_NAMES: [&str; 12] = [
+ "JANUARY",
+ "FEBRUARY",
+ "MARCH",
+ "APRIL",
+ "MAY",
+ "JUNE",
+ "JULY",
+ "AUGUST",
+ "SEPTEMBER",
+ "OCTOBER",
+ "NOVEMBER",
+ "DECEMBER",
+ ];
+ if !(1..=12).contains(&month) {
+ return format!("Invalid value for MonthOfYear (valid values 1 - 12):
{month}");
+ }
+ if !(1..=31).contains(&day) {
+ return format!("Invalid value for DayOfMonth (valid values 1 - 28/31):
{day}");
+ }
+ if day == 29 && month == 2 {
+ return format!("Invalid date 'February 29' as '{year}' is not a leap
year");
+ }
+ format!("Invalid date '{} {day}'", MONTH_NAMES[(month - 1) as usize])
+}
+
/// Cast an array to Int32Array if it's not already Int32.
fn cast_to_int32(arr: &Arc<dyn Array>) -> Result<Arc<dyn Array>> {
if arr.data_type() == &DataType::Int32 {
@@ -147,7 +181,12 @@ impl ScalarUDFImpl for SparkMakeDate {
match make_date(y, m, d) {
Some(days) => builder.append_value(days),
- None => builder.append_null(),
+ None => {
+ if self.fail_on_error {
+ return
Err(DataFusionError::Execution(invalid_date_message(y, m, d)));
+ }
+ builder.append_null();
+ }
}
}
}
diff --git a/native/spark-expr/src/datetime_funcs/mod.rs
b/native/spark-expr/src/datetime_funcs/mod.rs
index b62ef057df..37c5fa5dd2 100644
--- a/native/spark-expr/src/datetime_funcs/mod.rs
+++ b/native/spark-expr/src/datetime_funcs/mod.rs
@@ -23,6 +23,7 @@ mod extract_date_part;
mod hours;
mod make_date;
mod make_time;
+mod next_day;
mod seconds_to_timestamp;
mod timestamp_trunc;
mod to_time;
@@ -38,6 +39,7 @@ pub use extract_date_part::SparkSecond;
pub use hours::SparkHoursTransform;
pub use make_date::SparkMakeDate;
pub use make_time::SparkMakeTime;
+pub use next_day::SparkNextDay;
pub use seconds_to_timestamp::SparkSecondsToTimestamp;
pub use timestamp_trunc::TimestampTruncExpr;
pub use to_time::spark_to_time;
diff --git a/native/spark-expr/src/datetime_funcs/next_day.rs
b/native/spark-expr/src/datetime_funcs/next_day.rs
new file mode 100644
index 0000000000..207fc40a75
--- /dev/null
+++ b/native/spark-expr/src/datetime_funcs/next_day.rs
@@ -0,0 +1,201 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, Date32Array, StringArray};
+use arrow::compute::cast;
+use arrow::datatypes::{DataType, Date32Type};
+use chrono::{Datelike, Duration, Weekday};
+use datafusion::common::{utils::take_function_args, DataFusionError, Result};
+use datafusion::logical_expr::{
+ ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+use std::any::Any;
+use std::sync::Arc;
+
+/// Spark-compatible `next_day(start_date, day_of_week)` function.
+///
+/// Returns the first date which is later than `start_date` and named as `day_of_week`. Unlike the
+/// upstream `datafusion-spark` implementation, this matches Spark's `DateTimeUtils
+/// .getDayOfWeekFromString` exactly: the `day_of_week` argument is *not* trimmed before matching,
+/// and when it cannot be parsed the behaviour follows `spark.sql.ansi.enabled` (carried here as
+/// `fail_on_error`): throw when ANSI is enabled, otherwise return NULL.
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkNextDay {
+ signature: Signature,
+ fail_on_error: bool,
+}
+
+impl SparkNextDay {
+ pub fn new(fail_on_error: bool) -> Self {
+ Self {
+ // Accept any 2 args - we cast to Date32 / Utf8 internally.
+ signature: Signature::any(2, Volatility::Immutable),
+ fail_on_error,
+ }
+ }
+}
+
+impl Default for SparkNextDay {
+ fn default() -> Self {
+ Self::new(false)
+ }
+}
+
+/// Match a day-of-week name to a [`Weekday`]. Mirrors Spark's
+/// `DateTimeUtils.getDayOfWeekFromString`: case-insensitive, but with no whitespace trimming.
+fn day_of_week_from_string(day_of_week: &str) -> Option<Weekday> {
+ match day_of_week.to_uppercase().as_str() {
+ "SU" | "SUN" | "SUNDAY" => Some(Weekday::Sun),
+ "MO" | "MON" | "MONDAY" => Some(Weekday::Mon),
+ "TU" | "TUE" | "TUESDAY" => Some(Weekday::Tue),
+ "WE" | "WED" | "WEDNESDAY" => Some(Weekday::Wed),
+ "TH" | "THU" | "THURSDAY" => Some(Weekday::Thu),
+ "FR" | "FRI" | "FRIDAY" => Some(Weekday::Fri),
+ "SA" | "SAT" | "SATURDAY" => Some(Weekday::Sat),
+ _ => None,
+ }
+}
+
+/// The first date strictly after `days` (days since the Unix epoch) that falls on `weekday`.
+/// Equivalent to Spark's `DateTimeUtils.getNextDateForDayOfWeek` (a same-weekday start advances a
+/// full week). Returns None only if `days` is not a representable date.
+fn next_date_for_day_of_week(days: i32, weekday: Weekday) -> Option<i32> {
+ let date = Date32Type::to_naive_date_opt(days)?;
+ let advance = 7 - date.weekday().days_since(weekday) as i64;
+ Some(Date32Type::from_naive_date(date + Duration::days(advance)))
+}
+
+impl ScalarUDFImpl for SparkNextDay {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn name(&self) -> &str {
+ "next_day"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+ // Spark marks next_day as always nullable because an invalid day_of_week yields NULL
+ // (when ANSI is disabled) even for non-null inputs.
+ Ok(DataType::Date32)
+ }
+
+ fn invoke_with_args(&self, args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
+ let [date, day_of_week] = take_function_args(self.name(), args.args)?;
+
+ let num_rows = [&date, &day_of_week]
+ .iter()
+ .find_map(|arg| match arg {
+ ColumnarValue::Array(array) => Some(array.len()),
+ ColumnarValue::Scalar(_) => None,
+ })
+ .unwrap_or(1);
+
+ let date_arr = date.into_array(num_rows)?;
+ let day_of_week_arr = day_of_week.into_array(num_rows)?;
+
+ let date_arr = cast(date_arr.as_ref(), &DataType::Date32).map_err(|e| {
+ DataFusionError::Execution(format!(
+ "next_day: failed to cast start date to Date32: {e}"
+ ))
+ })?;
+ let day_of_week_arr = cast(day_of_week_arr.as_ref(),
&DataType::Utf8).map_err(|e| {
+ DataFusionError::Execution(format!("next_day: failed to cast day
of week to Utf8: {e}"))
+ })?;
+
+ let date_array = date_arr
+ .as_any()
+ .downcast_ref::<Date32Array>()
+ .ok_or_else(|| {
+ DataFusionError::Execution("next_day: failed to cast start
date to Date32".into())
+ })?;
+ let day_of_week_array = day_of_week_arr
+ .as_any()
+ .downcast_ref::<StringArray>()
+ .ok_or_else(|| {
+ DataFusionError::Execution("next_day: failed to cast day of
week to Utf8".into())
+ })?;
+
+ let len = date_array.len();
+ let mut builder = Date32Array::builder(len);
+
+ for i in 0..len {
+ if date_array.is_null(i) || day_of_week_array.is_null(i) {
+ builder.append_null();
+ continue;
+ }
+ let days = date_array.value(i);
+ let day_of_week = day_of_week_array.value(i);
+ match day_of_week_from_string(day_of_week) {
+ Some(weekday) => match next_date_for_day_of_week(days,
weekday) {
+ Some(result) => builder.append_value(result),
+ None => builder.append_null(),
+ },
+ None => {
+ if self.fail_on_error {
+ return Err(DataFusionError::Execution(format!(
+ "Illegal input for day of week: {day_of_week}"
+ )));
+ }
+ builder.append_null();
+ }
+ }
+ }
+
+ Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_day_of_week_from_string_no_trim() {
+ // Recognised names match case-insensitively.
+ assert_eq!(day_of_week_from_string("mon"), Some(Weekday::Mon));
+ assert_eq!(day_of_week_from_string("MONDAY"), Some(Weekday::Mon));
+ assert_eq!(day_of_week_from_string("Su"), Some(Weekday::Sun));
+ // Surrounding whitespace is NOT trimmed (Spark does not trim).
+ assert_eq!(day_of_week_from_string(" MO "), None);
+ assert_eq!(day_of_week_from_string("MO "), None);
+ assert_eq!(day_of_week_from_string(""), None);
+ assert_eq!(day_of_week_from_string("NOT_A_DAY"), None);
+ }
+
+ #[test]
+ fn test_next_date_for_day_of_week() {
+ // 2024-01-01 is a Monday (epoch day 19723). Next Monday is 7 days later.
+ let monday =
+ Date32Type::from_naive_date(chrono::NaiveDate::from_ymd_opt(2024,
1, 1).unwrap());
+ let next_mon = next_date_for_day_of_week(monday,
Weekday::Mon).unwrap();
+ assert_eq!(
+ Date32Type::to_naive_date_opt(next_mon).unwrap(),
+ chrono::NaiveDate::from_ymd_opt(2024, 1, 8).unwrap()
+ );
+ // Next Tuesday after a Monday is the following day.
+ let next_tue = next_date_for_day_of_week(monday,
Weekday::Tue).unwrap();
+ assert_eq!(
+ Date32Type::to_naive_date_opt(next_tue).unwrap(),
+ chrono::NaiveDate::from_ymd_opt(2024, 1, 2).unwrap()
+ );
+ }
+}
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
index cb793026d6..174a4ada9a 100644
--- a/native/spark-expr/src/lib.rs
+++ b/native/spark-expr/src/lib.rs
@@ -78,7 +78,7 @@ pub use csv_funcs::*;
pub use datetime_funcs::{
spark_day_name, spark_month_name, spark_to_time, SparkDateDiff,
SparkDateFromUnixDate,
SparkDateTrunc, SparkHour, SparkHoursTransform, SparkMakeDate,
SparkMakeTime, SparkMinute,
- SparkSecond, SparkSecondsToTimestamp, SparkUnixTimestamp,
TimestampTruncExpr,
+ SparkNextDay, SparkSecond, SparkSecondsToTimestamp, SparkUnixTimestamp,
TimestampTruncExpr,
};
pub use error::{decimal_overflow_error, SparkError, SparkErrorWithContext,
SparkResult};
pub use hash_funcs::*;
diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
index 9afb4cc07e..2ce70008c9 100644
--- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
@@ -431,9 +431,41 @@ object CometConvertTimezone extends
CometExpressionSerde[ConvertTimezone] {
}
}
-object CometNextDay extends CometScalarFunction[NextDay]("next_day")
+object CometNextDay extends CometExpressionSerde[NextDay] {
-object CometMakeDate extends CometScalarFunction[MakeDate]("make_date")
+ /**
+ * `failOnError` mirrors `spark.sql.ansi.enabled`: under ANSI, Spark throws on a malformed
+ * `dayOfWeek` rather than returning NULL. The resolved flag is passed to native via the
+ * `ScalarFunc.fail_on_error` field.
+ */
+ override def convert(expr: NextDay, inputs: Seq[Attribute], binding:
Boolean): Option[Expr] = {
+ val childExpr = expr.children.map(exprToProtoInternal(_, inputs, binding))
+ val optExpr = scalarFunctionExprToProtoWithReturnType(
+ "next_day",
+ DateType,
+ expr.failOnError,
+ childExpr: _*)
+ optExprWithFallbackReason(optExpr, expr, expr.children: _*)
+ }
+}
+
+object CometMakeDate extends CometExpressionSerde[MakeDate] {
+
+ /**
+ * `failOnError` mirrors `spark.sql.ansi.enabled`: under ANSI, Spark throws on an invalid
+ * `(year, month, day)` triple rather than returning NULL. The resolved flag is passed to native
+ * via the `ScalarFunc.fail_on_error` field.
+ */
+ override def convert(expr: MakeDate, inputs: Seq[Attribute], binding:
Boolean): Option[Expr] = {
+ val childExpr = expr.children.map(exprToProtoInternal(_, inputs, binding))
+ val optExpr = scalarFunctionExprToProtoWithReturnType(
+ "make_date",
+ DateType,
+ expr.failOnError,
+ childExpr: _*)
+ optExprWithFallbackReason(optExpr, expr, expr.children: _*)
+ }
+}
object CometSecondsToTimestamp
extends CometScalarFunction[SecondsToTimestamp]("seconds_to_timestamp") {
diff --git
a/spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql
b/spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql
new file mode 100644
index 0000000000..44880e96b1
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/make_date_ansi.sql
@@ -0,0 +1,40 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode: Spark's MakeDate wraps the java.time.DateTimeException raised by LocalDate.of in
+-- ansiDateTimeArgumentOutOfRange (4.0, DATETIME_FIELD_OUT_OF_BOUNDS) / ansiDateTimeError (3.4/3.5)
+-- when spark.sql.ansi.enabled=true. Comet's native SparkMakeDate now throws the same
+-- java.time-style message under ANSI instead of returning NULL. The expect_error patterns below
+-- are substrings of that message and match across Spark versions.
+-- Config: spark.sql.ansi.enabled=true
+
+-- Sentinel: a valid date must still execute natively under ANSI. This guards against the
+-- expect_error queries passing vacuously if make_date were to fall back to Spark.
+query
+SELECT make_date(2024, 2, 28)
+
+-- February 30 is not a valid date.
+query expect_error(Invalid date)
+SELECT make_date(2024, 2, 30)
+
+-- Month 13 is out of range.
+query expect_error(MonthOfYear)
+SELECT make_date(2024, 13, 1)
+
+-- Day 0 is out of range.
+query expect_error(DayOfMonth)
+SELECT make_date(2024, 6, 0)
diff --git
a/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
b/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
index 057c9daea6..dc57b5e872 100644
--- a/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/next_day.sql
@@ -72,3 +72,8 @@ SELECT next_day(date('2023-01-01'), 'Monday'),
next_day(date('2023-01-01'), 'Sun
-- null handling
query
SELECT next_day(NULL, 'Monday'), next_day(date('2023-01-01'), NULL)
+
+-- whitespace is NOT trimmed before matching the day name (Spark matches character-for-character),
+-- so a padded value does not match any day and yields NULL (non-ANSI), same as Spark.
+query
+SELECT next_day(date('2024-01-01'), ' MO '), next_day(date('2024-01-01'), 'MO
')
diff --git
a/spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql
b/spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql
new file mode 100644
index 0000000000..f289f5b34d
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/datetime/next_day_ansi.sql
@@ -0,0 +1,35 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ANSI mode: Spark's NextDay throws on a malformed dayOfWeek (SparkIllegalArgumentException /
+-- ILLEGAL_DAY_OF_WEEK on 3.5+, IllegalArgumentException on 3.4) when spark.sql.ansi.enabled=true.
+-- Comet's native next_day now throws the same "Illegal input for day of week" message under ANSI
+-- instead of returning NULL.
+-- Config: spark.sql.ansi.enabled=true
+
+-- Sentinel: a recognised day name must still execute natively under ANSI. This guards against the
+-- expect_error queries passing vacuously if next_day were to fall back to Spark.
+query
+SELECT next_day(date('2024-01-01'), 'Monday')
+
+-- Unrecognised day name.
+query expect_error(Illegal input for day of week)
+SELECT next_day(date('2024-01-01'), 'NOT_A_DAY')
+
+-- Empty string is not a valid day name.
+query expect_error(Illegal input for day of week)
+SELECT next_day(date('2024-01-01'), '')
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]