Jefffrey commented on code in PR #22591:
URL: https://github.com/apache/datafusion/pull/22591#discussion_r3328119195
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -177,82 +177,97 @@ fn is_date_minus_date(lhs: &DataType, rhs: &DataType) ->
bool {
)
}
-/// Computes the difference between two dates and returns the result as Int64
(days)
-/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date
returns an integer
+/// Milliseconds per day, used for Date64 subtraction.
+const MILLIS_PER_DAY: i64 = 86_400_000;
+
+/// Evaluates `Date32 - Date32` or `Date64 - Date64`, returning the difference
in
+/// whole days as `Int64`.
///
-/// Implementation: Uses Arrow's sub_wrapping to get Duration, then converts
to Int64 days
+/// This matches the behavior of PostgreSQL, DuckDB, and MySQL, where
+/// `date - date` yields an integer day count rather than an interval.
fn apply_date_subtraction(
lhs: &ColumnarValue,
rhs: &ColumnarValue,
) -> Result<ColumnarValue> {
- use arrow::compute::kernels::numeric::sub_wrapping;
-
- // Use Arrow's sub_wrapping to compute the Duration result
- let duration_result = apply(lhs, rhs, sub_wrapping)?;
-
- // Convert Duration to Int64 (days)
- match duration_result {
- ColumnarValue::Array(array) => {
- let int64_array = duration_to_days(&array)?;
- Ok(ColumnarValue::Array(int64_array))
+ match (lhs.data_type(), rhs.data_type()) {
+ (DataType::Date32, DataType::Date32) => {
+ subtract_date_to_days::<Date32Type>(lhs, rhs, |l, r| {
+ i64::from(l) - i64::from(r)
+ })
}
- ColumnarValue::Scalar(scalar) => {
- // Convert scalar Duration to Int64 days
- let array = scalar.to_array_of_size(1)?;
- let int64_array = duration_to_days(&array)?;
- let int64_scalar =
ScalarValue::try_from_array(int64_array.as_ref(), 0)?;
- Ok(ColumnarValue::Scalar(int64_scalar))
+ (DataType::Date64, DataType::Date64) => {
+ subtract_date_to_days::<Date64Type>(lhs, rhs, |l, r| {
+ l.wrapping_sub(r) / MILLIS_PER_DAY
+ })
}
+ (_, _) => unreachable!("apply_date_subtraction called with non-date
types"),
}
}
-/// Converts a Duration array to Int64 days
-/// Handles different Duration time units (Second, Millisecond, Microsecond,
Nanosecond)
-fn duration_to_days(array: &ArrayRef) -> Result<ArrayRef> {
- use datafusion_common::cast::{
- as_duration_microsecond_array, as_duration_millisecond_array,
- as_duration_nanosecond_array, as_duration_second_array,
- };
+/// Generic date subtraction: operates directly on the native primitive values
+/// of `T` (i32 for Date32, i64 for Date64), applying `day_diff_fn` to produce
+/// an Int64 day count.
+fn subtract_date_to_days<T: ArrowPrimitiveType>(
+ lhs: &ColumnarValue,
+ rhs: &ColumnarValue,
+ day_diff_fn: impl Fn(T::Native, T::Native) -> i64,
+) -> Result<ColumnarValue>
+where
+ T::Native: Copy,
+{
+ /// Extract the native value from a scalar by converting to a
single-element
+ /// array and reading index 0. Returns `None` for null scalars.
+ fn scalar_to_native<P: ArrowPrimitiveType>(
+ scalar: &ScalarValue,
+ ) -> Result<Option<P::Native>> {
+ if scalar.is_null() {
+ return Ok(None);
+ }
+ let array = scalar.to_array_of_size(1)?;
+ Ok(Some(array.as_primitive::<P>().value(0)))
+ }
- const SECONDS_PER_DAY: i64 = 86_400;
- const MILLIS_PER_DAY: i64 = 86_400_000;
- const MICROS_PER_DAY: i64 = 86_400_000_000;
- const NANOS_PER_DAY: i64 = 86_400_000_000_000;
-
- match array.data_type() {
- DataType::Duration(TimeUnit::Second) => {
- let duration_array = as_duration_second_array(array)?;
- let result: Int64Array = duration_array
- .iter()
- .map(|v| v.map(|val| val / SECONDS_PER_DAY))
- .collect();
- Ok(Arc::new(result))
+ match (lhs, rhs) {
+ (ColumnarValue::Array(left), ColumnarValue::Array(right)) => {
+ let left = left.as_primitive::<T>();
+ let right = right.as_primitive::<T>();
+ let result: Int64Array =
+ arrow::compute::binary::<_, _, _, Int64Type>(left, right,
&day_diff_fn)?;
+ Ok(ColumnarValue::Array(Arc::new(result)))
}
- DataType::Duration(TimeUnit::Millisecond) => {
- let duration_array = as_duration_millisecond_array(array)?;
- let result: Int64Array = duration_array
- .iter()
- .map(|v| v.map(|val| val / MILLIS_PER_DAY))
- .collect();
- Ok(Arc::new(result))
+ (ColumnarValue::Array(left), ColumnarValue::Scalar(right)) => {
+ let left = left.as_primitive::<T>();
+ match scalar_to_native::<T>(right)? {
+ Some(right_val) => {
+ let result: Int64Array = left.unary(|l| day_diff_fn(l,
right_val));
+ Ok(ColumnarValue::Array(Arc::new(result)))
+ }
+ None => Ok(ColumnarValue::Array(new_null_array(
Review Comment:
Can return a null scalar here instead of a null array
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -177,82 +177,97 @@ fn is_date_minus_date(lhs: &DataType, rhs: &DataType) ->
bool {
)
}
-/// Computes the difference between two dates and returns the result as Int64
(days)
-/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date
returns an integer
+/// Milliseconds per day, used for Date64 subtraction.
+const MILLIS_PER_DAY: i64 = 86_400_000;
+
+/// Evaluates `Date32 - Date32` or `Date64 - Date64`, returning the difference
in
+/// whole days as `Int64`.
///
-/// Implementation: Uses Arrow's sub_wrapping to get Duration, then converts
to Int64 days
+/// This matches the behavior of PostgreSQL, DuckDB, and MySQL, where
+/// `date - date` yields an integer day count rather than an interval.
fn apply_date_subtraction(
lhs: &ColumnarValue,
rhs: &ColumnarValue,
) -> Result<ColumnarValue> {
- use arrow::compute::kernels::numeric::sub_wrapping;
-
- // Use Arrow's sub_wrapping to compute the Duration result
- let duration_result = apply(lhs, rhs, sub_wrapping)?;
-
- // Convert Duration to Int64 (days)
- match duration_result {
- ColumnarValue::Array(array) => {
- let int64_array = duration_to_days(&array)?;
- Ok(ColumnarValue::Array(int64_array))
+ match (lhs.data_type(), rhs.data_type()) {
+ (DataType::Date32, DataType::Date32) => {
+ subtract_date_to_days::<Date32Type>(lhs, rhs, |l, r| {
+ i64::from(l) - i64::from(r)
+ })
}
- ColumnarValue::Scalar(scalar) => {
- // Convert scalar Duration to Int64 days
- let array = scalar.to_array_of_size(1)?;
- let int64_array = duration_to_days(&array)?;
- let int64_scalar =
ScalarValue::try_from_array(int64_array.as_ref(), 0)?;
- Ok(ColumnarValue::Scalar(int64_scalar))
+ (DataType::Date64, DataType::Date64) => {
+ subtract_date_to_days::<Date64Type>(lhs, rhs, |l, r| {
+ l.wrapping_sub(r) / MILLIS_PER_DAY
+ })
}
+ (_, _) => unreachable!("apply_date_subtraction called with non-date
types"),
}
}
-/// Converts a Duration array to Int64 days
-/// Handles different Duration time units (Second, Millisecond, Microsecond,
Nanosecond)
-fn duration_to_days(array: &ArrayRef) -> Result<ArrayRef> {
- use datafusion_common::cast::{
- as_duration_microsecond_array, as_duration_millisecond_array,
- as_duration_nanosecond_array, as_duration_second_array,
- };
+/// Generic date subtraction: operates directly on the native primitive values
+/// of `T` (i32 for Date32, i64 for Date64), applying `day_diff_fn` to produce
+/// an Int64 day count.
+fn subtract_date_to_days<T: ArrowPrimitiveType>(
+ lhs: &ColumnarValue,
+ rhs: &ColumnarValue,
+ day_diff_fn: impl Fn(T::Native, T::Native) -> i64,
+) -> Result<ColumnarValue>
+where
+ T::Native: Copy,
+{
+ /// Extract the native value from a scalar by converting to a
single-element
+ /// array and reading index 0. Returns `None` for null scalars.
+ fn scalar_to_native<P: ArrowPrimitiveType>(
Review Comment:
I wonder if we're better off destructuring the scalar value directly instead
of needing to roundtrip through an array just to extract the scalar value 🤔
Tradeoff being that it'd be more verbose in terms of code
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]