alamb commented on code in PR #5764:
URL: https://github.com/apache/arrow-datafusion/pull/5764#discussion_r1152340222


##########
datafusion/core/tests/sqllogictests/test_files/timestamps.slt:
##########
@@ -237,4 +237,110 @@ SELECT INTERVAL '8' YEAR + '2000-01-01T00:00:00'::timestamp;
 query P
 SELECT INTERVAL '8' MONTH + '2000-01-01T00:00:00'::timestamp;
 ----
-2000-09-01T00:00:00
\ No newline at end of file
+2000-09-01T00:00:00
+
+# Interval columns are created with timestamp subtraction in subquery since they are not supported yet
+statement ok
+create table foo (val int, ts1 timestamp, ts2 timestamp) as values
+(1, '2023-03-15T15:00:20.000000123'::timestamp, '2023-01-20T23:00:00.000000099'::timestamp),
+(2, '2023-02-28T12:01:55.000123456'::timestamp, '2000-02-23T11:00:00.123000001'::timestamp),
+(3, '2033-11-02T23:22:13.000123456'::timestamp, '1990-03-01T00:00:00.333000001'::timestamp),
+(4, '2003-07-11T01:31:15.000123456'::timestamp, '2045-04-11T15:00:00.000000001'::timestamp);
+
+# Timestamp - Timestamp
+query I?
+SELECT val, ts1 - ts2 FROM foo ORDER BY ts2 - ts1;
+----
+4 0 years 0 mons -15250 days -13 hours -28 mins -44.999876545 secs
+3 0 years 0 mons 15952 days 23 hours 22 mins 12.667123455 secs
+2 0 years 0 mons 8406 days 1 hours 1 mins 54.877123455 secs
+1 0 years 0 mons 53 days 16 hours 0 mins 20.000000024 secs
+
+# Interval - Interval
+query ?
+SELECT subq.interval1 - subq.interval2
+FROM (
+  SELECT ts1 - ts2 AS interval1,
+  ts2 - ts1 AS interval2
+  FROM foo
+) AS subq;
+----
+0 years 0 mons 106 days 32 hours 0 mins 40.000000048 secs
+0 years 0 mons 16812 days 2 hours 3 mins 49.754246910 secs
+0 years 0 mons 31904 days 46 hours 44 mins 25.334246910 secs
+0 years 0 mons -30500 days -26 hours -57 mins -29.999753090 secs
+
+# Interval + Interval
+query ?
+SELECT subq.interval1 + subq.interval2
+FROM (
+  SELECT ts1 - ts2 AS interval1,
+  ts2 - ts1 AS interval2
+  FROM foo
+) AS subq;
+----
+0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs
+0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs
+0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs
+0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs
+
+# Timestamp - Interval
+query P
+SELECT subq.ts1 - subq.interval1
+FROM (
+  SELECT ts1,
+  ts1 - ts2 AS interval1
+  FROM foo
+) AS subq;
+----
+2023-01-20T23:00:00.000000099
+2000-02-23T11:00:00.123000001
+1990-03-01T00:00:00.333000001
+2045-04-11T15:00:00.000000001
+
+# Interval + Timestamp
+query P
+SELECT subq.interval1 + subq.ts1
+FROM (
+  SELECT ts1,
+  ts1 - ts2 AS interval1
+  FROM foo
+) AS subq;
+----
+2023-05-08T07:00:40.000000147
+2046-03-05T13:03:49.877246911
+2077-07-07T22:44:25.667246911
+1961-10-08T12:02:30.000246911
+
+# Timestamp + Interval
+query P
+SELECT subq.ts1 + subq.interval1
+FROM (
+  SELECT ts1,
+  ts1 - ts2 AS interval1
+  FROM foo
+) AS subq;
+----
+2023-05-08T07:00:40.000000147
+2046-03-05T13:03:49.877246911
+2077-07-07T22:44:25.667246911
+1961-10-08T12:02:30.000246911
+
+# Timestamp + Timestamp => error

Review Comment:
   👍 



##########
datafusion/physical-expr/src/expressions/datetime.rs:
##########
@@ -142,14 +138,30 @@ impl PhysicalExpr for DateTimeIntervalExpr {
                 return Err(DataFusionError::Internal(msg.to_string()));
             }
         };
+        // RHS is first checked. If it is a Scalar, there are 2 options:

Review Comment:
   Longer term I think it would be good to move the date_time arithmetic into https://github.com/apache/arrow-datafusion/tree/main/datafusion/physical-expr/src/expressions/binary as these really are binary operations.
   
   That would also set us up so that when the kernels are added to arrow-rs (i.e. as part of https://github.com/apache/arrow-rs/issues/3958) it would be easier to migrate.
   
   I like how this PR followed the existing pattern in `DateTimeIntervalExpr`, even if that may not be our ideal end state.
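   
   As a rough sketch of that direction (not code from this PR), a binary expression could dispatch the temporal type pairs to the array kernels added here. The import path for `evaluate_temporal_arrays` and the surrounding plumbing below are assumptions, not the actual `BinaryExpr` implementation:
   
   ```rust
   use arrow::array::ArrayRef;
   use arrow::datatypes::DataType;
   use datafusion_common::{DataFusionError, Result};
   use datafusion_expr::{ColumnarValue, Operator};
   // Assumed import path for the helper added in this PR.
   use datafusion_physical_expr::expressions::datetime::evaluate_temporal_arrays;
   
   fn evaluate_binary_temporal(
       lhs: &ArrayRef,
       op: &Operator,
       rhs: &ArrayRef,
   ) -> Result<ColumnarValue> {
       // Map the operator onto the +1 / -1 sign convention used by the new kernels.
       let sign = match op {
           Operator::Plus => 1,
           Operator::Minus => -1,
           other => {
               return Err(DataFusionError::Internal(format!(
                   "unsupported temporal operator {other:?}"
               )))
           }
       };
       match (lhs.data_type(), rhs.data_type()) {
           // The same type pairs DateTimeIntervalExpr handles in this PR;
           // Interval +/- Timestamp commutation is resolved inside the helper.
           (DataType::Timestamp(_, _), DataType::Timestamp(_, _))
           | (DataType::Interval(_), DataType::Interval(_))
           | (DataType::Timestamp(_, _), DataType::Interval(_))
           | (DataType::Interval(_), DataType::Timestamp(_, _)) => {
               evaluate_temporal_arrays(lhs, sign, rhs)
           }
           // Everything else would fall through to the existing binary kernels.
           _ => Err(DataFusionError::NotImplemented(
               "non-temporal types are handled by the regular binary kernels".to_string(),
           )),
       }
   }
   ```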



##########
datafusion/physical-expr/src/expressions/datetime.rs:
##########
@@ -239,6 +251,542 @@ pub fn evaluate_array(
     Ok(ColumnarValue::Array(ret))
 }
 
+macro_rules! ts_sub_op {
+    ($lhs:ident, $rhs:ident, $lhs_tz:ident, $rhs_tz:ident, $coef:expr, $caster:expr, $op:expr, $ts_unit:expr, $mode:expr, $type_in:ty, $type_out:ty) => {{
+        let prim_array_lhs = $caster(&$lhs)?;
+        let prim_array_rhs = $caster(&$rhs)?;
+        let ret = Arc::new(try_binary_op::<$type_in, $type_in, _, $type_out>(
+            prim_array_lhs,
+            prim_array_rhs,
+            |ts1, ts2| {
+                let (lhs_tz, rhs_tz) =
+                    (parse_timezones($lhs_tz), parse_timezones($rhs_tz));
+                Ok($op(
+                    $ts_unit(&with_timezone_to_naive_datetime::<$mode>(
+                        ts1.mul_wrapping($coef),
+                        &lhs_tz,
+                    )?),
+                    $ts_unit(&with_timezone_to_naive_datetime::<$mode>(
+                        ts2.mul_wrapping($coef),
+                        &rhs_tz,
+                    )?),
+                ))
+            },
+        )?) as ArrayRef;
+        ret
+    }};
+}
+macro_rules! interval_op {
+    ($lhs:ident, $rhs:ident, $caster:expr, $op:expr, $sign:ident, $type_in:ty) => {{
+        let prim_array_lhs = $caster(&$lhs)?;
+        let prim_array_rhs = $caster(&$rhs)?;
+        let ret = Arc::new(binary::<$type_in, $type_in, _, $type_in>(
+            prim_array_lhs,
+            prim_array_rhs,
+            |interval1, interval2| $op(interval1, interval2, $sign),
+        )?) as ArrayRef;
+        ret
+    }};
+}
+macro_rules! interval_cross_op {
+    ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $commute:ident, $type_in1:ty, $type_in2:ty) => {{
+        let prim_array_lhs = $caster1(&$lhs)?;
+        let prim_array_rhs = $caster2(&$rhs)?;
+        let ret = Arc::new(binary::<$type_in1, $type_in2, _, IntervalMonthDayNanoType>(
+            prim_array_lhs,
+            prim_array_rhs,
+            |interval1, interval2| $op(interval1, interval2, $sign, $commute),
+        )?) as ArrayRef;
+        ret
+    }};
+}
+macro_rules! ts_interval_op {
+    ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $type_in1:ty, $type_in2:ty) => {{
+        let prim_array_lhs = $caster1(&$lhs)?;
+        let prim_array_rhs = $caster2(&$rhs)?;
+        let ret = Arc::new(try_binary_op::<$type_in1, $type_in2, _, $type_in1>(
+            prim_array_lhs,
+            prim_array_rhs,
+            |ts, interval| Ok($op(ts, interval as i128, $sign)?),
+        )?) as ArrayRef;
+        ret
+    }};
+}
+// This function evaluates temporal array operations, such as timestamp - timestamp, interval + interval,
+// timestamp + interval, and interval + timestamp. It takes two arrays as input and an integer sign representing
+// the operation (+1 for addition and -1 for subtraction). It returns a ColumnarValue as output, which can hold
+// either a scalar or an array.
+pub fn evaluate_temporal_arrays(
+    array_lhs: &ArrayRef,
+    sign: i32,
+    array_rhs: &ArrayRef,
+) -> Result<ColumnarValue> {
+    let ret = match (array_lhs.data_type(), array_rhs.data_type()) {
+        // Timestamp - Timestamp operations, operands of only the same types are supported.
+        (DataType::Timestamp(_, _), DataType::Timestamp(_, _)) => {
+            ts_array_op(array_lhs, array_rhs)?
+        }
+        // Interval (+ , -) Interval operations
+        (DataType::Interval(_), DataType::Interval(_)) => {
+            interval_array_op(array_lhs, array_rhs, sign)?
+        }
+        // Timestamp (+ , -) Interval and Interval + Timestamp operations
+        // Interval - Timestamp operation is not rational hence not supported
+        (DataType::Timestamp(_, _), DataType::Interval(_)) => {
+            ts_interval_array_op(array_lhs, sign, array_rhs)?
+        }
+        (DataType::Interval(_), DataType::Timestamp(_, _)) if sign == 1 => {
+            ts_interval_array_op(array_rhs, sign, array_lhs)?
+        }
+        (_, _) => Err(DataFusionError::Execution(format!(
+            "Invalid array types for DateIntervalExpr: {:?} {} {:?}",
+            array_lhs.data_type(),
+            sign,
+            array_rhs.data_type()
+        )))?,
+    };
+    Ok(ColumnarValue::Array(ret))
+}
+
+#[inline]
+unsafe fn build_primitive_array<O: ArrowPrimitiveType>(
+    len: usize,
+    buffer: Buffer,
+    null_count: usize,
+    null_buffer: Option<Buffer>,
+) -> PrimitiveArray<O> {
+    PrimitiveArray::from(ArrayData::new_unchecked(
+        O::DATA_TYPE,
+        len,
+        Some(null_count),
+        null_buffer,
+        0,
+        vec![buffer],
+        vec![],
+    ))
+}
+
+pub fn try_binary_op<A, B, F, O>(

Review Comment:
   I tried this and it seems to work well. @berkaysynnada I made a PR here for your consideration: https://github.com/synnada-ai/arrow-datafusion/pull/68
   
   It also has the very nice property of removing `unsafe` code from this PR.
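   
   For anyone following along, the safe version is basically an arity-style kernel: compute each output value with a fallible closure and let arrow assemble the `PrimitiveArray` (validity bitmap included), so there is no manual `ArrayData`/`Buffer` construction. The helper below is only a sketch of that idea with an assumed name (`try_binary_op_safe`); it is not the code in the linked PR:
   
   ```rust
   use std::sync::Arc;
   
   use arrow::array::{ArrayRef, PrimitiveArray};
   use arrow::datatypes::ArrowPrimitiveType;
   use datafusion_common::{DataFusionError, Result};
   
   /// Safe, fallible element-wise binary op; nulls in either input propagate.
   fn try_binary_op_safe<A, B, O, F>(
       lhs: &PrimitiveArray<A>,
       rhs: &PrimitiveArray<B>,
       op: F,
   ) -> Result<ArrayRef>
   where
       A: ArrowPrimitiveType,
       B: ArrowPrimitiveType,
       O: ArrowPrimitiveType,
       F: Fn(A::Native, B::Native) -> Result<O::Native>,
   {
       if lhs.len() != rhs.len() {
           return Err(DataFusionError::Internal(
               "arrays must have the same length".to_string(),
           ));
       }
       // Collect Option values; arrow builds the output buffer and null bitmap.
       let values: Vec<Option<O::Native>> = lhs
           .iter()
           .zip(rhs.iter())
           .map(|(l, r)| match (l, r) {
               (Some(l), Some(r)) => op(l, r).map(Some),
               _ => Ok(None),
           })
           .collect::<Result<_>>()?;
       Ok(Arc::new(PrimitiveArray::<O>::from(values)) as ArrayRef)
   }
   ```
   
   Compared to `build_primitive_array`, this trades a little per-element overhead for not having to uphold the `ArrayData::new_unchecked` invariants by hand.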


