This is an automated email from the ASF dual-hosted git repository.

rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c386319dec GH-34210: [C++] Make casting timestamp and duration 
zero-copy when TimeUnit matches (#34270)
c386319dec is described below

commit c386319decc58dadb86c3e620613e118a458dae5
Author: Rok Mihevc <[email protected]>
AuthorDate: Fri Mar 3 23:23:09 2023 +0100

    GH-34210: [C++] Make casting timestamp and duration zero-copy when TimeUnit 
matches (#34270)
    
    ### Rationale for this change
    
    Casting from e.g. `timestamp(s, "UTC")` to `timestamp(s)` could be a 
metadata-only change, but it is currently implemented as a multiplication operation.
    
    ### What changes are included in this PR?
    
    This change adds a zero-copy casting path for durations that have equal 
units and for timestamps that have equal units but potentially different time zones.
    
    ### Are these changes tested?
    
    Tests cover both correctness and the zero-copy behavior.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #34210
    
    Authored-by: Rok Mihevc <[email protected]>
    Signed-off-by: Rok Mihevc <[email protected]>
---
 .../arrow/compute/kernels/scalar_cast_temporal.cc  | 35 ++++++++++++++++------
 cpp/src/arrow/compute/kernels/scalar_cast_test.cc  |  8 +++++
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index 375cb0a0da..50d24ecab0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -147,17 +147,24 @@ struct CastFunctor<
     enable_if_t<(is_timestamp_type<O>::value && is_timestamp_type<I>::value) ||
                 (is_duration_type<O>::value && is_duration_type<I>::value)>> {
   static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
-    const ArraySpan& input = batch[0].array;
-    ArraySpan* output = out->array_span_mutable();
-
     const auto& in_type = checked_cast<const I&>(*batch[0].type());
-    const auto& out_type = checked_cast<const O&>(*output->type);
+    const auto& out_type = checked_cast<const O&>(*out->type());
 
-    // The units may be equal if the time zones are different. We might go to
-    // lengths to make this zero copy in the future but we leave it for now
+    if (in_type.unit() == out_type.unit()) {
+      return ZeroCopyCastExec(ctx, batch, out);
+    }
+
+    ArrayData* out_arr = out->array_data().get();
+    DCHECK_EQ(0, out_arr->offset);
+    int value_size = batch[0].type()->byte_width();
+    DCHECK_OK(ctx->Allocate(out_arr->length * 
value_size).Value(&out_arr->buffers[1]));
+
+    ArraySpan output_span;
+    output_span.SetMembers(*out_arr);
+    const ArraySpan& input = batch[0].array;
     auto conversion = util::GetTimestampConversion(in_type.unit(), 
out_type.unit());
     return ShiftTime<int64_t, int64_t>(ctx, conversion.first, 
conversion.second, input,
-                                       output);
+                                       &output_span);
   }
 };
 
@@ -452,6 +459,16 @@ void AddCrossUnitCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(Type::type_id, std::move(kernel)));
 }
 
+template <typename Type>
+void AddCrossUnitCastNoPreallocate(CastFunction* func) {
+  ScalarKernel kernel;
+  kernel.exec = CastFunctor<Type, Type>::Exec;
+  kernel.null_handling = NullHandling::INTERSECTION;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  kernel.signature = KernelSignature::Make({InputType(Type::type_id)}, 
kOutputTargetType);
+  DCHECK_OK(func->AddKernel(Type::type_id, std::move(kernel)));
+}
+
 std::shared_ptr<CastFunction> GetDate32Cast() {
   auto func = std::make_shared<CastFunction>("cast_date32", Type::DATE32);
   auto out_ty = date32();
@@ -499,7 +516,7 @@ std::shared_ptr<CastFunction> GetDurationCast() {
   AddZeroCopyCast(Type::INT64, /*in_type=*/int64(), kOutputTargetType, 
func.get());
 
   // Between durations
-  AddCrossUnitCast<DurationType>(func.get());
+  AddCrossUnitCastNoPreallocate<DurationType>(func.get());
 
   return func;
 }
@@ -574,7 +591,7 @@ std::shared_ptr<CastFunction> GetTimestampCast() {
                                                 func.get());
 
   // From one timestamp to another
-  AddCrossUnitCast<TimestampType>(func.get());
+  AddCrossUnitCastNoPreallocate<TimestampType>(func.get());
 
   return func;
 }
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 85da81357b..a7613eb2b8 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1067,6 +1067,10 @@ TEST(Cast, TimestampToTimestamp) {
     CheckCast(will_be_truncated, coarse, options);
   }
 
+  options.to_type = timestamp(TimeUnit::SECOND);
+  CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[0, null, 200, 
1, 2]"),
+            ArrayFromJSON(timestamp(TimeUnit::SECOND), "[0, null, 200, 1, 
2]"), options);
+
   for (auto types : {
            TimestampTypePair{timestamp(TimeUnit::SECOND), 
timestamp(TimeUnit::MICRO)},
            TimestampTypePair{timestamp(TimeUnit::MILLI), 
timestamp(TimeUnit::NANO)},
@@ -1125,6 +1129,10 @@ TEST(Cast, TimestampZeroCopy) {
   }
   CheckCastZeroCopy(ArrayFromJSON(int64(), "[0, null, 2000, 1000, 0]"),
                     timestamp(TimeUnit::SECOND));
+
+  CheckCastZeroCopy(
+      ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[0, null, 2000, 1000, 
0]"),
+      timestamp(TimeUnit::SECOND));
 }
 
 TEST(Cast, TimestampToTimestampMultiplyOverflow) {

Reply via email to