This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c386319dec GH-34210: [C++] Make casting timestamp and duration
zero-copy when TimeUnit matches (#34270)
c386319dec is described below
commit c386319decc58dadb86c3e620613e118a458dae5
Author: Rok Mihevc <[email protected]>
AuthorDate: Fri Mar 3 23:23:09 2023 +0100
GH-34210: [C++] Make casting timestamp and duration zero-copy when TimeUnit
matches (#34270)
### Rationale for this change
Casting from e.g. `timestamp(s, "UTC")` to `timestamp(s)` could be a
metadata-only change, but is currently a multiplication operation.
### What changes are included in this PR?
This change adds a zero-copy casting path for duration casts whose input and
output units are equal, and for timestamp casts whose input and output units
are equal, even if their timezones differ.
### Are these changes tested?
We test for correctness and zero-copy.
### Are there any user-facing changes?
No.
* Closes: #34210
Authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
---
.../arrow/compute/kernels/scalar_cast_temporal.cc | 35 ++++++++++++++++------
cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 8 +++++
2 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index 375cb0a0da..50d24ecab0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -147,17 +147,24 @@ struct CastFunctor<
enable_if_t<(is_timestamp_type<O>::value && is_timestamp_type<I>::value) ||
(is_duration_type<O>::value && is_duration_type<I>::value)>> {
static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
- const ArraySpan& input = batch[0].array;
- ArraySpan* output = out->array_span_mutable();
-
const auto& in_type = checked_cast<const I&>(*batch[0].type());
- const auto& out_type = checked_cast<const O&>(*output->type);
+ const auto& out_type = checked_cast<const O&>(*out->type());
- // The units may be equal if the time zones are different. We might go to
- // lengths to make this zero copy in the future but we leave it for now
+ if (in_type.unit() == out_type.unit()) {
+ return ZeroCopyCastExec(ctx, batch, out);
+ }
+
+ ArrayData* out_arr = out->array_data().get();
+ DCHECK_EQ(0, out_arr->offset);
+ int value_size = batch[0].type()->byte_width();
+ DCHECK_OK(ctx->Allocate(out_arr->length *
value_size).Value(&out_arr->buffers[1]));
+
+ ArraySpan output_span;
+ output_span.SetMembers(*out_arr);
+ const ArraySpan& input = batch[0].array;
auto conversion = util::GetTimestampConversion(in_type.unit(),
out_type.unit());
return ShiftTime<int64_t, int64_t>(ctx, conversion.first,
conversion.second, input,
- output);
+ &output_span);
}
};
@@ -452,6 +459,16 @@ void AddCrossUnitCast(CastFunction* func) {
DCHECK_OK(func->AddKernel(Type::type_id, std::move(kernel)));
}
+template <typename Type>
+void AddCrossUnitCastNoPreallocate(CastFunction* func) {
+ ScalarKernel kernel;
+ kernel.exec = CastFunctor<Type, Type>::Exec;
+ kernel.null_handling = NullHandling::INTERSECTION;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+ kernel.signature = KernelSignature::Make({InputType(Type::type_id)},
kOutputTargetType);
+ DCHECK_OK(func->AddKernel(Type::type_id, std::move(kernel)));
+}
+
std::shared_ptr<CastFunction> GetDate32Cast() {
auto func = std::make_shared<CastFunction>("cast_date32", Type::DATE32);
auto out_ty = date32();
@@ -499,7 +516,7 @@ std::shared_ptr<CastFunction> GetDurationCast() {
AddZeroCopyCast(Type::INT64, /*in_type=*/int64(), kOutputTargetType,
func.get());
// Between durations
- AddCrossUnitCast<DurationType>(func.get());
+ AddCrossUnitCastNoPreallocate<DurationType>(func.get());
return func;
}
@@ -574,7 +591,7 @@ std::shared_ptr<CastFunction> GetTimestampCast() {
func.get());
// From one timestamp to another
- AddCrossUnitCast<TimestampType>(func.get());
+ AddCrossUnitCastNoPreallocate<TimestampType>(func.get());
return func;
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 85da81357b..a7613eb2b8 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1067,6 +1067,10 @@ TEST(Cast, TimestampToTimestamp) {
CheckCast(will_be_truncated, coarse, options);
}
+ options.to_type = timestamp(TimeUnit::SECOND);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[0, null, 200,
1, 2]"),
+ ArrayFromJSON(timestamp(TimeUnit::SECOND), "[0, null, 200, 1,
2]"), options);
+
for (auto types : {
TimestampTypePair{timestamp(TimeUnit::SECOND),
timestamp(TimeUnit::MICRO)},
TimestampTypePair{timestamp(TimeUnit::MILLI),
timestamp(TimeUnit::NANO)},
@@ -1125,6 +1129,10 @@ TEST(Cast, TimestampZeroCopy) {
}
CheckCastZeroCopy(ArrayFromJSON(int64(), "[0, null, 2000, 1000, 0]"),
timestamp(TimeUnit::SECOND));
+
+ CheckCastZeroCopy(
+ ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[0, null, 2000, 1000,
0]"),
+ timestamp(TimeUnit::SECOND));
}
TEST(Cast, TimestampToTimestampMultiplyOverflow) {