This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 535b925bf0 GH-39232: [C++] Support binary to fixed_size_binary cast (#39236)
535b925bf0 is described below
commit 535b925bf073fb1af4e6e23ab54027f30dc8751f
Author: Jin Shang <[email protected]>
AuthorDate: Fri Dec 22 01:34:06 2023 +0800
GH-39232: [C++] Support binary to fixed_size_binary cast (#39236)
### Rationale for this change
Add binary to fixed_size_binary cast.
### What changes are included in this PR?
Add binary to fixed_size_binary cast.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* Closes: #39232
Authored-by: Jin Shang <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
.../arrow/compute/kernels/scalar_cast_string.cc | 61 +++++++++++++++++++---
cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 16 ++++++
2 files changed, 69 insertions(+), 8 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index ebeb597207..a6576e4e4c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -20,11 +20,14 @@
#include "arrow/array/array_base.h"
#include "arrow/array/builder_binary.h"
+#include "arrow/compute/kernels/base_arithmetic_internal.h"
#include "arrow/compute/kernels/codegen_internal.h"
#include "arrow/compute/kernels/common_internal.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
#include "arrow/compute/kernels/temporal_internal.h"
#include "arrow/result.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
#include "arrow/util/formatting.h"
#include "arrow/util/int_util.h"
#include "arrow/util/utf8_internal.h"
@@ -284,9 +287,8 @@ Status CastBinaryToBinaryOffsets<int64_t,
int32_t>(KernelContext* ctx,
}
template <typename O, typename I>
-enable_if_base_binary<I, Status> BinaryToBinaryCastExec(KernelContext* ctx,
- const ExecSpan& batch,
- ExecResult* out) {
+enable_if_t<is_base_binary_type<I>::value &&
!is_fixed_size_binary_type<O>::value, Status>
+BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
const CastOptions& options = checked_cast<const
CastState&>(*ctx->state()).options;
const ArraySpan& input = batch[0].array;
@@ -387,6 +389,33 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan&
batch, ExecResult* ou
return ZeroCopyCastExec(ctx, batch, out);
}
+template <typename O, typename I>
+enable_if_t<is_base_binary_type<I>::value && std::is_same<O,
FixedSizeBinaryType>::value,
+ Status>
+BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
+ const CastOptions& options = checked_cast<const
CastState&>(*ctx->state()).options;
+ FixedSizeBinaryBuilder builder(options.to_type.GetSharedPtr(),
ctx->memory_pool());
+ const ArraySpan& input = batch[0].array;
+ RETURN_NOT_OK(builder.Reserve(input.length));
+
+ RETURN_NOT_OK(VisitArraySpanInline<I>(
+ input,
+ [&](std::string_view v) {
+ if (v.size() != static_cast<size_t>(builder.byte_width())) {
+ return Status::Invalid("Failed casting from ",
input.type->ToString(), " to ",
+ options.to_type.ToString(), ": widths must
match");
+ }
+ builder.UnsafeAppend(v);
+ return Status::OK();
+ },
+ [&] {
+ builder.UnsafeAppendNull();
+ return Status::OK();
+ }));
+
+ return
builder.FinishInternal(&std::get<std::shared_ptr<ArrayData>>(out->value));
+}
+
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
@@ -452,6 +481,26 @@ void AddBinaryToBinaryCast(CastFunction* func) {
AddBinaryToBinaryCast<OutType, FixedSizeBinaryType>(func);
}
+template <typename InType>
+void AddBinaryToFixedSizeBinaryCast(CastFunction* func) {
+ auto resolver_fsb = [](KernelContext* ctx, const std::vector<TypeHolder>&) {
+ const CastOptions& options = checked_cast<const
CastState&>(*ctx->state()).options;
+ return options.to_type;
+ };
+
+ DCHECK_OK(func->AddKernel(InType::type_id, {InputType(InType::type_id)},
resolver_fsb,
+ BinaryToBinaryCastExec<FixedSizeBinaryType,
InType>,
+ NullHandling::COMPUTED_NO_PREALLOCATE));
+}
+
+void AddBinaryToFixedSizeBinaryCast(CastFunction* func) {
+ AddBinaryToFixedSizeBinaryCast<StringType>(func);
+ AddBinaryToFixedSizeBinaryCast<BinaryType>(func);
+ AddBinaryToFixedSizeBinaryCast<LargeStringType>(func);
+ AddBinaryToFixedSizeBinaryCast<LargeBinaryType>(func);
+ AddBinaryToFixedSizeBinaryCast<FixedSizeBinaryType>(func);
+}
+
} // namespace
std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts() {
@@ -483,11 +532,7 @@ std::vector<std::shared_ptr<CastFunction>>
GetBinaryLikeCasts() {
std::make_shared<CastFunction>("cast_fixed_size_binary",
Type::FIXED_SIZE_BINARY);
AddCommonCasts(Type::FIXED_SIZE_BINARY, OutputType(ResolveOutputFromOptions),
cast_fsb.get());
- DCHECK_OK(cast_fsb->AddKernel(
- Type::FIXED_SIZE_BINARY, {InputType(Type::FIXED_SIZE_BINARY)},
- OutputType(FirstType),
- BinaryToBinaryCastExec<FixedSizeBinaryType, FixedSizeBinaryType>,
- NullHandling::COMPUTED_NO_PREALLOCATE));
+ AddBinaryToFixedSizeBinaryCast(cast_fsb.get());
return {cast_binary, cast_large_binary, cast_string, cast_large_string,
cast_fsb};
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index c84125bbdd..b429c8175b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -2171,6 +2171,22 @@ TEST(Cast, StringToString) {
}
}
+TEST(Cast, BinaryOrStringToFixedSizeBinary) {
+ for (auto in_type : {utf8(), large_utf8(), binary(), large_binary()}) {
+ auto valid_input = ArrayFromJSON(in_type, R"(["foo", null, "bar", "baz",
"quu"])");
+ auto invalid_input = ArrayFromJSON(in_type, R"(["foo", null, "bar", "baz",
"quux"])");
+
+ CheckCast(valid_input, ArrayFromJSON(fixed_size_binary(3), R"(["foo",
null, "bar",
+ "baz", "quu"])"));
+ CheckCastFails(invalid_input, CastOptions::Safe(fixed_size_binary(3)));
+ CheckCastFails(valid_input, CastOptions::Safe(fixed_size_binary(5)));
+
+ auto empty_input = ArrayFromJSON(in_type, "[]");
+ CheckCast(empty_input, ArrayFromJSON(fixed_size_binary(3), "[]"));
+ CheckCast(empty_input, ArrayFromJSON(fixed_size_binary(5), "[]"));
+ }
+}
+
TEST(Cast, IntToString) {
for (auto string_type : {utf8(), large_utf8()}) {
CheckCast(ArrayFromJSON(int8(), "[0, 1, 127, -128, null]"),