pitrou commented on code in PR #43302:
URL: https://github.com/apache/arrow/pull/43302#discussion_r1705206994
##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -305,19 +310,198 @@ BinaryToBinaryCastExec(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
ctx, input, out->array_data().get());
}
+// View -> Span
+template <typename O, typename I>
+enable_if_t<is_binary_view_like_type<I>::value &&
is_base_binary_type<O>::value, Status>
+BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
+ using OutputBuilderType = typename TypeTraits<O>::BuilderType;
+ const CastOptions& options = checked_cast<const
CastState&>(*ctx->state()).options;
+ const ArraySpan& input = batch[0].array;
+
+ if constexpr (!I::is_utf8 && O::is_utf8) {
+ if (!options.allow_invalid_utf8) {
+ InitializeUTF8();
+ ArraySpanVisitor<I> visitor;
+ Utf8Validator validator;
+ RETURN_NOT_OK(visitor.Visit(input, &validator));
+ }
+ }
+
+ // XXX: a more efficient implementation that zero-copies the validity bitmap
+ // is possible, but requires a more complex implementation for building the
+ // offsets and data buffers
Review Comment:
> How can I do that while still using the builders? I will create an issue
> and link it from the comments.

You could instead use individual TypedBufferBuilders for the offsets and
data buffers. That shouldn't add a lot of complexity.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]