mapleFU commented on code in PR #44822:
URL: https://github.com/apache/arrow/pull/44822#discussion_r1878168755
##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -327,31 +329,49 @@ BinaryToBinaryCastExec(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
}
}
+ ArrayData* output = out->array_data().get();
+ output->length = input.length;
+ output->SetNullCount(input.null_count);
+
+ // Set up validity bitmap
+ if (input.offset == output->offset) {
+ output->buffers[0] = input.GetBuffer(0);
+ } else {
+ // When the offsets are different (e.g., due to slice operation), we need
to check if
+ // the null bitmap buffer is not null before copying it. The null bitmap
buffer can be
+ // null if the input array value does not contain any null value.
+ if (input.buffers[0].data != NULLPTR) {
+ ARROW_ASSIGN_OR_RAISE(
+ output->buffers[0],
+ arrow::internal::CopyBitmap(ctx->memory_pool(),
input.buffers[0].data,
+ input.offset, input.length));
+ }
+ }
+
+ // Set up offset and data buffer
+ OffsetBuilder offset_builder(ctx->memory_pool());
+ RETURN_NOT_OK(offset_builder.Reserve(batch.length + 1));
+ offset_builder.UnsafeAppend(0); // offsets start at 0
const int64_t sum_of_binary_view_sizes = util::SumOfBinaryViewSizes(
input.GetValues<BinaryViewType::c_type>(1), input.length);
-
- // TODO(GH-43573): A more efficient implementation that copies the validity
- // bitmap all at once is possible, but would mean we don't delegate all the
- // building logic to the ArrayBuilder implementation for the output type.
- OutputBuilderType builder(options.to_type.GetSharedPtr(),
ctx->memory_pool());
- RETURN_NOT_OK(builder.Resize(input.length));
- RETURN_NOT_OK(builder.ReserveData(sum_of_binary_view_sizes));
- arrow::internal::ArraySpanInlineVisitor<I> visitor;
- RETURN_NOT_OK(visitor.VisitStatus(
- input,
- [&](std::string_view v) {
- // Append valid string view
- return builder.Append(v);
+ DataBuilder data_builder(ctx->memory_pool());
+ RETURN_NOT_OK(data_builder.Reserve(sum_of_binary_view_sizes));
+ RETURN_NOT_OK(VisitArraySpanInline<I>(
+ batch[0].array,
+ [&](std::string_view s) {
+ // for non-null value, append string view to buffer and calculate
offset
Review Comment:
What does "calculate offset" mean?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]