felipecrv commented on code in PR #44822:
URL: https://github.com/apache/arrow/pull/44822#discussion_r1878101794
##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -327,31 +329,49 @@ BinaryToBinaryCastExec(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
}
}
+ ArrayData* output = out->array_data().get();
+ output->length = input.length;
+ output->SetNullCount(input.null_count);
+
+ // Set up validity bitmap
+ if (input.offset == output->offset) {
+ output->buffers[0] = input.GetBuffer(0);
+ } else {
+ // When the offsets are different (e.g., due to slice operation), we need
to check if
+ // the null bitmap buffer is not null before copying it. The null bitmap
buffer can be
+ // null if the input array value does not contain any null value.
+ if (input.buffers[0].data != NULLPTR) {
+ ARROW_ASSIGN_OR_RAISE(
+ output->buffers[0],
+ arrow::internal::CopyBitmap(ctx->memory_pool(),
input.buffers[0].data,
+ input.offset, input.length));
+ }
+ }
+
+ // Set up offset and data buffer
+ OffsetBuilder offset_builder(ctx->memory_pool());
+ RETURN_NOT_OK(offset_builder.Reserve(batch.length + 1));
Review Comment:
Use `input.length` instead of `batch.length` here, for consistency with the rest of the function:
```suggestion
RETURN_NOT_OK(offset_builder.Reserve(input.length + 1));
```
##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -327,31 +329,49 @@ BinaryToBinaryCastExec(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
}
}
+ ArrayData* output = out->array_data().get();
+ output->length = input.length;
+ output->SetNullCount(input.null_count);
+
+ // Set up validity bitmap
+ if (input.offset == output->offset) {
+ output->buffers[0] = input.GetBuffer(0);
+ } else {
+ // When the offsets are different (e.g., due to slice operation), we need
to check if
+ // the null bitmap buffer is not null before copying it. The null bitmap
buffer can be
+ // null if the input array value does not contain any null value.
+ if (input.buffers[0].data != NULLPTR) {
+ ARROW_ASSIGN_OR_RAISE(
+ output->buffers[0],
+ arrow::internal::CopyBitmap(ctx->memory_pool(),
input.buffers[0].data,
+ input.offset, input.length));
+ }
+ }
+
+ // Set up offset and data buffer
+ OffsetBuilder offset_builder(ctx->memory_pool());
+ RETURN_NOT_OK(offset_builder.Reserve(batch.length + 1));
+ offset_builder.UnsafeAppend(0); // offsets start at 0
const int64_t sum_of_binary_view_sizes = util::SumOfBinaryViewSizes(
input.GetValues<BinaryViewType::c_type>(1), input.length);
-
- // TODO(GH-43573): A more efficient implementation that copies the validity
- // bitmap all at once is possible, but would mean we don't delegate all the
- // building logic to the ArrayBuilder implementation for the output type.
- OutputBuilderType builder(options.to_type.GetSharedPtr(),
ctx->memory_pool());
- RETURN_NOT_OK(builder.Resize(input.length));
- RETURN_NOT_OK(builder.ReserveData(sum_of_binary_view_sizes));
- arrow::internal::ArraySpanInlineVisitor<I> visitor;
- RETURN_NOT_OK(visitor.VisitStatus(
- input,
- [&](std::string_view v) {
- // Append valid string view
- return builder.Append(v);
+ DataBuilder data_builder(ctx->memory_pool());
+ RETURN_NOT_OK(data_builder.Reserve(sum_of_binary_view_sizes));
+ RETURN_NOT_OK(VisitArraySpanInline<I>(
+ batch[0].array,
Review Comment:
```suggestion
input,
```
##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -327,31 +329,43 @@ BinaryToBinaryCastExec(KernelContext* ctx, const
ExecSpan& batch, ExecResult* ou
}
}
- const int64_t sum_of_binary_view_sizes = util::SumOfBinaryViewSizes(
- input.GetValues<BinaryViewType::c_type>(1), input.length);
-
- // TODO(GH-43573): A more efficient implementation that copies the validity
- // bitmap all at once is possible, but would mean we don't delegate all the
- // building logic to the ArrayBuilder implementation for the output type.
- OutputBuilderType builder(options.to_type.GetSharedPtr(),
ctx->memory_pool());
- RETURN_NOT_OK(builder.Resize(input.length));
- RETURN_NOT_OK(builder.ReserveData(sum_of_binary_view_sizes));
- arrow::internal::ArraySpanInlineVisitor<I> visitor;
- RETURN_NOT_OK(visitor.VisitStatus(
- input,
- [&](std::string_view v) {
- // Append valid string view
- return builder.Append(v);
+ ArrayData* output = out->array_data().get();
+ output->length = input.length;
+ output->SetNullCount(input.null_count);
+
+ // Set up bitmap
+ if (input.offset == output->offset) {
+ output->buffers[0] = input.GetBuffer(0);
+ } else {
+ if (input.buffers[0].data != NULLPTR) {
Review Comment:
Can you copy this utility function [1] into this compilation unit
and call it from here instead?
[1]
https://github.com/apache/arrow/blob/main/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc#L42
(Later, the utility could be moved to a header file so that it is callable from
anywhere and inlinable. For now I'm suggesting a copy because it's tricky to name
this function in an informative and non-error-prone way.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]