zanmato1984 commented on code in PR #48171:
URL: https://github.com/apache/arrow/pull/48171#discussion_r2597683183


##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -304,10 +305,33 @@ BinaryToBinaryCastExec(KernelContext* ctx, const 
ExecSpan& batch, ExecResult* ou
     }
   }
 
-  // Start with a zero-copy cast, but change indices to expected size
-  RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
-  return CastBinaryToBinaryOffsets<typename I::offset_type, typename 
O::offset_type>(
-      ctx, input, out->array_data().get());
+  if constexpr (sizeof(typename I::offset_type) != sizeof(typename 
O::offset_type)) {
+    std::shared_ptr<ArrayData> input_arr = input.ToArrayData();
+    ArrayData* output = out->array_data().get();
+    output->length = input_arr->length;
+    // output->offset is set below
+    output->SetNullCount(input_arr->null_count);
+    output->buffers = std::move(input_arr->buffers);
+
+    // Slice buffers to reduce allocation when casting the offsets buffer
+    int64_t offset = input_arr->offset;
+    size_t input_offset_type_size = sizeof(typename I::offset_type);
+    if (output->null_count != 0 && output->buffers[0]) {
+      // Avoid reallocation of the validity buffer by allowing some padding 
bits
+      output->offset = input_arr->offset % 8;
+    } else {
+      output->offset = 0;
+    }
+    if (output->buffers[0]) {
+      output->buffers[0] = SliceBuffer(output->buffers[0], offset / 8);
+    }
+    output->buffers[1] = SliceBuffer(output->buffers[1], offset * 
input_offset_type_size);

Review Comment:
   The current code seems correct to me. I'm not sure what's the concern here.



##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -304,10 +305,34 @@ BinaryToBinaryCastExec(KernelContext* ctx, const 
ExecSpan& batch, ExecResult* ou
     }
   }
 
-  // Start with a zero-copy cast, but change indices to expected size
-  RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
-  return CastBinaryToBinaryOffsets<typename I::offset_type, typename 
O::offset_type>(
-      ctx, input, out->array_data().get());
+  if constexpr (sizeof(typename I::offset_type) != sizeof(typename 
O::offset_type)) {
+    std::shared_ptr<ArrayData> input_arr = input.ToArrayData();
+    ArrayData* output = out->array_data().get();
+    output->length = input_arr->length;
+    // output->offset is set below
+    output->SetNullCount(input_arr->null_count);
+    output->buffers = std::move(input_arr->buffers);
+    // binary/string arrays don't have child_data
+
+    // Slice buffers to reduce allocation when casting the offsets buffer
+    int64_t offset = input_arr->offset;

Review Comment:
   More explicit naming to distinguish input vs. output.



##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -304,10 +305,34 @@ BinaryToBinaryCastExec(KernelContext* ctx, const 
ExecSpan& batch, ExecResult* ou
     }
   }
 
-  // Start with a zero-copy cast, but change indices to expected size
-  RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
-  return CastBinaryToBinaryOffsets<typename I::offset_type, typename 
O::offset_type>(
-      ctx, input, out->array_data().get());
+  if constexpr (sizeof(typename I::offset_type) != sizeof(typename 
O::offset_type)) {
+    std::shared_ptr<ArrayData> input_arr = input.ToArrayData();
+    ArrayData* output = out->array_data().get();
+    output->length = input_arr->length;
+    // output->offset is set below
+    output->SetNullCount(input_arr->null_count);
+    output->buffers = std::move(input_arr->buffers);
+    // binary/string arrays don't have child_data
+
+    // Slice buffers to reduce allocation when casting the offsets buffer
+    int64_t offset = input_arr->offset;

Review Comment:
   ```suggestion
       int64_t input_offset = input_arr->offset;
   ```



##########
cpp/src/arrow/compute/kernels/scalar_cast_string.cc:
##########
@@ -304,10 +305,34 @@ BinaryToBinaryCastExec(KernelContext* ctx, const 
ExecSpan& batch, ExecResult* ou
     }
   }
 
-  // Start with a zero-copy cast, but change indices to expected size
-  RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
-  return CastBinaryToBinaryOffsets<typename I::offset_type, typename 
O::offset_type>(
-      ctx, input, out->array_data().get());
+  if constexpr (sizeof(typename I::offset_type) != sizeof(typename 
O::offset_type)) {

Review Comment:
   Shall we organize the logic as:
   ```
   RETURN_NOT_OK(ZeroCopyCastExec(ctx, batch, out));
   if constexpr (sizeof(typename I::offset_type) != sizeof(typename 
O::offset_type)) {
     // Change output offset and slice buffers correspondingly.
     return CastBinaryToBinaryOffsets<typename I::offset_type, typename 
O::offset_type>(ctx, input, out->array_data().get());
   }
   return Status::OK();
   ```
   to reuse some logic already in `ZeroCopyCastExec`? (I also see other cast 
functions having similar code organization).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to