pitrou commented on code in PR #50164:
URL: https://github.com/apache/arrow/pull/50164#discussion_r3414394862


##########
cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc:
##########
@@ -488,6 +489,123 @@ Status FixedWidthTakeExec(KernelContext* ctx, const 
ExecSpan& batch, ExecResult*
 
 namespace {
 
+template <typename IndexCType>
+Status VarBinaryViewTakeTyped(const ArraySpan& values, const ArraySpan& 
indices,
+                              BinaryViewType::c_type* out_views, uint8_t* 
out_validity,
+                              int64_t* valid_count) {
+  const auto* source_views = values.GetValues<BinaryViewType::c_type>(1);
+  const auto* index_values = indices.GetValues<IndexCType>(1);
+
+  const bool values_may_have_nulls = values.MayHaveNulls();
+  const bool indices_may_have_nulls = indices.MayHaveNulls();
+
+  if (!values_may_have_nulls && !indices_may_have_nulls) {
+    for (int64_t out_i = 0; out_i < indices.length; ++out_i) {
+      out_views[out_i] = 
source_views[static_cast<int64_t>(index_values[out_i])];
+    }
+    *valid_count = indices.length;
+    return Status::OK();
+  }
+
+  for (int64_t out_i = 0; out_i < indices.length; ++out_i) {
+    if (indices_may_have_nulls &&
+        !bit_util::GetBit(indices.buffers[0].data, indices.offset + out_i)) {

Review Comment:
   I think you can use `VisitTwoBitBlocksVoid` which will be more efficient 
than individual calls to `GetBit` for each item.



##########
cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc:
##########
@@ -488,6 +489,123 @@ Status FixedWidthTakeExec(KernelContext* ctx, const 
ExecSpan& batch, ExecResult*
 
 namespace {
 
+template <typename IndexCType>
+Status VarBinaryViewTakeTyped(const ArraySpan& values, const ArraySpan& 
indices,
+                              BinaryViewType::c_type* out_views, uint8_t* 
out_validity,
+                              int64_t* valid_count) {
+  const auto* source_views = values.GetValues<BinaryViewType::c_type>(1);
+  const auto* index_values = indices.GetValues<IndexCType>(1);
+
+  const bool values_may_have_nulls = values.MayHaveNulls();
+  const bool indices_may_have_nulls = indices.MayHaveNulls();
+
+  if (!values_may_have_nulls && !indices_may_have_nulls) {
+    for (int64_t out_i = 0; out_i < indices.length; ++out_i) {
+      out_views[out_i] = 
source_views[static_cast<int64_t>(index_values[out_i])];
+    }
+    *valid_count = indices.length;
+    return Status::OK();
+  }
+
+  for (int64_t out_i = 0; out_i < indices.length; ++out_i) {
+    if (indices_may_have_nulls &&
+        !bit_util::GetBit(indices.buffers[0].data, indices.offset + out_i)) {
+      continue;
+    }
+
+    const int64_t source_i = static_cast<int64_t>(index_values[out_i]);
+    const bool source_valid =
+        !values_may_have_nulls ||
+        bit_util::GetBit(values.buffers[0].data, values.offset + source_i);
+    if (!source_valid) {
+      continue;
+    }
+
+    out_views[out_i] = source_views[source_i];
+    if (out_validity != nullptr) {
+      bit_util::SetBit(out_validity, out_i);
+    }
+    ++(*valid_count);
+  }
+
+  return Status::OK();
+}
+
+Status VarBinaryViewTakeDispatch(const ArraySpan& values, const ArraySpan& 
indices,
+                                 BinaryViewType::c_type* out_views, uint8_t* 
out_validity,
+                                 int64_t* valid_count) {
+  switch (indices.type->byte_width()) {
+    case 1:
+      return VarBinaryViewTakeTyped<uint8_t>(values, indices, out_views, 
out_validity,
+                                             valid_count);
+    case 2:
+      return VarBinaryViewTakeTyped<uint16_t>(values, indices, out_views, 
out_validity,
+                                              valid_count);
+    case 4:
+      return VarBinaryViewTakeTyped<uint32_t>(values, indices, out_views, 
out_validity,
+                                              valid_count);
+    default:
+      DCHECK_EQ(indices.type->byte_width(), 8);
+      return VarBinaryViewTakeTyped<uint64_t>(values, indices, out_views, 
out_validity,
+                                              valid_count);
+  }
+}
+
+}  // namespace
+
+Status VarBinaryViewTakeExec(KernelContext* ctx, const ExecSpan& batch, 
ExecResult* out) {
+  const ArraySpan& values = batch[0].array;
+  const ArraySpan& indices = batch[1].array;
+
+  if (TakeState::Get(ctx).boundscheck) {
+    RETURN_NOT_OK(CheckIndexBounds(indices, values.length));
+  }
+
+  const int64_t out_length = indices.length;
+  const bool may_have_nulls = values.MayHaveNulls() || indices.MayHaveNulls();
+  const auto data_buffers = values.GetVariadicBuffers();
+
+  ARROW_ASSIGN_OR_RAISE(
+      auto views_buf,
+      AllocateBuffer(out_length * 
static_cast<int64_t>(sizeof(BinaryViewType::c_type)),
+                     ctx->memory_pool()));
+  auto* out_views = 
reinterpret_cast<BinaryViewType::c_type*>(views_buf->mutable_data());
+  if (may_have_nulls && views_buf->size() > 0) {
+    std::memset(out_views, 0, views_buf->size());
+  }
+
+  std::shared_ptr<Buffer> validity_buf;
+  uint8_t* out_validity = nullptr;
+  if (may_have_nulls) {
+    ARROW_ASSIGN_OR_RAISE(validity_buf,
+                          AllocateEmptyBitmap(out_length, ctx->memory_pool()));
+    if (validity_buf->size() > 0) {
+      std::memset(validity_buf->mutable_data(), 0, validity_buf->size());
+    }
+    out_validity = validity_buf->mutable_data();
+  }
+
+  int64_t valid_count = 0;
+  RETURN_NOT_OK(
+      VarBinaryViewTakeDispatch(values, indices, out_views, out_validity, 
&valid_count));
+
+  const int64_t null_count = out_length - valid_count;
+  BufferVector buffers;
+  buffers.reserve(2 + data_buffers.size());
+  buffers.push_back(null_count == 0 ? nullptr : std::move(validity_buf));
+  buffers.push_back(std::move(views_buf));
+
+  for (const auto& data_buffer : data_buffers) {
+    buffers.push_back(data_buffer);
+  }

Review Comment:
   This is going to keep alive all data buffers from the input, but it would be 
nice to only keep alive those that are actually used by the output.
   
   Depending on the outcome from https://github.com/apache/arrow/issues/50172 
we may replace the unused buffers with NULL slots, or with zero-sized buffers 
(or we can remap the buffer indices, which is going to be more costly).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to