amol- commented on code in PR #39566:
URL: https://github.com/apache/arrow/pull/39566#discussion_r1452392211


##########
cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc:
##########
@@ -660,29 +660,95 @@ Result<std::shared_ptr<ChunkedArray>> TakeCA(const 
ChunkedArray& values,
   auto num_chunks = values.num_chunks();
   std::shared_ptr<Array> current_chunk;
 
-  // Case 1: `values` has a single chunk, so just use it
-  if (num_chunks == 1) {
-    current_chunk = values.chunk(0);
-  } else {
-    // TODO Case 2: See if all `indices` fall in the same chunk and call Array 
Take on it
-    // See
-    // 
https://github.com/apache/arrow/blob/6f2c9041137001f7a9212f244b51bc004efc29af/r/src/compute.cpp#L123-L151
-    // TODO Case 3: If indices are sorted, can slice them and call Array Take
-
-    // Case 4: Else, concatenate chunks and call Array Take
+  if (indices.length() == 0) {
+    // Case 0: No indices were provided, nothing to take so return an empty 
chunked array
+    return ChunkedArray::MakeEmpty(values.type());
+  } else if (num_chunks < 2) {
+    // Case 1: `values` is empty or has a single chunk, so just use it
     if (values.chunks().empty()) {
       ARROW_ASSIGN_OR_RAISE(current_chunk, MakeArrayOfNull(values.type(), 
/*length=*/0,
                                                            
ctx->memory_pool()));
     } else {
-      ARROW_ASSIGN_OR_RAISE(current_chunk,
-                            Concatenate(values.chunks(), ctx->memory_pool()));
+      current_chunk = values.chunk(0);
     }
+    // Call Array Take on our single chunk
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> new_chunk,
+                          TakeAA(current_chunk->data(), indices.data(), 
options, ctx));
+    std::vector<std::shared_ptr<Array>> chunks = {MakeArray(new_chunk)};
+    return std::make_shared<ChunkedArray>(std::move(chunks));
+  } else {
+    // For each index, lookup at which chunk it refers to.
+    // We have to do this because the indices are not necessarily sorted.
+    // So we can't simply iterate over chunks and pick the slices we need.
+    std::vector<int64_t> indices_chunks(indices.length());
+    std::vector<int64_t> offsetted_indices(indices.length());
+    for (int64_t requested_index = 0; requested_index < indices.length();
+         ++requested_index) {
+      uint64_t index;
+      switch (indices.type()->byte_width()) {

Review Comment:
   Migrated to `type()->id()`. Note that I'm still relying on the fact that the 
user will never pass negative indices as otherwise I would have to double the 
switch cases unless there is some kind of helper that can always return a 
uint64 for any provided integer value and error for values <0



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to