amol- commented on code in PR #39566:
URL: https://github.com/apache/arrow/pull/39566#discussion_r1452369504
##########
cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc:
##########
@@ -660,29 +660,95 @@ Result<std::shared_ptr<ChunkedArray>> TakeCA(const ChunkedArray& values,
   auto num_chunks = values.num_chunks();
   std::shared_ptr<Array> current_chunk;
-  // Case 1: `values` has a single chunk, so just use it
-  if (num_chunks == 1) {
-    current_chunk = values.chunk(0);
-  } else {
-    // TODO Case 2: See if all `indices` fall in the same chunk and call Array Take on it
-    // See
-    // https://github.com/apache/arrow/blob/6f2c9041137001f7a9212f244b51bc004efc29af/r/src/compute.cpp#L123-L151
-    // TODO Case 3: If indices are sorted, can slice them and call Array Take
-
-    // Case 4: Else, concatenate chunks and call Array Take
+  if (indices.length() == 0) {
+    // Case 0: No indices were provided, nothing to take so return an empty chunked array
+    return ChunkedArray::MakeEmpty(values.type());
+  } else if (num_chunks < 2) {
+    // Case 1: `values` is empty or has a single chunk, so just use it
     if (values.chunks().empty()) {
       ARROW_ASSIGN_OR_RAISE(current_chunk, MakeArrayOfNull(values.type(), /*length=*/0,
                                                            ctx->memory_pool()));
     } else {
-      ARROW_ASSIGN_OR_RAISE(current_chunk,
-                            Concatenate(values.chunks(), ctx->memory_pool()));
+      current_chunk = values.chunk(0);
     }
+    // Call Array Take on our single chunk
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> new_chunk,
+                          TakeAA(current_chunk->data(), indices.data(), options, ctx));
+    std::vector<std::shared_ptr<Array>> chunks = {MakeArray(new_chunk)};
+    return std::make_shared<ChunkedArray>(std::move(chunks));
+  } else {
+    // For each index, lookup at which chunk it refers to.
Review Comment:
Amazing, I didn't know about `ChunkResolver`; it seems super useful.
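
For anyone else who hasn't run into it, here is a minimal sketch (not code from this PR) of what `ChunkResolver` does: given the chunks of a `ChunkedArray`, it maps a logical index into the whole array to a chunk index plus an offset inside that chunk, so per-chunk lookups don't need a linear scan. This assumes the interface in `arrow/chunk_resolver.h` (a constructor taking the chunk vector and a `Resolve()` method returning a `ChunkLocation`); the namespace (`arrow::internal` vs `arrow`) and exact field names can differ between Arrow versions.

```cpp
#include <iostream>
#include <memory>

#include "arrow/api.h"
#include "arrow/chunk_resolver.h"

// Sketch under the assumptions above: build a small ChunkedArray and
// resolve every logical index to its (chunk, offset-in-chunk) location.
arrow::Status ResolveExample() {
  // Build a ChunkedArray with chunks of length 3 and 2.
  arrow::Int32Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
  ARROW_ASSIGN_OR_RAISE(auto chunk0, builder.Finish());
  ARROW_RETURN_NOT_OK(builder.AppendValues({4, 5}));
  ARROW_ASSIGN_OR_RAISE(auto chunk1, builder.Finish());
  auto chunked = std::make_shared<arrow::ChunkedArray>(
      arrow::ArrayVector{chunk0, chunk1});

  // The resolver searches cumulative chunk lengths instead of scanning
  // chunks one by one. May be arrow::ChunkResolver in newer versions.
  arrow::internal::ChunkResolver resolver(chunked->chunks());
  for (int64_t i = 0; i < chunked->length(); ++i) {
    auto loc = resolver.Resolve(i);
    std::cout << "logical index " << i << " -> chunk " << loc.chunk_index
              << ", offset " << loc.index_in_chunk << std::endl;
  }
  return arrow::Status::OK();
}
```

That is what makes the per-index dispatch in the new `else` branch cheap compared to concatenating all chunks first.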