edponce commented on a change in pull request #12055:
URL: https://github.com/apache/arrow/pull/12055#discussion_r825390387



##########
File path: cpp/src/arrow/compute/kernels/chunked_internal.h
##########
@@ -61,97 +62,21 @@ struct ResolvedChunk<Array> {
   bool IsNull() const { return array->IsNull(index); }
 };
 
-struct ChunkLocation {
-  int64_t chunk_index, index_in_chunk;
-};
-
-// An object that resolves an array chunk depending on the index.
-struct ChunkResolver {
-  explicit ChunkResolver(std::vector<int64_t> lengths)
-      : num_chunks_(static_cast<int64_t>(lengths.size())),
-        offsets_(MakeEndOffsets(std::move(lengths))),
-        cached_chunk_(0) {}
-
-  ChunkLocation Resolve(int64_t index) const {
-    // It is common for the algorithms below to make consecutive accesses at
-    // a relatively small distance from each other, hence often falling in
-    // the same chunk.
-    // This is trivial when merging (assuming each side of the merge uses
-    // its own resolver), but also in the inner recursive invocations of
-    // partitioning.
-    const bool cache_hit =
-        (index >= offsets_[cached_chunk_] && index < offsets_[cached_chunk_ + 
1]);
-    if (ARROW_PREDICT_TRUE(cache_hit)) {
-      return {cached_chunk_, index - offsets_[cached_chunk_]};
-    } else {
-      return ResolveMissBisect(index);
-    }
-  }
-
-  static ChunkResolver FromBatches(const RecordBatchVector& batches) {
-    std::vector<int64_t> lengths(batches.size());
-    std::transform(
-        batches.begin(), batches.end(), lengths.begin(),
-        [](const std::shared_ptr<RecordBatch>& batch) { return 
batch->num_rows(); });
-    return ChunkResolver(std::move(lengths));
-  }
-
- protected:
-  ChunkLocation ResolveMissBisect(int64_t index) const {
-    // Like std::upper_bound(), but hand-written as it can help the compiler.
-    const int64_t* raw_offsets = offsets_.data();
-    // Search [lo, lo + n)
-    int64_t lo = 0, n = num_chunks_;
-    while (n > 1) {
-      int64_t m = n >> 1;
-      int64_t mid = lo + m;
-      if (index >= raw_offsets[mid]) {
-        lo = mid;
-        n -= m;
-      } else {
-        n = m;
-      }
-    }
-    cached_chunk_ = lo;
-    return {lo, index - offsets_[lo]};
-  }
-
-  static std::vector<int64_t> MakeEndOffsets(std::vector<int64_t> lengths) {
-    int64_t offset = 0;
-    for (auto& v : lengths) {
-      const auto this_length = v;
-      v = offset;
-      offset += this_length;
-    }
-    lengths.push_back(offset);
-    return lengths;
-  }
-
-  int64_t num_chunks_;
-  std::vector<int64_t> offsets_;
-
-  mutable int64_t cached_chunk_;
-};
-
-struct ChunkedArrayResolver : protected ChunkResolver {
+struct ChunkedArrayResolver : protected ::arrow::internal::ChunkResolver {
   explicit ChunkedArrayResolver(const std::vector<const Array*>& chunks)
-      : ChunkResolver(MakeLengths(chunks)), chunks_(chunks) {}
+      : ::arrow::internal::ChunkResolver(chunks), chunks_(chunks) {}
 
   template <typename ArrayType>
   ResolvedChunk<ArrayType> Resolve(int64_t index) const {
-    const auto loc = ChunkResolver::Resolve(index);
-    return ResolvedChunk<ArrayType>(
-        checked_cast<const ArrayType*>(chunks_[loc.chunk_index]), 
loc.index_in_chunk);
+    const auto loc = ::arrow::internal::ChunkResolver::Resolve(index);
+    // if (loc.chunk_index >= static_cast<int64_t>(chunks_.size())) {
+    //   return Status::IndexError("tried to refer to chunk ", loc.chunk_index,
+    //                             " but ChunkedArray is only ", 
chunks_.size(), " long");
+    // }
+    return {checked_cast<const ArrayType*>(chunks_[loc.chunk_index]), 
loc.index_in_chunk};

Review comment:
       `Resolve` does not returns a `Result` that we can use for returning an 
error. Also, functions that invoke `Resolve` do not handle errors.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to