felipecrv commented on code in PR #41561:
URL: https://github.com/apache/arrow/pull/41561#discussion_r1601827814


##########
cpp/src/arrow/chunk_resolver.h:
##########
@@ -97,12 +133,67 @@ struct ARROW_EXPORT ChunkResolver {
   /// \return ChunkLocation with a valid chunk_index if index is within
   ///         bounds, or with chunk_index == chunks.size() if logical index is
   ///         `>= chunked_array.length()`.
-  inline ChunkLocation ResolveWithChunkIndexHint(int64_t index,
-                                                 ChunkLocation hint) const {
+  inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) 
const {
     assert(hint.chunk_index < static_cast<int64_t>(offsets_.size()));
     const auto chunk_index =
         ResolveChunkIndex</*StoreCachedChunk=*/false>(index, hint.chunk_index);
-    return {chunk_index, index - offsets_[chunk_index]};
+    return ChunkLocation{chunk_index, index - offsets_[chunk_index]};
+  }
+
+  /// \brief Resolve `n` logical indices to chunk indices.
+  ///
+  /// \pre 0 <= logical_index_vec[i] < chunked_array.length()
+  ///      (for well-defined and valid chunk index results)
+  /// \pre out_chunk_index_vec has space for `n_indices`
+  /// \post chunk_hint in [0, chunks.size()]
+  /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+  /// \post if logical_index_vec[i] >= chunked_array.length(), then
+  ///       out_chunk_index_vec[i] == chunks.size()
+  ///       and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds)
+  /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and
+  ///       out_index_in_chunk_vec[i] are UNDEFINED
+  ///
+  /// \param n_indices The number of logical indices to resolve
+  /// \param logical_index_vec The logical indices to resolve
+  /// \param out_chunk_index_vec The output array where the chunk indices will 
be written
+  /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
+  /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
+  ///                               within-chunk indices will be written
+  /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
+  template <typename IndexType>
+  [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* 
logical_index_vec,
+                                 IndexType* out_chunk_index_vec, IndexType 
chunk_hint = 0,
+                                 IndexType* out_index_in_chunk_vec = NULLPTR) 
const {
+    if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
+      // The max value returned by Bisect is `offsets.size() - 1` (= 
chunks.size()).
+      constexpr uint64_t kMaxIndexTypeValue = 
std::numeric_limits<IndexType>::max();
+      // A ChunkedArray with enough empty chunks can make the index of a chunk
+      // exceed the logical index and thus the maximum value of IndexType.
+      const bool chunk_index_fits_on_type =
+          static_cast<uint64_t>(offsets_.size() - 1) <= kMaxIndexTypeValue;
+      if (ARROW_PREDICT_FALSE(!chunk_index_fits_on_type)) {
+        return false;
+      }
+      // Since an index-in-chunk cannot possibly exceed the logical index being
+      // queried, we don't have to worry about these values not fitting on 
IndexType.
+    }
+    if constexpr (std::is_signed_v<IndexType>) {
+      // We interpret signed integers as unsigned and avoid having to generate 
double
+      // the amount of binary code to handle each integer width.
+      //
+      // Negative logical indices can become large values when cast to unsigned, but
+      // they are gracefully handled by ResolveManyImpl, although both the chunk index
+      // and the index-in-chunk values will be undefined in these cases.

Review Comment:
   You're right. The overflow check guarantees it's impossible for negative 
logical indices to become valid indices.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to