pitrou commented on code in PR #41561:
URL: https://github.com/apache/arrow/pull/41561#discussion_r1599931890


##########
cpp/src/arrow/chunk_resolver.h:
##########
@@ -97,12 +157,47 @@ struct ARROW_EXPORT ChunkResolver {
   /// \return ChunkLocation with a valid chunk_index if index is within
   ///         bounds, or with chunk_index == chunks.size() if logical index is
   ///         `>= chunked_array.length()`.
-  inline ChunkLocation ResolveWithChunkIndexHint(int64_t index,
-                                                 ChunkLocation hint) const {
+  inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) 
const {
     assert(hint.chunk_index < static_cast<int64_t>(offsets_.size()));
     const auto chunk_index =
         ResolveChunkIndex</*StoreCachedChunk=*/false>(index, hint.chunk_index);
-    return {chunk_index, index - offsets_[chunk_index]};
+    return ChunkLocation{*this, chunk_index, index - offsets_[chunk_index]};
+  }
+
+  /// \brief Resolve `n` logical indices to chunk indices.
+  ///
+  /// \pre logical_index_vec[i] < n (for valid chunk index results)
+  /// \pre out_chunk_index_vec has space for `n` elements
+  /// \post chunk_hint in [0, chunks.size()]
+  /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+  /// \post if logical_index_vec[i] >= chunked_array.length(), then
+  ///       out_chunk_index_vec[i] == chunks.size()
+  ///       and out_index_in_chunk_vec[i] is undefined (can be out-of-bounds)
+  ///
+  /// \param n The number of logical indices to resolve
+  /// \param logical_index_vec The logical indices to resolve
+  /// \param out_chunk_index_vec The output array where the chunk indices will 
be written
+  /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
+  /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
+  ///                               within-chunk indices will be written
+  /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
+  template <typename IndexType>
+  [[nodiscard]] std::enable_if_t<std::is_unsigned_v<IndexType>, bool> 
ResolveMany(
+      int64_t n, const IndexType* ARROW_RESTRICT logical_index_vec,
+      IndexType* ARROW_RESTRICT out_chunk_index_vec, IndexType chunk_hint = 0,
+      IndexType* ARROW_RESTRICT out_index_in_chunk_vec = NULLPTR) const {
+    if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
+      // The max value returned by Bisect is `offsets.size() - 1` (= 
chunks.size()).
+      constexpr auto kMaxIndexTypeValue = 
std::numeric_limits<IndexType>::max();
+      const bool chunk_index_fits_on_type =
+          static_cast<uint64_t>(offsets_.size() - 1) <= kMaxIndexTypeValue;
+      if (ARROW_PREDICT_FALSE(!chunk_index_fits_on_type)) {
+        return false;
+      }

Review Comment:
   Ah, you're right! Yes, please add a comment :-)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to