pitrou commented on code in PR #41561:
URL: https://github.com/apache/arrow/pull/41561#discussion_r1599930771
##########
cpp/src/arrow/chunk_resolver.h:
##########
@@ -97,12 +157,47 @@ struct ARROW_EXPORT ChunkResolver {
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// `>= chunked_array.length()`.
- inline ChunkLocation ResolveWithChunkIndexHint(int64_t index,
- ChunkLocation hint) const {
+ inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint)
const {
assert(hint.chunk_index < static_cast<int64_t>(offsets_.size()));
const auto chunk_index =
ResolveChunkIndex</*StoreCachedChunk=*/false>(index, hint.chunk_index);
- return {chunk_index, index - offsets_[chunk_index]};
+ return ChunkLocation{*this, chunk_index, index - offsets_[chunk_index]};
+ }
+
+ /// \brief Resolve `n` logical indices to chunk indices.
+ ///
+ /// \pre logical_index_vec[i] < n (for valid chunk index results)
+ /// \pre out_chunk_index_vec has space for `n` elements
+ /// \post chunk_hint in [0, chunks.size()]
+ /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+ /// \post if logical_index_vec[i] >= chunked_array.length(), then
+ /// out_chunk_index_vec[i] == chunks.size()
+ /// and out_index_in_chunk_vec[i] is undefined (can be out-of-bounds)
+ ///
+ /// \param n The number of logical indices to resolve
+ /// \param logical_index_vec The logical indices to resolve
+ /// \param out_chunk_index_vec The output array where the chunk indices will
be written
+ /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
+ /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
+ /// within-chunk indices will be written
+ /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
+ template <typename IndexType>
+ [[nodiscard]] std::enable_if_t<std::is_unsigned_v<IndexType>, bool>
ResolveMany(
Review Comment:
I'm just questioning the use of switching to unsigned. We use signed indices
comprehensively in Arrow, and switching to unsigned introduces (gratuitous?)
complication and inconsistency.
This is especially true if we intend to make this a public API.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]