(arrow) branch main updated: GH-43953: [C++] Add tests based on random data and benchmarks to ChunkResolver::ResolveMany (#43954)

apitrou Tue, 24 Sep 2024 07:36:38 -0700

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 83f35de2eb GH-43953: [C++] Add tests based on random data and 
benchmarks to ChunkResolver::ResolveMany (#43954)
83f35de2eb is described below

commit 83f35de2ebe4672d0d57290840d0153ac8a016e8
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Tue Sep 24 11:36:24 2024 -0300

    GH-43953: [C++] Add tests based on random data and benchmarks to 
ChunkResolver::ResolveMany (#43954)
    
    ### Rationale for this change
    
    Improve tests and add benchmarks. I wrote the tests and benchmarks while 
trying to improve the performance of `ResolveMany` and failing at it.
    
    ### What changes are included in this PR?
    
    Tests, benchmarks, and changes that don't really affect performance but 
might unlock more optimization opportunities in the future.
    
    ### Are these changes tested?
    
    Yes.
    
    * GitHub Issue: #43953
    
    Lead-authored-by: Felipe Oliveira Carvalho <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/CMakeLists.txt              |   1 +
 cpp/src/arrow/chunk_resolver.cc           | 104 +++++++++++--------
 cpp/src/arrow/chunk_resolver.h            |  57 +++++-----
 cpp/src/arrow/chunk_resolver_benchmark.cc | 166 ++++++++++++++++++++++++++++++
 cpp/src/arrow/chunked_array.cc            |   4 +-
 cpp/src/arrow/chunked_array_test.cc       |  68 ++++++++++++
 6 files changed, 326 insertions(+), 74 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index e77a02d0c0..c911f0f4e9 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -1218,6 +1218,7 @@ add_arrow_test(sparse_tensor_test)
 add_arrow_test(stl_test SOURCES stl_iterator_test.cc stl_test.cc)
 
 add_arrow_benchmark(builder_benchmark)
+add_arrow_benchmark(chunk_resolver_benchmark)
 add_arrow_benchmark(compare_benchmark)
 add_arrow_benchmark(memory_pool_benchmark)
 add_arrow_benchmark(type_benchmark)
diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
index 8541274807..bda6b17810 100644
--- a/cpp/src/arrow/chunk_resolver.cc
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -55,43 +55,57 @@ inline std::vector<int64_t> MakeChunksOffsets(const 
std::vector<T>& chunks) {
   return offsets;
 }
 
+template <typename IndexType>
+inline TypedChunkLocation<IndexType> ResolveOneInline(uint32_t num_offsets,
+                                                      const uint64_t* offsets,
+                                                      IndexType 
typed_logical_index,
+                                                      int32_t num_chunks,
+                                                      int32_t chunk_hint) {
+  const auto index = static_cast<uint64_t>(typed_logical_index);
+  // use or update chunk_hint
+  if (index >= offsets[chunk_hint] &&
+      (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) {
+    // hint is correct!
+  } else {
+    // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
+    auto chunk_index =
+        ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
+    chunk_hint = static_cast<int32_t>(chunk_index);
+  }
+  // chunk_index is in [0, chunks.size()] no matter what the value
+  // of logical_index is, so it's always safe to dereference offsets
+  // as it contains chunks.size()+1 values.
+  auto loc = TypedChunkLocation<IndexType>(
+      /*chunk_index=*/chunk_hint,
+      /*index_in_chunk=*/typed_logical_index -
+          static_cast<IndexType>(offsets[chunk_hint]));
+#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)
+  // Make it more likely that Valgrind/ASAN can catch an invalid memory
+  // access by poisoning the index-in-chunk value when the logical
+  // index is out-of-bounds.
+  if (static_cast<int32_t>(loc.chunk_index) == num_chunks) {
+    loc.index_in_chunk = std::numeric_limits<IndexType>::max();
+  }
+#endif
+  return loc;
+}
+
 /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
 /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
 template <typename IndexType>
-void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets,
+void ResolveManyInline(uint32_t num_offsets, const int64_t* signed_offsets,
                        int64_t n_indices, const IndexType* logical_index_vec,
                        TypedChunkLocation<IndexType>* out_chunk_location_vec,
-                       IndexType chunk_hint) {
+                       int32_t chunk_hint) {
   auto* offsets = reinterpret_cast<const uint64_t*>(signed_offsets);
-  const auto num_chunks = static_cast<IndexType>(num_offsets - 1);
+  const auto num_chunks = static_cast<int32_t>(num_offsets - 1);
   // chunk_hint in [0, num_offsets) per the precondition.
   for (int64_t i = 0; i < n_indices; i++) {
-    auto typed_logical_index = logical_index_vec[i];
-    const auto index = static_cast<uint64_t>(typed_logical_index);
-    // use or update chunk_hint
-    if (index >= offsets[chunk_hint] &&
-        (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) {
-      // hint is correct!
-    } else {
-      // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
-      auto chunk_index =
-          ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
-      chunk_hint = static_cast<IndexType>(chunk_index);
-    }
-    out_chunk_location_vec[i].chunk_index = chunk_hint;
-    // chunk_index is in [0, chunks.size()] no matter what the
-    // value of logical_index is, so it's always safe to dereference
-    // offset_ as it contains chunks.size()+1 values.
-    out_chunk_location_vec[i].index_in_chunk =
-        typed_logical_index - static_cast<IndexType>(offsets[chunk_hint]);
-#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)
-    // Make it more likely that Valgrind/ASAN can catch an invalid memory
-    // access by poisoning the index-in-chunk value when the logical
-    // index is out-of-bounds.
-    if (chunk_hint == num_chunks) {
-      out_chunk_location_vec[i].index_in_chunk = 
std::numeric_limits<IndexType>::max();
-    }
-#endif
+    const auto typed_logical_index = logical_index_vec[i];
+    const auto loc = ResolveOneInline(num_offsets, offsets, 
typed_logical_index,
+                                      num_chunks, chunk_hint);
+    out_chunk_location_vec[i] = loc;
+    chunk_hint = static_cast<int32_t>(loc.chunk_index);
   }
 }
 
@@ -127,30 +141,30 @@ ChunkResolver& ChunkResolver::operator=(const 
ChunkResolver& other) noexcept {
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* 
logical_index_vec,
                                     TypedChunkLocation<uint8_t>* 
out_chunk_location_vec,
-                                    uint8_t chunk_hint) const {
-  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, 
logical_index_vec,
-                    out_chunk_location_vec, chunk_hint);
-}
-
-void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* 
logical_index_vec,
-                                    TypedChunkLocation<uint32_t>* 
out_chunk_location_vec,
-                                    uint32_t chunk_hint) const {
-  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, 
logical_index_vec,
-                    out_chunk_location_vec, chunk_hint);
+                                    int32_t chunk_hint) const {
+  ResolveManyInline(static_cast<uint32_t>(offsets_.size()), offsets_.data(), 
n_indices,
+                    logical_index_vec, out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* 
logical_index_vec,
                                     TypedChunkLocation<uint16_t>* 
out_chunk_location_vec,
-                                    uint16_t chunk_hint) const {
-  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, 
logical_index_vec,
-                    out_chunk_location_vec, chunk_hint);
+                                    int32_t chunk_hint) const {
+  ResolveManyInline(static_cast<uint32_t>(offsets_.size()), offsets_.data(), 
n_indices,
+                    logical_index_vec, out_chunk_location_vec, chunk_hint);
+}
+
+void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* 
logical_index_vec,
+                                    TypedChunkLocation<uint32_t>* 
out_chunk_location_vec,
+                                    int32_t chunk_hint) const {
+  ResolveManyInline(static_cast<uint32_t>(offsets_.size()), offsets_.data(), 
n_indices,
+                    logical_index_vec, out_chunk_location_vec, chunk_hint);
 }
 
 void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* 
logical_index_vec,
                                     TypedChunkLocation<uint64_t>* 
out_chunk_location_vec,
-                                    uint64_t chunk_hint) const {
-  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, 
logical_index_vec,
-                    out_chunk_location_vec, chunk_hint);
+                                    int32_t chunk_hint) const {
+  ResolveManyInline(static_cast<uint32_t>(offsets_.size()), offsets_.data(), 
n_indices,
+                    logical_index_vec, out_chunk_location_vec, chunk_hint);
 }
 
 }  // namespace arrow::internal
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
index 83fda62387..4a5e27c053 100644
--- a/cpp/src/arrow/chunk_resolver.h
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -72,7 +72,7 @@ struct ARROW_EXPORT ChunkResolver {
   /// \brief Cache of the index of the last resolved chunk.
   ///
   /// \invariant `cached_chunk_ in [0, chunks.size()]`
-  mutable std::atomic<int64_t> cached_chunk_;
+  mutable std::atomic<int32_t> cached_chunk_;
 
  public:
   explicit ChunkResolver(const ArrayVector& chunks) noexcept;
@@ -92,6 +92,8 @@ struct ARROW_EXPORT ChunkResolver {
     for (size_t i = 1; i < offsets_.size(); i++) {
       assert(offsets_[i] >= offsets_[i - 1]);
     }
+    assert(offsets_.size() - 1 <=
+           static_cast<size_t>(std::numeric_limits<int32_t>::max()));
 #endif
   }
 
@@ -102,7 +104,7 @@ struct ARROW_EXPORT ChunkResolver {
   ChunkResolver& operator=(const ChunkResolver& other) noexcept;
 
   int64_t logical_array_length() const { return offsets_.back(); }
-  int64_t num_chunks() const { return static_cast<int64_t>(offsets_.size()) - 
1; }
+  int32_t num_chunks() const { return static_cast<int32_t>(offsets_.size() - 
1); }
 
   int64_t chunk_length(int64_t chunk_index) const {
     return offsets_[chunk_index + 1] - offsets_[chunk_index];
@@ -140,9 +142,9 @@ struct ARROW_EXPORT ChunkResolver {
   ///         bounds, or with chunk_index == chunks.size() if logical index is
   ///         `>= chunked_array.length()`.
   inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) 
const {
-    assert(hint.chunk_index < static_cast<int64_t>(offsets_.size()));
-    const auto chunk_index =
-        ResolveChunkIndex</*StoreCachedChunk=*/false>(index, hint.chunk_index);
+    assert(hint.chunk_index < static_cast<uint32_t>(offsets_.size()));
+    const auto chunk_index = ResolveChunkIndex</*StoreCachedChunk=*/false>(
+        index, static_cast<int32_t>(hint.chunk_index));
     return ChunkLocation{chunk_index, index - offsets_[chunk_index]};
   }
 
@@ -169,13 +171,12 @@ struct ARROW_EXPORT ChunkResolver {
   [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* 
logical_index_vec,
                                  TypedChunkLocation<IndexType>* 
out_chunk_location_vec,
                                  IndexType chunk_hint = 0) const {
-    if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
+    if constexpr (sizeof(IndexType) < sizeof(uint32_t)) {
       // The max value returned by Bisect is `offsets.size() - 1` (= 
chunks.size()).
-      constexpr uint64_t kMaxIndexTypeValue = 
std::numeric_limits<IndexType>::max();
+      constexpr int64_t kMaxIndexTypeValue = 
std::numeric_limits<IndexType>::max();
       // A ChunkedArray with enough empty chunks can make the index of a chunk
       // exceed the logical index and thus the maximum value of IndexType.
-      const bool chunk_index_fits_on_type =
-          static_cast<uint64_t>(offsets_.size() - 1) <= kMaxIndexTypeValue;
+      const bool chunk_index_fits_on_type = num_chunks() <= kMaxIndexTypeValue;
       if (ARROW_PREDICT_FALSE(!chunk_index_fits_on_type)) {
         return false;
       }
@@ -194,17 +195,18 @@ struct ARROW_EXPORT ChunkResolver {
       using U = std::make_unsigned_t<IndexType>;
       ResolveManyImpl(n_indices, reinterpret_cast<const U*>(logical_index_vec),
                       
reinterpret_cast<TypedChunkLocation<U>*>(out_chunk_location_vec),
-                      static_cast<U>(chunk_hint));
+                      static_cast<int32_t>(chunk_hint));
     } else {
       static_assert(std::is_unsigned_v<IndexType>);
-      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_location_vec, 
chunk_hint);
+      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_location_vec,
+                      static_cast<int32_t>(chunk_hint));
     }
     return true;
   }
 
  private:
   template <bool StoreCachedChunk>
-  inline int64_t ResolveChunkIndex(int64_t index, int64_t cached_chunk) const {
+  inline int64_t ResolveChunkIndex(int64_t index, int32_t cached_chunk) const {
     // It is common for algorithms sequentially processing arrays to make 
consecutive
     // accesses at a relatively small distance from each other, hence often 
falling in the
     // same chunk.
@@ -212,16 +214,17 @@ struct ARROW_EXPORT ChunkResolver {
     // This is guaranteed when merging (assuming each side of the merge uses 
its
     // own resolver), and is the most common case in recursive invocations of
     // partitioning.
-    const auto num_offsets = static_cast<int64_t>(offsets_.size());
+    const auto num_offsets = static_cast<uint32_t>(offsets_.size());
     const int64_t* offsets = offsets_.data();
     if (ARROW_PREDICT_TRUE(index >= offsets[cached_chunk]) &&
-        (cached_chunk + 1 == num_offsets || index < offsets[cached_chunk + 
1])) {
+        (static_cast<uint32_t>(cached_chunk + 1) == num_offsets ||
+         index < offsets[cached_chunk + 1])) {
       return cached_chunk;
     }
     // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
     const auto chunk_index = Bisect(index, offsets, /*lo=*/0, 
/*hi=*/num_offsets);
     if constexpr (StoreCachedChunk) {
-      assert(chunk_index < static_cast<int64_t>(offsets_.size()));
+      assert(static_cast<uint32_t>(chunk_index) < 
static_cast<uint32_t>(offsets_.size()));
       cached_chunk_.store(chunk_index, std::memory_order_relaxed);
     }
     return chunk_index;
@@ -230,13 +233,13 @@ struct ARROW_EXPORT ChunkResolver {
   /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
   /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
   void ResolveManyImpl(int64_t, const uint8_t*, TypedChunkLocation<uint8_t>*,
-                       uint8_t) const;
+                       int32_t) const;
   void ResolveManyImpl(int64_t, const uint16_t*, TypedChunkLocation<uint16_t>*,
-                       uint16_t) const;
+                       int32_t) const;
   void ResolveManyImpl(int64_t, const uint32_t*, TypedChunkLocation<uint32_t>*,
-                       uint32_t) const;
+                       int32_t) const;
   void ResolveManyImpl(int64_t, const uint64_t*, TypedChunkLocation<uint64_t>*,
-                       uint64_t) const;
+                       int32_t) const;
 
  public:
   /// \brief Find the index of the chunk that contains the logical index.
@@ -249,15 +252,15 @@ struct ARROW_EXPORT ChunkResolver {
   /// \pre index >= 0 (otherwise, when index is negative, hi-1 is returned)
   /// \pre lo < hi
   /// \pre lo >= 0 && hi <= offsets_.size()
-  static inline int64_t Bisect(int64_t index, const int64_t* offsets, int64_t 
lo,
-                               int64_t hi) {
+  static inline int32_t Bisect(int64_t index, const int64_t* offsets, int32_t 
lo,
+                               int32_t hi) {
     return Bisect(static_cast<uint64_t>(index),
-                  reinterpret_cast<const uint64_t*>(offsets), 
static_cast<uint64_t>(lo),
-                  static_cast<uint64_t>(hi));
+                  reinterpret_cast<const uint64_t*>(offsets), 
static_cast<uint32_t>(lo),
+                  static_cast<uint32_t>(hi));
   }
 
-  static inline int64_t Bisect(uint64_t index, const uint64_t* offsets, 
uint64_t lo,
-                               uint64_t hi) {
+  static inline int32_t Bisect(uint64_t index, const uint64_t* offsets, 
uint32_t lo,
+                               uint32_t hi) {
     // Similar to std::upper_bound(), but slightly different as our offsets
     // array always starts with 0.
     auto n = hi - lo;
@@ -265,8 +268,8 @@ struct ARROW_EXPORT ChunkResolver {
     // (lo < hi is guaranteed by the precondition).
     assert(n > 1 && "lo < hi is a precondition of Bisect");
     do {
-      const uint64_t m = n >> 1;
-      const uint64_t mid = lo + m;
+      const uint32_t m = n >> 1;
+      const uint32_t mid = lo + m;
       if (index >= offsets[mid]) {
         lo = mid;
         n -= m;
diff --git a/cpp/src/arrow/chunk_resolver_benchmark.cc 
b/cpp/src/arrow/chunk_resolver_benchmark.cc
new file mode 100644
index 0000000000..0756de3fbe
--- /dev/null
+++ b/cpp/src/arrow/chunk_resolver_benchmark.cc
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <random>
+#include <vector>
+
+#include "arrow/chunk_resolver.h"
+#include "arrow/util/benchmark_util.h"
+#include "arrow/util/pcg_random.h"
+
+namespace arrow {
+
+using internal::ChunkResolver;
+using internal::TypedChunkLocation;
+
+namespace {
+
+int64_t constexpr kChunkedArrayLength = std::numeric_limits<uint16_t>::max();
+
+struct ResolveManyBenchmark {
+  benchmark::State& state;
+  random::pcg64 rng;
+  // Values from the state.range(i)
+  int64_t chunked_array_length;
+  int32_t num_chunks;
+  int64_t num_logical_indices;
+
+  explicit ResolveManyBenchmark(benchmark::State& state)
+      : state(state),
+        rng(42),
+        chunked_array_length(state.range(0)),
+        num_chunks(static_cast<int32_t>(state.range(1))),
+        num_logical_indices(state.range(2)) {}
+
+  std::vector<int64_t> GenChunkedArrayOffsets() {
+    std::uniform_int_distribution<int64_t> offset_gen(1, chunked_array_length);
+    std::vector<int64_t> offsets;
+    offsets.reserve(num_chunks + 1);
+    offsets.push_back(0);
+    while (offsets.size() < static_cast<size_t>(num_chunks)) {
+      offsets.push_back(offset_gen(rng));
+    }
+    offsets.push_back(chunked_array_length);
+    std::sort(offsets.begin() + 1, offsets.end());
+    return offsets;
+  }
+
+  template <typename IndexType>
+  std::vector<IndexType> GenRandomIndices(IndexType max_index, bool sorted) {
+    std::uniform_int_distribution<IndexType> index_gen(0, max_index);
+    std::vector<IndexType> indices;
+    indices.reserve(num_logical_indices);
+    while (indices.size() < static_cast<size_t>(num_logical_indices)) {
+      indices.push_back(index_gen(rng));
+    }
+    if (sorted) {
+      std::sort(indices.begin(), indices.end());
+    }
+    return indices;
+  }
+
+  template <typename IndexType>
+  void Bench(bool sorted) {
+    if constexpr (sizeof(IndexType) < 8) {
+      constexpr uint64_t kLimitIndex = std::numeric_limits<IndexType>::max();
+      ARROW_CHECK_LE(static_cast<uint64_t>(chunked_array_length), kLimitIndex);
+    }
+    const auto max_random_index = static_cast<IndexType>(chunked_array_length);
+    auto offsets = GenChunkedArrayOffsets();
+    auto logical_indices = GenRandomIndices<IndexType>(max_random_index, 
sorted);
+    ChunkResolver resolver(std::move(offsets));
+    std::vector<TypedChunkLocation<IndexType>> 
chunk_location_vec(num_logical_indices);
+    BENCHMARK_UNUSED bool all_succeeded = true;
+    for (auto _ : state) {
+      const bool success = resolver.ResolveMany<IndexType>(
+          num_logical_indices, logical_indices.data(), 
chunk_location_vec.data());
+      all_succeeded &= success;
+    }
+    ARROW_CHECK(all_succeeded);
+    state.SetItemsProcessed(state.iterations() * num_logical_indices);
+  }
+};
+
+template <typename IndexType>
+void ResolveManySetArgs(benchmark::internal::Benchmark* bench) {
+  constexpr int32_t kNonAligned = 3;
+  const int64_t kNumIndicesFew = (kChunkedArrayLength >> 7) - kNonAligned;
+  const int64_t kNumIndicesMany = (kChunkedArrayLength >> 1) - kNonAligned;
+
+  bench->ArgNames({"chunked_array_length", "num_chunks", "num_indices"});
+
+  switch (sizeof(IndexType)) {
+    case 1:
+      // Unexpected. See comments below.
+    case 2:
+    case 4:
+    case 8:
+      bench->Args({kChunkedArrayLength, /*num_chunks*/ 10000, kNumIndicesFew});
+      bench->Args({kChunkedArrayLength, /*num_chunks*/ 100, kNumIndicesFew});
+      bench->Args({kChunkedArrayLength, /*num_chunks*/ 10000, 
kNumIndicesMany});
+      bench->Args({kChunkedArrayLength, /*num_chunks*/ 100, kNumIndicesMany});
+      break;
+  }
+}
+
+template <typename IndexType>
+void ResolveManyBench(benchmark::State& state, bool sorted) {
+  ResolveManyBenchmark{state}.Bench<IndexType>(sorted);
+}
+
+void ResolveManyUInt16Random(benchmark::State& state) {
+  ResolveManyBench<uint16_t>(state, false);
+}
+
+void ResolveManyUInt32Random(benchmark::State& state) {
+  ResolveManyBench<uint32_t>(state, false);
+}
+
+void ResolveManyUInt64Random(benchmark::State& state) {
+  ResolveManyBench<uint64_t>(state, false);
+}
+
+void ResolveManyUInt16Sorted(benchmark::State& state) {
+  ResolveManyBench<uint16_t>(state, true);
+}
+
+void ResolveManyUInt32Sorted(benchmark::State& state) {
+  ResolveManyBench<uint32_t>(state, true);
+}
+
+void ResolveManyUInt64Sorted(benchmark::State& state) {
+  ResolveManyBench<uint64_t>(state, true);
+}
+
+}  // namespace
+
+// We don't benchmark with uint8_t because it's too fast -- any meaningful
+// array of logical indices will contain all the possible values.
+
+BENCHMARK(ResolveManyUInt16Random)->Apply(ResolveManySetArgs<uint16_t>);
+BENCHMARK(ResolveManyUInt32Random)->Apply(ResolveManySetArgs<uint32_t>);
+BENCHMARK(ResolveManyUInt64Random)->Apply(ResolveManySetArgs<uint64_t>);
+
+BENCHMARK(ResolveManyUInt16Sorted)->Apply(ResolveManySetArgs<uint16_t>);
+BENCHMARK(ResolveManyUInt32Sorted)->Apply(ResolveManySetArgs<uint32_t>);
+BENCHMARK(ResolveManyUInt64Sorted)->Apply(ResolveManySetArgs<uint64_t>);
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index dd6aa51534..2ea2f514b9 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -51,11 +51,11 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, 
std::shared_ptr<DataType> type)
       null_count_(0),
       chunk_resolver_{chunks_} {
   if (type_ == nullptr) {
-    ARROW_CHECK_GT(chunks_.size(), 0)
+    ARROW_CHECK_GT(chunks_.size(), static_cast<size_t>(0))
         << "cannot construct ChunkedArray from empty vector and omitted type";
     type_ = chunks_[0]->type();
   }
-
+  ARROW_CHECK_LE(chunks.size(), 
static_cast<size_t>(std::numeric_limits<int>::max()));
   for (const auto& chunk : chunks_) {
     length_ += chunk->length();
     null_count_ += chunk->null_count();
diff --git a/cpp/src/arrow/chunked_array_test.cc 
b/cpp/src/arrow/chunked_array_test.cc
index bf9d4af7c7..f98dde689c 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -32,6 +32,7 @@
 #include "arrow/type.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/key_value_metadata.h"
+#include "arrow/util/pcg_random.h"
 
 namespace arrow {
 
@@ -373,10 +374,27 @@ TEST(TestChunkResolver, Resolve) {
   ASSERT_EQ(resolver.Resolve(10).chunk_index, 3);
 }
 
+template <class RNG>
+std::vector<int64_t> GenChunkedArrayOffsets(RNG& rng, int32_t num_chunks,
+                                            int64_t chunked_array_len) {
+  std::uniform_int_distribution<int64_t> offset_gen(1, chunked_array_len - 1);
+  std::vector<int64_t> offsets;
+  offsets.reserve(num_chunks + 1);
+  offsets.push_back(0);
+  while (offsets.size() < static_cast<size_t>(num_chunks)) {
+    offsets.push_back(offset_gen(rng));
+  }
+  offsets.push_back(chunked_array_len);
+  std::sort(offsets.begin() + 1, offsets.end());
+  return offsets;
+}
+
 template <typename T>
 class TestChunkResolverMany : public ::testing::Test {
  public:
   using IndexType = T;
+  static constexpr int32_t kMaxInt32 = std::numeric_limits<int32_t>::max();
+  static constexpr uint64_t kMaxValidIndex = 
std::numeric_limits<IndexType>::max();
 
   Result<std::vector<ChunkLocation>> ResolveMany(
       const ChunkResolver& resolver, const std::vector<IndexType>& 
logical_index_vec) {
@@ -510,6 +528,55 @@ class TestChunkResolverMany : public ::testing::Test {
       ASSERT_OK(ResolveMany(resolver_with_empty, logical_index_vec));
     }
   }
+
+  void TestRandomInput(int32_t num_chunks, int64_t chunked_array_len) {
+    random::pcg64 rng(42);
+
+    // Generate random chunk offsets...
+    auto offsets = GenChunkedArrayOffsets(rng, num_chunks, chunked_array_len);
+    ASSERT_EQ(offsets.size(), static_cast<size_t>(num_chunks) + 1);
+    // ...and ensure there is at least one empty chunk.
+    std::uniform_int_distribution<int32_t> chunk_index_gen(
+        1, static_cast<int32_t>(num_chunks - 1));
+    auto chunk_index = chunk_index_gen(rng);
+    offsets[chunk_index] = offsets[chunk_index - 1];
+
+    // Generate random query array of logical indices...
+    const auto num_logical_indices = 3 * static_cast<int64_t>(num_chunks) / 2;
+    std::vector<IndexType> logical_index_vec;
+    logical_index_vec.reserve(num_logical_indices);
+    std::uniform_int_distribution<uint64_t> logical_index_gen(1, 
kMaxValidIndex);
+    for (int64_t i = 0; i < num_logical_indices; i++) {
+      const auto index = static_cast<IndexType>(logical_index_gen(rng));
+      logical_index_vec.push_back(index);
+    }
+    // ...and sprinkle some extreme logical index values.
+    std::uniform_int_distribution<int64_t> position_gen(0, 
logical_index_vec.size() - 1);
+    for (int i = 0; i < 2; i++) {
+      auto max_valid_index =
+          std::min(kMaxValidIndex, static_cast<uint64_t>(chunked_array_len));
+      // zero and last valid logical index
+      logical_index_vec[position_gen(rng)] = 0;
+      logical_index_vec[position_gen(rng)] = 
static_cast<IndexType>(max_valid_index - 1);
+      // out of  bounds indices
+      logical_index_vec[position_gen(rng)] = 
static_cast<IndexType>(max_valid_index);
+      if (max_valid_index < kMaxValidIndex) {
+        logical_index_vec[position_gen(rng)] =
+            static_cast<IndexType>(max_valid_index + 1);
+      }
+    }
+
+    ChunkResolver resolver(std::move(offsets));
+    CheckResolveMany(resolver, logical_index_vec);
+  }
+
+  void TestRandomInput() {
+    const int64_t num_chunks = static_cast<int64_t>(
+        std::min(kMaxValidIndex - 1, static_cast<uint64_t>(1) << 16));
+    const int64_t avg_chunk_length = 20;
+    const int64_t chunked_array_len = num_chunks * 2 * avg_chunk_length;
+    TestRandomInput(num_chunks, chunked_array_len);
+  }
 };
 
 TYPED_TEST_SUITE(TestChunkResolverMany, IndexTypes);
@@ -517,5 +584,6 @@ TYPED_TEST_SUITE(TestChunkResolverMany, IndexTypes);
 TYPED_TEST(TestChunkResolverMany, Basics) { this->TestBasics(); }
 TYPED_TEST(TestChunkResolverMany, OutOfBounds) { this->TestOutOfBounds(); }
 TYPED_TEST(TestChunkResolverMany, Overflow) { this->TestOverflow(); }
+TYPED_TEST(TestChunkResolverMany, RandomInput) { this->TestRandomInput(); }
 
 }  // namespace arrow

(arrow) branch main updated: GH-43953: [C++] Add tests based on random data and benchmarks to ChunkResolver::ResolveMany (#43954)

Reply via email to