This is an automated email from the ASF dual-hosted git repository.

brycemecum pushed a commit to branch maint-18.1.0
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit e56e0a48e71b798cfd36078b4c2f1a54b852153a
Author: Anja Kefala <[email protected]>
AuthorDate: Mon Oct 21 15:46:48 2024 -0700

    GH-34535: [C++] Move ChunkResolver to the public API (#44357)
    
    ### Rationale for this change
    
    Adopting https://github.com/apache/arrow/pull/40226.
    
     The creation and return of a shared_ptr does result in some performance 
overhead, that makes a difference for a performance-sensitive application.
    
    If someone could use ChunkResolver to learn the indices, they could then 
instead access the data directly.
    
    ### What changes are included in this PR?
    
    - [X] Updates to documentation (thanks to @ SChakravorti21 )
    - [X] Moving `ChunkResolver` to public API, and updating all references to 
it in the code
    
    ### Are these changes tested?
    
    There seemed to be comprehensive tests already: 
https://github.com/apache/arrow/blob/main/cpp/src/arrow/chunked_array_test.cc#L324
 If an edgecase is missing, I'd be happy to add it.
    
    ### Are there any user-facing changes?
    
    `ChunkResolver` and `TypedChunkLocation` are now in the public API.
    * GitHub Issue: #34535
    
    Lead-authored-by: Anja Kefala <[email protected]>
    Co-authored-by: anjakefala <[email protected]>
    Co-authored-by: Bryce Mecum <[email protected]>
    Co-authored-by: SChakravorti21 <[email protected]>
    Co-authored-by: SChakravorti21<[email protected]>
    Co-authored-by: Jacob Wujciak-Jens <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/chunk_resolver.cc                    |  4 +--
 cpp/src/arrow/chunk_resolver.h                     | 40 +++++++++++++---------
 cpp/src/arrow/chunk_resolver_benchmark.cc          |  3 --
 cpp/src/arrow/chunked_array.h                      |  2 +-
 cpp/src/arrow/chunked_array_test.cc                |  4 ---
 cpp/src/arrow/compute/kernels/chunked_internal.h   |  2 +-
 cpp/src/arrow/compute/kernels/vector_sort.cc       |  3 +-
 .../arrow/compute/kernels/vector_sort_internal.h   |  4 +--
 cpp/src/arrow/stl_iterator.h                       |  2 +-
 docs/source/cpp/api/array.rst                      | 14 ++++++++
 r/src/altrep.cpp                                   | 14 +++++---
 11 files changed, 55 insertions(+), 37 deletions(-)

diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
index bda6b17810..ca74ffa06c 100644
--- a/cpp/src/arrow/chunk_resolver.cc
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -26,7 +26,7 @@
 #include "arrow/array.h"
 #include "arrow/record_batch.h"
 
-namespace arrow::internal {
+namespace arrow {
 
 namespace {
 template <typename T>
@@ -167,4 +167,4 @@ void ChunkResolver::ResolveManyImpl(int64_t n_indices, 
const uint64_t* logical_i
                     logical_index_vec, out_chunk_location_vec, chunk_hint);
 }
 
-}  // namespace arrow::internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
index 4a5e27c053..ab0e753d00 100644
--- a/cpp/src/arrow/chunk_resolver.h
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -27,12 +27,12 @@
 #include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
 
-namespace arrow::internal {
+namespace arrow {
 
-struct ChunkResolver;
+class ChunkResolver;
 
 template <typename IndexType>
-struct TypedChunkLocation {
+struct ARROW_EXPORT TypedChunkLocation {
   /// \brief Index of the chunk in the array of chunks
   ///
   /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` 
is used
@@ -41,7 +41,7 @@ struct TypedChunkLocation {
 
   /// \brief Index of the value in the chunk
   ///
-  /// The value is UNDEFINED if chunk_index >= chunks.size()
+  /// The value is UNDEFINED if `chunk_index >= chunks.size()`
   IndexType index_in_chunk = 0;
 
   TypedChunkLocation() = default;
@@ -61,7 +61,7 @@ using ChunkLocation = TypedChunkLocation<int64_t>;
 
 /// \brief An utility that incrementally resolves logical indices into
 /// physical indices in a chunked array.
-struct ARROW_EXPORT ChunkResolver {
+class ARROW_EXPORT ChunkResolver {
  private:
   /// \brief Array containing `chunks.size() + 1` offsets.
   ///
@@ -76,14 +76,11 @@ struct ARROW_EXPORT ChunkResolver {
 
  public:
   explicit ChunkResolver(const ArrayVector& chunks) noexcept;
+
   explicit ChunkResolver(const std::vector<const Array*>& chunks) noexcept;
+
   explicit ChunkResolver(const RecordBatchVector& batches) noexcept;
 
-  /// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 
offsets.
-  ///
-  /// The first offset must be 0 and the last offset must be the logical 
length of the
-  /// chunked array. Each offset before the last represents the starting 
logical index of
-  /// the corresponding chunk.
   explicit ChunkResolver(std::vector<int64_t> offsets) noexcept
       : offsets_(std::move(offsets)), cached_chunk_(0) {
 #ifndef NDEBUG
@@ -115,11 +112,11 @@ struct ARROW_EXPORT ChunkResolver {
   /// The returned ChunkLocation contains the chunk index and the within-chunk 
index
   /// equivalent to the logical index.
   ///
-  /// \pre index >= 0
-  /// \post location.chunk_index in [0, chunks.size()]
+  /// \pre `index >= 0`
+  /// \post `location.chunk_index` in `[0, chunks.size()]`
   /// \param index The logical index to resolve
   /// \return ChunkLocation with a valid chunk_index if index is within
-  ///         bounds, or with chunk_index == chunks.size() if logical index is
+  ///         bounds, or with `chunk_index == chunks.size()` if logical index 
is
   ///         `>= chunked_array.length()`.
   inline ChunkLocation Resolve(int64_t index) const {
     const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed);
@@ -133,13 +130,13 @@ struct ARROW_EXPORT ChunkResolver {
   /// The returned ChunkLocation contains the chunk index and the within-chunk 
index
   /// equivalent to the logical index.
   ///
-  /// \pre index >= 0
-  /// \post location.chunk_index in [0, chunks.size()]
+  /// \pre `index >= 0`
+  /// \post `location.chunk_index` in `[0, chunks.size()]`
   /// \param index The logical index to resolve
   /// \param hint ChunkLocation{} or the last ChunkLocation returned by
   ///             this ChunkResolver.
   /// \return ChunkLocation with a valid chunk_index if index is within
-  ///         bounds, or with chunk_index == chunks.size() if logical index is
+  ///         bounds, or with `chunk_index == chunks.size()` if logical index 
is
   ///         `>= chunked_array.length()`.
   inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) 
const {
     assert(hint.chunk_index < static_cast<uint32_t>(offsets_.size()));
@@ -281,4 +278,13 @@ struct ARROW_EXPORT ChunkResolver {
   }
 };
 
-}  // namespace arrow::internal
+// Explicitly instantiate template base struct, for DLL linking on Windows
+template struct TypedChunkLocation<int32_t>;
+template struct TypedChunkLocation<int16_t>;
+template struct TypedChunkLocation<int8_t>;
+template struct TypedChunkLocation<uint8_t>;
+template struct TypedChunkLocation<uint16_t>;
+template struct TypedChunkLocation<uint32_t>;
+template struct TypedChunkLocation<int64_t>;
+template struct TypedChunkLocation<uint64_t>;
+}  // namespace arrow
diff --git a/cpp/src/arrow/chunk_resolver_benchmark.cc 
b/cpp/src/arrow/chunk_resolver_benchmark.cc
index 0756de3fbe..a6f539a444 100644
--- a/cpp/src/arrow/chunk_resolver_benchmark.cc
+++ b/cpp/src/arrow/chunk_resolver_benchmark.cc
@@ -28,9 +28,6 @@
 
 namespace arrow {
 
-using internal::ChunkResolver;
-using internal::TypedChunkLocation;
-
 namespace {
 
 int64_t constexpr kChunkedArrayLength = std::numeric_limits<uint16_t>::max();
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index c65b6cb6e2..02bcd0f902 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -199,7 +199,7 @@ class ARROW_EXPORT ChunkedArray {
  private:
   template <typename T, typename V>
   friend class ::arrow::stl::ChunkedArrayIterator;
-  internal::ChunkResolver chunk_resolver_;
+  ChunkResolver chunk_resolver_;
   ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
 };
 
diff --git a/cpp/src/arrow/chunked_array_test.cc 
b/cpp/src/arrow/chunked_array_test.cc
index f98dde689c..b3944fd1b1 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -36,10 +36,6 @@
 
 namespace arrow {
 
-using internal::ChunkLocation;
-using internal::ChunkResolver;
-using internal::TypedChunkLocation;
-
 class TestChunkedArray : public ::testing::Test {
  protected:
   virtual void Construct() {
diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h 
b/cpp/src/arrow/compute/kernels/chunked_internal.h
index 2b72e0ab31..f7cb615f3e 100644
--- a/cpp/src/arrow/compute/kernels/chunked_internal.h
+++ b/cpp/src/arrow/compute/kernels/chunked_internal.h
@@ -52,7 +52,7 @@ struct ResolvedChunk {
 
 class ChunkedArrayResolver {
  private:
-  ::arrow::internal::ChunkResolver resolver_;
+  ChunkResolver resolver_;
   std::vector<const Array*> chunks_;
 
  public:
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc 
b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 8766ca3baa..395ed86a06 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -24,7 +24,6 @@
 namespace arrow {
 
 using internal::checked_cast;
-using internal::ChunkLocation;
 
 namespace compute {
 namespace internal {
@@ -852,7 +851,7 @@ class TableSorter {
   const RecordBatchVector batches_;
   const SortOptions& options_;
   const NullPlacement null_placement_;
-  const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_;
+  const ::arrow::ChunkResolver left_resolver_, right_resolver_;
   const std::vector<ResolvedSortKey> sort_keys_;
   uint64_t* indices_begin_;
   uint64_t* indices_end_;
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_internal.h 
b/cpp/src/arrow/compute/kernels/vector_sort_internal.h
index 564afb8c08..bee7f838a0 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_internal.h
+++ b/cpp/src/arrow/compute/kernels/vector_sort_internal.h
@@ -749,9 +749,9 @@ struct ResolvedTableSortKey {
         order(order),
         null_count(null_count) {}
 
-  using LocationType = ::arrow::internal::ChunkLocation;
+  using LocationType = ::arrow::ChunkLocation;
 
-  ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const {
+  ResolvedChunk GetChunk(::arrow::ChunkLocation loc) const {
     return {chunks[loc.chunk_index], loc.index_in_chunk};
   }
 
diff --git a/cpp/src/arrow/stl_iterator.h b/cpp/src/arrow/stl_iterator.h
index 5f2acfb071..577066cba0 100644
--- a/cpp/src/arrow/stl_iterator.h
+++ b/cpp/src/arrow/stl_iterator.h
@@ -237,7 +237,7 @@ class ChunkedArrayIterator {
   }
 
  private:
-  arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const {
+  arrow::ChunkLocation GetChunkLocation(int64_t index) const {
     assert(chunked_array_);
     return chunked_array_->chunk_resolver_.Resolve(index);
   }
diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst
index a7e5d0cf07..133a432884 100644
--- a/docs/source/cpp/api/array.rst
+++ b/docs/source/cpp/api/array.rst
@@ -19,6 +19,9 @@
 Arrays
 ======
 
+Base classes
+============
+
 .. doxygenclass:: arrow::ArrayData
    :project: arrow_cpp
    :members:
@@ -85,6 +88,17 @@ Chunked Arrays
    :project: arrow_cpp
    :members:
 
+.. doxygenstruct:: arrow::ChunkLocation
+   :project: arrow_cpp
+   :members:
+
+.. doxygenstruct:: arrow::TypedChunkLocation
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::ChunkResolver
+   :project: arrow_cpp
+   :members:
 
 Utilities
 =========
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index bdaac0a9ce..90a459e19c 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -80,6 +80,14 @@ void DeletePointer(std::shared_ptr<T>* ptr) {
 template <typename T>
 using Pointer = cpp11::external_pointer<std::shared_ptr<T>, DeletePointer<T>>;
 
+#if ARROW_VERSION_MAJOR >= 18
+using ChunkResolver = arrow::ChunkResolver;
+using ChunkLocation = arrow::ChunkLocation;
+#else
+using ChunkResolver = arrow::internal::ChunkResolver;
+using ChunkLocation = arrow::internal::ChunkLocation;
+#endif
+
 class ArrowAltrepData {
  public:
   explicit ArrowAltrepData(const std::shared_ptr<ChunkedArray>& chunked_array)
@@ -87,13 +95,11 @@ class ArrowAltrepData {
 
   const std::shared_ptr<ChunkedArray>& chunked_array() { return 
chunked_array_; }
 
-  arrow::internal::ChunkLocation locate(int64_t index) {
-    return resolver_.Resolve(index);
-  }
+  ChunkLocation locate(int64_t index) { return resolver_.Resolve(index); }
 
  private:
   std::shared_ptr<ChunkedArray> chunked_array_;
-  arrow::internal::ChunkResolver resolver_;
+  ChunkResolver resolver_;
 };
 
 // the ChunkedArray that is being wrapped by the altrep object

Reply via email to