This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8163d026b3 GH-40431: [C++] Move key_hash/key_map/light_array related 
files to internal for prevent using by users (#40484)
8163d026b3 is described below

commit 8163d026b3c56253d9e33c0129fac5d9ba573c53
Author: ZhangHuiGui <[email protected]>
AuthorDate: Wed Apr 3 00:01:52 2024 +0800

    GH-40431: [C++] Move key_hash/key_map/light_array related files to internal 
for prevent using by users (#40484)
    
    ### Rationale for this change
    
    These files expose implementation details and APIs that are not meant for 
third-party use. This PR explicitly marks them internal, which also avoids 
having them installed.
    
    ### Are these changes tested?
    
    By existing builds and tests.
    
    ### Are there any user-facing changes?
    
    No, except hiding some header files that were not supposed to be included 
externally.
    
    * GitHub Issue: #40431
    
    Lead-authored-by: ZhangHuiGui <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/CMakeLists.txt                               | 10 +++++-----
 cpp/src/arrow/acero/asof_join_node.cc                      |  4 ++--
 cpp/src/arrow/acero/bloom_filter_test.cc                   |  2 +-
 cpp/src/arrow/acero/hash_join_node.cc                      |  2 +-
 cpp/src/arrow/acero/hash_join_node.h                       |  3 ++-
 cpp/src/arrow/acero/schema_util.h                          | 14 +++++++-------
 cpp/src/arrow/acero/swiss_join.cc                          |  2 +-
 cpp/src/arrow/acero/swiss_join_internal.h                  |  4 ++--
 .../arrow/compute/{key_hash.cc => key_hash_internal.cc}    |  4 ++--
 cpp/src/arrow/compute/{key_hash.h => key_hash_internal.h}  |  2 +-
 .../{key_hash_avx2.cc => key_hash_internal_avx2.cc}        |  2 +-
 cpp/src/arrow/compute/key_hash_test.cc                     |  2 +-
 cpp/src/arrow/compute/{key_map.cc => key_map_internal.cc}  |  2 +-
 cpp/src/arrow/compute/{key_map.h => key_map_internal.h}    |  0
 .../compute/{key_map_avx2.cc => key_map_internal_avx2.cc}  |  2 +-
 .../compute/{light_array.cc => light_array_internal.cc}    |  2 +-
 .../compute/{light_array.h => light_array_internal.h}      |  0
 cpp/src/arrow/compute/light_array_test.cc                  |  2 +-
 cpp/src/arrow/compute/row/compare_internal.h               |  2 +-
 cpp/src/arrow/compute/row/encode_internal.h                |  4 ++--
 cpp/src/arrow/compute/row/grouper.cc                       |  4 ++--
 cpp/src/arrow/compute/row/row_internal.h                   |  2 +-
 cpp/src/arrow/compute/util.cc                              |  4 ++--
 cpp/src/arrow/compute/util.h                               |  8 ++++++--
 24 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 4bf1008af4..617bfedabf 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -689,9 +689,9 @@ set(ARROW_COMPUTE_SRCS
     compute/function.cc
     compute/function_internal.cc
     compute/kernel.cc
-    compute/key_hash.cc
-    compute/key_map.cc
-    compute/light_array.cc
+    compute/key_hash_internal.cc
+    compute/key_map_internal.cc
+    compute/light_array_internal.cc
     compute/ordering.cc
     compute/registry.cc
     compute/kernels/codegen_internal.cc
@@ -717,8 +717,8 @@ set(ARROW_COMPUTE_SRCS
     compute/row/row_internal.cc
     compute/util.cc)
 
-append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc)
-append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc)
+append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
+append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS 
compute/key_map_internal_avx2.cc)
 append_runtime_avx2_src(ARROW_COMPUTE_SRCS 
compute/row/compare_internal_avx2.cc)
 append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
 append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
diff --git a/cpp/src/arrow/acero/asof_join_node.cc 
b/cpp/src/arrow/acero/asof_join_node.cc
index cf0d475c1d..48cc83dd3d 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -45,8 +45,8 @@
 #include "arrow/compute/function_internal.h"
 #endif
 #include "arrow/acero/time_series_util.h"
-#include "arrow/compute/key_hash.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_hash_internal.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc 
b/cpp/src/arrow/acero/bloom_filter_test.cc
index bad331cfd9..a2d6e9575a 100644
--- a/cpp/src/arrow/acero/bloom_filter_test.cc
+++ b/cpp/src/arrow/acero/bloom_filter_test.cc
@@ -27,7 +27,7 @@
 #include "arrow/acero/task_util.h"
 #include "arrow/acero/test_util_internal.h"
 #include "arrow/acero/util.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
 #include "arrow/util/bitmap_ops.h"
 #include "arrow/util/config.h"
 #include "arrow/util/cpu_info.h"
diff --git a/cpp/src/arrow/acero/hash_join_node.cc 
b/cpp/src/arrow/acero/hash_join_node.cc
index c0179fd160..b49364300d 100644
--- a/cpp/src/arrow/acero/hash_join_node.cc
+++ b/cpp/src/arrow/acero/hash_join_node.cc
@@ -27,7 +27,7 @@
 #include "arrow/acero/options.h"
 #include "arrow/acero/schema_util.h"
 #include "arrow/acero/util.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/future.h"
 #include "arrow/util/thread_pool.h"
diff --git a/cpp/src/arrow/acero/hash_join_node.h 
b/cpp/src/arrow/acero/hash_join_node.h
index cca64d5983..ad60019cea 100644
--- a/cpp/src/arrow/acero/hash_join_node.h
+++ b/cpp/src/arrow/acero/hash_join_node.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <cassert>
 #include <vector>
 
 #include "arrow/acero/options.h"
@@ -88,7 +89,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
                                             const Expression& filter);
 
   bool PayloadIsEmpty(int side) {
-    ARROW_DCHECK(side == 0 || side == 1);
+    assert(side == 0 || side == 1);
     return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
   }
 
diff --git a/cpp/src/arrow/acero/schema_util.h 
b/cpp/src/arrow/acero/schema_util.h
index 6760022feb..db3076a588 100644
--- a/cpp/src/arrow/acero/schema_util.h
+++ b/cpp/src/arrow/acero/schema_util.h
@@ -17,13 +17,13 @@
 
 #pragma once
 
+#include <cassert>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
-#include "arrow/compute/light_array.h"  // for KeyColumnMetadata
-#include "arrow/type.h"                 // for DataType, FieldRef, Field and 
Schema
+#include "arrow/type.h"  // for DataType, FieldRef, Field and Schema
 
 namespace arrow {
 
@@ -47,8 +47,8 @@ struct SchemaProjectionMap {
   const int* source_to_base;
   const int* base_to_target;
   inline int get(int i) const {
-    ARROW_DCHECK(i >= 0 && i < num_cols);
-    ARROW_DCHECK(source_to_base[i] != kMissingField);
+    assert(i >= 0 && i < num_cols);
+    assert(source_to_base[i] != kMissingField);
     return base_to_target[source_to_base[i]];
   }
 };
@@ -66,7 +66,7 @@ class SchemaProjectionMaps {
   Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
               const std::vector<ProjectionIdEnum>& projection_handles,
               const std::vector<const std::vector<FieldRef>*>& projections) {
-    ARROW_DCHECK(projection_handles.size() == projections.size());
+    assert(projection_handles.size() == projections.size());
     ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
     for (size_t i = 0; i < projections.size(); ++i) {
       ARROW_RETURN_NOT_OK(
@@ -174,7 +174,7 @@ class SchemaProjectionMaps {
       }
     }
     // We should never get here
-    ARROW_DCHECK(false);
+    assert(false);
     return -1;
   }
 
@@ -207,7 +207,7 @@ class SchemaProjectionMaps {
             break;
           }
         }
-        ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
+        assert(field_id != SchemaProjectionMap::kMissingField);
         mapping[i] = field_id;
         inverse_mapping[field_id] = i;
       }
diff --git a/cpp/src/arrow/acero/swiss_join.cc 
b/cpp/src/arrow/acero/swiss_join.cc
index 68b0e37b01..61c8bfe954 100644
--- a/cpp/src/arrow/acero/swiss_join.cc
+++ b/cpp/src/arrow/acero/swiss_join.cc
@@ -25,7 +25,7 @@
 #include "arrow/acero/util.h"
 #include "arrow/array/util.h"  // MakeArrayFromScalar
 #include "arrow/compute/kernels/row_encoder_internal.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
 #include "arrow/compute/row/compare_internal.h"
 #include "arrow/compute/row/encode_internal.h"
 #include "arrow/util/bit_util.h"
diff --git a/cpp/src/arrow/acero/swiss_join_internal.h 
b/cpp/src/arrow/acero/swiss_join_internal.h
index aa36a61109..dceb74abe4 100644
--- a/cpp/src/arrow/acero/swiss_join_internal.h
+++ b/cpp/src/arrow/acero/swiss_join_internal.h
@@ -23,8 +23,8 @@
 #include "arrow/acero/schema_util.h"
 #include "arrow/acero/task_util.h"
 #include "arrow/compute/kernels/row_encoder_internal.h"
-#include "arrow/compute/key_map.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_map_internal.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/encode_internal.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/key_hash.cc 
b/cpp/src/arrow/compute/key_hash_internal.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_hash.cc
rename to cpp/src/arrow/compute/key_hash_internal.cc
index 1902b9ce9a..a0002efb3f 100644
--- a/cpp/src/arrow/compute/key_hash.cc
+++ b/cpp/src/arrow/compute/key_hash_internal.cc
@@ -15,14 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
 
 #include <memory.h>
 
 #include <algorithm>
 #include <cstdint>
 
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/ubsan.h"
 
diff --git a/cpp/src/arrow/compute/key_hash.h 
b/cpp/src/arrow/compute/key_hash_internal.h
similarity index 99%
rename from cpp/src/arrow/compute/key_hash.h
rename to cpp/src/arrow/compute/key_hash_internal.h
index 1173df5ed1..7d226f5208 100644
--- a/cpp/src/arrow/compute/key_hash.h
+++ b/cpp/src/arrow/compute/key_hash_internal.h
@@ -23,7 +23,7 @@
 
 #include <cstdint>
 
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/util.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/key_hash_avx2.cc 
b/cpp/src/arrow/compute/key_hash_internal_avx2.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_hash_avx2.cc
rename to cpp/src/arrow/compute/key_hash_internal_avx2.cc
index aec2800c64..4def87bd7a 100644
--- a/cpp/src/arrow/compute/key_hash_avx2.cc
+++ b/cpp/src/arrow/compute/key_hash_internal_avx2.cc
@@ -17,7 +17,7 @@
 
 #include <immintrin.h>
 
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
 #include "arrow/util/bit_util.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/key_hash_test.cc 
b/cpp/src/arrow/compute/key_hash_test.cc
index c998df7169..4e5d869cb7 100644
--- a/cpp/src/arrow/compute/key_hash_test.cc
+++ b/cpp/src/arrow/compute/key_hash_test.cc
@@ -23,7 +23,7 @@
 #include <unordered_set>
 
 #include "arrow/array/builder_binary.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/cpu_info.h"
diff --git a/cpp/src/arrow/compute/key_map.cc 
b/cpp/src/arrow/compute/key_map_internal.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_map.cc
rename to cpp/src/arrow/compute/key_map_internal.cc
index a027ec811c..9e6d60ab50 100644
--- a/cpp/src/arrow/compute/key_map.cc
+++ b/cpp/src/arrow/compute/key_map_internal.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/compute/key_map.h"
+#include "arrow/compute/key_map_internal.h"
 
 #include <algorithm>
 #include <cstdint>
diff --git a/cpp/src/arrow/compute/key_map.h 
b/cpp/src/arrow/compute/key_map_internal.h
similarity index 100%
rename from cpp/src/arrow/compute/key_map.h
rename to cpp/src/arrow/compute/key_map_internal.h
diff --git a/cpp/src/arrow/compute/key_map_avx2.cc 
b/cpp/src/arrow/compute/key_map_internal_avx2.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_map_avx2.cc
rename to cpp/src/arrow/compute/key_map_internal_avx2.cc
index 3526a6cb0f..8c98166f49 100644
--- a/cpp/src/arrow/compute/key_map_avx2.cc
+++ b/cpp/src/arrow/compute/key_map_internal_avx2.cc
@@ -17,7 +17,7 @@
 
 #include <immintrin.h>
 
-#include "arrow/compute/key_map.h"
+#include "arrow/compute/key_map_internal.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/compute/light_array.cc 
b/cpp/src/arrow/compute/light_array_internal.cc
similarity index 99%
rename from cpp/src/arrow/compute/light_array.cc
rename to cpp/src/arrow/compute/light_array_internal.cc
index b225e04b05..4f235925d0 100644
--- a/cpp/src/arrow/compute/light_array.cc
+++ b/cpp/src/arrow/compute/light_array_internal.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
 
 #include <type_traits>
 
diff --git a/cpp/src/arrow/compute/light_array.h 
b/cpp/src/arrow/compute/light_array_internal.h
similarity index 100%
rename from cpp/src/arrow/compute/light_array.h
rename to cpp/src/arrow/compute/light_array_internal.h
diff --git a/cpp/src/arrow/compute/light_array_test.cc 
b/cpp/src/arrow/compute/light_array_test.cc
index ecc5f3ad37..08f36ee606 100644
--- a/cpp/src/arrow/compute/light_array_test.cc
+++ b/cpp/src/arrow/compute/light_array_test.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
 
 #include <gtest/gtest.h>
 #include <numeric>
diff --git a/cpp/src/arrow/compute/row/compare_internal.h 
b/cpp/src/arrow/compute/row/compare_internal.h
index db953fbe11..b039ca97ff 100644
--- a/cpp/src/arrow/compute/row/compare_internal.h
+++ b/cpp/src/arrow/compute/row/compare_internal.h
@@ -19,7 +19,7 @@
 
 #include <cstdint>
 
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/encode_internal.h"
 #include "arrow/compute/row/row_internal.h"
 #include "arrow/compute/util.h"
diff --git a/cpp/src/arrow/compute/row/encode_internal.h 
b/cpp/src/arrow/compute/row/encode_internal.h
index 6091fb6698..2afc150530 100644
--- a/cpp/src/arrow/compute/row/encode_internal.h
+++ b/cpp/src/arrow/compute/row/encode_internal.h
@@ -22,8 +22,8 @@
 #include <vector>
 
 #include "arrow/array/data.h"
-#include "arrow/compute/key_map.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_map_internal.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/row/row_internal.h"
 #include "arrow/compute/util.h"
 #include "arrow/memory_pool.h"
diff --git a/cpp/src/arrow/compute/row/grouper.cc 
b/cpp/src/arrow/compute/row/grouper.cc
index 5e23eda16f..756c70967a 100644
--- a/cpp/src/arrow/compute/row/grouper.cc
+++ b/cpp/src/arrow/compute/row/grouper.cc
@@ -26,8 +26,8 @@
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernels/row_encoder_internal.h"
-#include "arrow/compute/key_hash.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_hash_internal.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/compute/row/compare_internal.h"
 #include "arrow/compute/row/grouper_internal.h"
diff --git a/cpp/src/arrow/compute/row/row_internal.h 
b/cpp/src/arrow/compute/row/row_internal.h
index c9194267aa..3220b7ffe6 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -20,7 +20,7 @@
 #include <vector>
 
 #include "arrow/buffer.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
 #include "arrow/util/logging.h"
diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
index 2058ba9f30..b0c863b26a 100644
--- a/cpp/src/arrow/compute/util.cc
+++ b/cpp/src/arrow/compute/util.cc
@@ -32,7 +32,7 @@ using internal::CpuInfo;
 namespace util {
 
 void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
-  int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * 
sizeof(uint64_t);
+  int64_t new_top = top_ + EstimatedAllocationSize(num_bytes);
   // Stack overflow check (see GH-39582).
   // XXX cannot return a regular Status because most consumers do not either.
   ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow";
@@ -48,7 +48,7 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** 
data, int* id) {
 
 void TempVectorStack::release(int id, uint32_t num_bytes) {
   ARROW_DCHECK(num_vectors_ == id + 1);
-  int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
+  int64_t size = EstimatedAllocationSize(num_bytes);
   ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + 
top_)[-1] ==
                kGuard2);
   ARROW_DCHECK(top_ >= size);
diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h
index e2a2e71b85..88dce160ce 100644
--- a/cpp/src/arrow/compute/util.h
+++ b/cpp/src/arrow/compute/util.h
@@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack {
   Status Init(MemoryPool* pool, int64_t size) {
     num_vectors_ = 0;
     top_ = 0;
-    buffer_size_ = PaddedAllocationSize(size) + kPadding + 2 * 
sizeof(uint64_t);
+    buffer_size_ = EstimatedAllocationSize(size);
     ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
     // Ensure later operations don't accidentally read uninitialized memory.
     std::memset(buffer->mutable_data(), 0xFF, size);
@@ -98,7 +98,11 @@ class ARROW_EXPORT TempVectorStack {
   }
 
  private:
-  int64_t PaddedAllocationSize(int64_t num_bytes) {
+  static int64_t EstimatedAllocationSize(int64_t size) {
+    return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
+  }
+
+  static int64_t PaddedAllocationSize(int64_t num_bytes) {
     // Round up allocation size to multiple of 8 bytes
     // to avoid returning temp vectors with unaligned address.
     //

Reply via email to