This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 8163d026b3 GH-40431: [C++] Move key_hash/key_map/light_array related
files to internal for prevent using by users (#40484)
8163d026b3 is described below
commit 8163d026b3c56253d9e33c0129fac5d9ba573c53
Author: ZhangHuiGui <[email protected]>
AuthorDate: Wed Apr 3 00:01:52 2024 +0800
GH-40431: [C++] Move key_hash/key_map/light_array related files to internal
for prevent using by users (#40484)
### Rationale for this change
These files expose implementation details and APIs that are not meant for
third-party use. This PR explicitly marks them internal, which also avoids
having them installed.
### Are these changes tested?
By existing builds and tests.
### Are there any user-facing changes?
No, except hiding some header files that were not supposed to be included
externally.
* GitHub Issue: #40431
Lead-authored-by: ZhangHuiGui <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/CMakeLists.txt | 10 +++++-----
cpp/src/arrow/acero/asof_join_node.cc | 4 ++--
cpp/src/arrow/acero/bloom_filter_test.cc | 2 +-
cpp/src/arrow/acero/hash_join_node.cc | 2 +-
cpp/src/arrow/acero/hash_join_node.h | 3 ++-
cpp/src/arrow/acero/schema_util.h | 14 +++++++-------
cpp/src/arrow/acero/swiss_join.cc | 2 +-
cpp/src/arrow/acero/swiss_join_internal.h | 4 ++--
.../arrow/compute/{key_hash.cc => key_hash_internal.cc} | 4 ++--
cpp/src/arrow/compute/{key_hash.h => key_hash_internal.h} | 2 +-
.../{key_hash_avx2.cc => key_hash_internal_avx2.cc} | 2 +-
cpp/src/arrow/compute/key_hash_test.cc | 2 +-
cpp/src/arrow/compute/{key_map.cc => key_map_internal.cc} | 2 +-
cpp/src/arrow/compute/{key_map.h => key_map_internal.h} | 0
.../compute/{key_map_avx2.cc => key_map_internal_avx2.cc} | 2 +-
.../compute/{light_array.cc => light_array_internal.cc} | 2 +-
.../compute/{light_array.h => light_array_internal.h} | 0
cpp/src/arrow/compute/light_array_test.cc | 2 +-
cpp/src/arrow/compute/row/compare_internal.h | 2 +-
cpp/src/arrow/compute/row/encode_internal.h | 4 ++--
cpp/src/arrow/compute/row/grouper.cc | 4 ++--
cpp/src/arrow/compute/row/row_internal.h | 2 +-
cpp/src/arrow/compute/util.cc | 4 ++--
cpp/src/arrow/compute/util.h | 8 ++++++--
24 files changed, 44 insertions(+), 39 deletions(-)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 4bf1008af4..617bfedabf 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -689,9 +689,9 @@ set(ARROW_COMPUTE_SRCS
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
- compute/key_hash.cc
- compute/key_map.cc
- compute/light_array.cc
+ compute/key_hash_internal.cc
+ compute/key_map_internal.cc
+ compute/light_array_internal.cc
compute/ordering.cc
compute/registry.cc
compute/kernels/codegen_internal.cc
@@ -717,8 +717,8 @@ set(ARROW_COMPUTE_SRCS
compute/row/row_internal.cc
compute/util.cc)
-append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc)
-append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc)
+append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
+append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS
compute/key_map_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS
compute/row/compare_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
diff --git a/cpp/src/arrow/acero/asof_join_node.cc
b/cpp/src/arrow/acero/asof_join_node.cc
index cf0d475c1d..48cc83dd3d 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -45,8 +45,8 @@
#include "arrow/compute/function_internal.h"
#endif
#include "arrow/acero/time_series_util.h"
-#include "arrow/compute/key_hash.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_hash_internal.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/status.h"
diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc
b/cpp/src/arrow/acero/bloom_filter_test.cc
index bad331cfd9..a2d6e9575a 100644
--- a/cpp/src/arrow/acero/bloom_filter_test.cc
+++ b/cpp/src/arrow/acero/bloom_filter_test.cc
@@ -27,7 +27,7 @@
#include "arrow/acero/task_util.h"
#include "arrow/acero/test_util_internal.h"
#include "arrow/acero/util.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/config.h"
#include "arrow/util/cpu_info.h"
diff --git a/cpp/src/arrow/acero/hash_join_node.cc
b/cpp/src/arrow/acero/hash_join_node.cc
index c0179fd160..b49364300d 100644
--- a/cpp/src/arrow/acero/hash_join_node.cc
+++ b/cpp/src/arrow/acero/hash_join_node.cc
@@ -27,7 +27,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/schema_util.h"
#include "arrow/acero/util.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/future.h"
#include "arrow/util/thread_pool.h"
diff --git a/cpp/src/arrow/acero/hash_join_node.h
b/cpp/src/arrow/acero/hash_join_node.h
index cca64d5983..ad60019cea 100644
--- a/cpp/src/arrow/acero/hash_join_node.h
+++ b/cpp/src/arrow/acero/hash_join_node.h
@@ -17,6 +17,7 @@
#pragma once
+#include <cassert>
#include <vector>
#include "arrow/acero/options.h"
@@ -88,7 +89,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
const Expression& filter);
bool PayloadIsEmpty(int side) {
- ARROW_DCHECK(side == 0 || side == 1);
+ assert(side == 0 || side == 1);
return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
}
diff --git a/cpp/src/arrow/acero/schema_util.h
b/cpp/src/arrow/acero/schema_util.h
index 6760022feb..db3076a588 100644
--- a/cpp/src/arrow/acero/schema_util.h
+++ b/cpp/src/arrow/acero/schema_util.h
@@ -17,13 +17,13 @@
#pragma once
+#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
-#include "arrow/compute/light_array.h" // for KeyColumnMetadata
-#include "arrow/type.h" // for DataType, FieldRef, Field and
Schema
+#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
namespace arrow {
@@ -47,8 +47,8 @@ struct SchemaProjectionMap {
const int* source_to_base;
const int* base_to_target;
inline int get(int i) const {
- ARROW_DCHECK(i >= 0 && i < num_cols);
- ARROW_DCHECK(source_to_base[i] != kMissingField);
+ assert(i >= 0 && i < num_cols);
+ assert(source_to_base[i] != kMissingField);
return base_to_target[source_to_base[i]];
}
};
@@ -66,7 +66,7 @@ class SchemaProjectionMaps {
Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
const std::vector<ProjectionIdEnum>& projection_handles,
const std::vector<const std::vector<FieldRef>*>& projections) {
- ARROW_DCHECK(projection_handles.size() == projections.size());
+ assert(projection_handles.size() == projections.size());
ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
for (size_t i = 0; i < projections.size(); ++i) {
ARROW_RETURN_NOT_OK(
@@ -174,7 +174,7 @@ class SchemaProjectionMaps {
}
}
// We should never get here
- ARROW_DCHECK(false);
+ assert(false);
return -1;
}
@@ -207,7 +207,7 @@ class SchemaProjectionMaps {
break;
}
}
- ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
+ assert(field_id != SchemaProjectionMap::kMissingField);
mapping[i] = field_id;
inverse_mapping[field_id] = i;
}
diff --git a/cpp/src/arrow/acero/swiss_join.cc
b/cpp/src/arrow/acero/swiss_join.cc
index 68b0e37b01..61c8bfe954 100644
--- a/cpp/src/arrow/acero/swiss_join.cc
+++ b/cpp/src/arrow/acero/swiss_join.cc
@@ -25,7 +25,7 @@
#include "arrow/acero/util.h"
#include "arrow/array/util.h" // MakeArrayFromScalar
#include "arrow/compute/kernels/row_encoder_internal.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/util/bit_util.h"
diff --git a/cpp/src/arrow/acero/swiss_join_internal.h
b/cpp/src/arrow/acero/swiss_join_internal.h
index aa36a61109..dceb74abe4 100644
--- a/cpp/src/arrow/acero/swiss_join_internal.h
+++ b/cpp/src/arrow/acero/swiss_join_internal.h
@@ -23,8 +23,8 @@
#include "arrow/acero/schema_util.h"
#include "arrow/acero/task_util.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
-#include "arrow/compute/key_map.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_map_internal.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/encode_internal.h"
namespace arrow {
diff --git a/cpp/src/arrow/compute/key_hash.cc
b/cpp/src/arrow/compute/key_hash_internal.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_hash.cc
rename to cpp/src/arrow/compute/key_hash_internal.cc
index 1902b9ce9a..a0002efb3f 100644
--- a/cpp/src/arrow/compute/key_hash.cc
+++ b/cpp/src/arrow/compute/key_hash_internal.cc
@@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
#include <memory.h>
#include <algorithm>
#include <cstdint>
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/ubsan.h"
diff --git a/cpp/src/arrow/compute/key_hash.h
b/cpp/src/arrow/compute/key_hash_internal.h
similarity index 99%
rename from cpp/src/arrow/compute/key_hash.h
rename to cpp/src/arrow/compute/key_hash_internal.h
index 1173df5ed1..7d226f5208 100644
--- a/cpp/src/arrow/compute/key_hash.h
+++ b/cpp/src/arrow/compute/key_hash_internal.h
@@ -23,7 +23,7 @@
#include <cstdint>
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/util.h"
namespace arrow {
diff --git a/cpp/src/arrow/compute/key_hash_avx2.cc
b/cpp/src/arrow/compute/key_hash_internal_avx2.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_hash_avx2.cc
rename to cpp/src/arrow/compute/key_hash_internal_avx2.cc
index aec2800c64..4def87bd7a 100644
--- a/cpp/src/arrow/compute/key_hash_avx2.cc
+++ b/cpp/src/arrow/compute/key_hash_internal_avx2.cc
@@ -17,7 +17,7 @@
#include <immintrin.h>
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/bit_util.h"
namespace arrow {
diff --git a/cpp/src/arrow/compute/key_hash_test.cc
b/cpp/src/arrow/compute/key_hash_test.cc
index c998df7169..4e5d869cb7 100644
--- a/cpp/src/arrow/compute/key_hash_test.cc
+++ b/cpp/src/arrow/compute/key_hash_test.cc
@@ -23,7 +23,7 @@
#include <unordered_set>
#include "arrow/array/builder_binary.h"
-#include "arrow/compute/key_hash.h"
+#include "arrow/compute/key_hash_internal.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/util/cpu_info.h"
diff --git a/cpp/src/arrow/compute/key_map.cc
b/cpp/src/arrow/compute/key_map_internal.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_map.cc
rename to cpp/src/arrow/compute/key_map_internal.cc
index a027ec811c..9e6d60ab50 100644
--- a/cpp/src/arrow/compute/key_map.cc
+++ b/cpp/src/arrow/compute/key_map_internal.cc
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/compute/key_map.h"
+#include "arrow/compute/key_map_internal.h"
#include <algorithm>
#include <cstdint>
diff --git a/cpp/src/arrow/compute/key_map.h
b/cpp/src/arrow/compute/key_map_internal.h
similarity index 100%
rename from cpp/src/arrow/compute/key_map.h
rename to cpp/src/arrow/compute/key_map_internal.h
diff --git a/cpp/src/arrow/compute/key_map_avx2.cc
b/cpp/src/arrow/compute/key_map_internal_avx2.cc
similarity index 99%
rename from cpp/src/arrow/compute/key_map_avx2.cc
rename to cpp/src/arrow/compute/key_map_internal_avx2.cc
index 3526a6cb0f..8c98166f49 100644
--- a/cpp/src/arrow/compute/key_map_avx2.cc
+++ b/cpp/src/arrow/compute/key_map_internal_avx2.cc
@@ -17,7 +17,7 @@
#include <immintrin.h>
-#include "arrow/compute/key_map.h"
+#include "arrow/compute/key_map_internal.h"
#include "arrow/util/logging.h"
namespace arrow {
diff --git a/cpp/src/arrow/compute/light_array.cc
b/cpp/src/arrow/compute/light_array_internal.cc
similarity index 99%
rename from cpp/src/arrow/compute/light_array.cc
rename to cpp/src/arrow/compute/light_array_internal.cc
index b225e04b05..4f235925d0 100644
--- a/cpp/src/arrow/compute/light_array.cc
+++ b/cpp/src/arrow/compute/light_array_internal.cc
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
#include <type_traits>
diff --git a/cpp/src/arrow/compute/light_array.h
b/cpp/src/arrow/compute/light_array_internal.h
similarity index 100%
rename from cpp/src/arrow/compute/light_array.h
rename to cpp/src/arrow/compute/light_array_internal.h
diff --git a/cpp/src/arrow/compute/light_array_test.cc
b/cpp/src/arrow/compute/light_array_test.cc
index ecc5f3ad37..08f36ee606 100644
--- a/cpp/src/arrow/compute/light_array_test.cc
+++ b/cpp/src/arrow/compute/light_array_test.cc
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
#include <gtest/gtest.h>
#include <numeric>
diff --git a/cpp/src/arrow/compute/row/compare_internal.h
b/cpp/src/arrow/compute/row/compare_internal.h
index db953fbe11..b039ca97ff 100644
--- a/cpp/src/arrow/compute/row/compare_internal.h
+++ b/cpp/src/arrow/compute/row/compare_internal.h
@@ -19,7 +19,7 @@
#include <cstdint>
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/compute/util.h"
diff --git a/cpp/src/arrow/compute/row/encode_internal.h
b/cpp/src/arrow/compute/row/encode_internal.h
index 6091fb6698..2afc150530 100644
--- a/cpp/src/arrow/compute/row/encode_internal.h
+++ b/cpp/src/arrow/compute/row/encode_internal.h
@@ -22,8 +22,8 @@
#include <vector>
#include "arrow/array/data.h"
-#include "arrow/compute/key_map.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_map_internal.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/compute/util.h"
#include "arrow/memory_pool.h"
diff --git a/cpp/src/arrow/compute/row/grouper.cc
b/cpp/src/arrow/compute/row/grouper.cc
index 5e23eda16f..756c70967a 100644
--- a/cpp/src/arrow/compute/row/grouper.cc
+++ b/cpp/src/arrow/compute/row/grouper.cc
@@ -26,8 +26,8 @@
#include "arrow/compute/api_vector.h"
#include "arrow/compute/function.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
-#include "arrow/compute/key_hash.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/key_hash_internal.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/grouper_internal.h"
diff --git a/cpp/src/arrow/compute/row/row_internal.h
b/cpp/src/arrow/compute/row/row_internal.h
index c9194267aa..3220b7ffe6 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -20,7 +20,7 @@
#include <vector>
#include "arrow/buffer.h"
-#include "arrow/compute/light_array.h"
+#include "arrow/compute/light_array_internal.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
index 2058ba9f30..b0c863b26a 100644
--- a/cpp/src/arrow/compute/util.cc
+++ b/cpp/src/arrow/compute/util.cc
@@ -32,7 +32,7 @@ using internal::CpuInfo;
namespace util {
void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
- int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 *
sizeof(uint64_t);
+ int64_t new_top = top_ + EstimatedAllocationSize(num_bytes);
// Stack overflow check (see GH-39582).
// XXX cannot return a regular Status because most consumers do not either.
ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow";
@@ -48,7 +48,7 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t**
data, int* id) {
void TempVectorStack::release(int id, uint32_t num_bytes) {
ARROW_DCHECK(num_vectors_ == id + 1);
- int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
+ int64_t size = EstimatedAllocationSize(num_bytes);
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() +
top_)[-1] ==
kGuard2);
ARROW_DCHECK(top_ >= size);
diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h
index e2a2e71b85..88dce160ce 100644
--- a/cpp/src/arrow/compute/util.h
+++ b/cpp/src/arrow/compute/util.h
@@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack {
Status Init(MemoryPool* pool, int64_t size) {
num_vectors_ = 0;
top_ = 0;
- buffer_size_ = PaddedAllocationSize(size) + kPadding + 2 *
sizeof(uint64_t);
+ buffer_size_ = EstimatedAllocationSize(size);
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
// Ensure later operations don't accidentally read uninitialized memory.
std::memset(buffer->mutable_data(), 0xFF, size);
@@ -98,7 +98,11 @@ class ARROW_EXPORT TempVectorStack {
}
private:
- int64_t PaddedAllocationSize(int64_t num_bytes) {
+ static int64_t EstimatedAllocationSize(int64_t size) {
+ return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
+ }
+
+ static int64_t PaddedAllocationSize(int64_t num_bytes) {
// Round up allocation size to multiple of 8 bytes
// to avoid returning temp vectors with unaligned address.
//