This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 12f68fca05 GH-43209: [C++] Add lint for DCHECK in public headers
(#43248)
12f68fca05 is described below
commit 12f68fca055c6301947fa29c72cda13a9360e054
Author: Rossi Sun <[email protected]>
AuthorDate: Tue Jul 16 20:37:05 2024 +0800
GH-43209: [C++] Add lint for DCHECK in public headers (#43248)
### Rationale for this change
I raised my question in #43209 about which I have always been curious. The
top answer makes a good sense and after some searching in the code base, I
found more evidence like:
https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/public_api_test.cc#L67-L71
So I'm making the following changes to `DCHECK` macro family.
### What changes are included in this PR?
1. Add lint rule for `DCHECK` usage in public headers;
2. Cleanup exisiting `DCHECK` usages in public (probably non-public, but at
least not named with `internal`) headers;
3. Add `ifdef` protection for `DCHECK` definition like we did for
`ASSIGN_OR_RAISE`
(https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/result_internal.h#L20-L22)
and `RETURN_NOT_OK`
(https://github.com/apache/arrow/blob/03726178494c8978bf48b9bab15ed9676e7c9196/cpp/src/arrow/status.h#L80-L82),
to not mess up user code that directly includes `arrow/util/logging.h`.
4. Add comments as guideline.
### Are these changes tested?
No test needed.
### Are there any user-facing changes?
Probably not?
* GitHub Issue: #43209
Authored-by: Ruoxi Sun <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/build-support/lint_cpp_cli.py | 11 +++++------
cpp/src/arrow/acero/asof_join_node.cc | 2 +-
cpp/src/arrow/acero/sorted_merge_node.cc | 2 +-
...rialized_table.h => unmaterialized_table_internal.h} | 0
cpp/src/arrow/acero/util.h | 2 +-
.../{bit_stream_utils.h => bit_stream_utils_internal.h} | 0
cpp/src/arrow/util/bit_util_test.cc | 2 +-
cpp/src/arrow/util/logging.h | 17 +++++++++++++++++
.../util/{rle_encoding.h => rle_encoding_internal.h} | 2 +-
cpp/src/arrow/util/rle_encoding_test.cc | 4 ++--
cpp/src/arrow/util/tdigest.h | 2 +-
cpp/src/arrow/util/vector.h | 8 ++++----
cpp/src/gandiva/dex_visitor.h | 2 +-
cpp/src/gandiva/engine.h | 2 +-
cpp/src/gandiva/eval_batch.h | 12 ++++++------
cpp/src/gandiva/llvm_types.h | 2 +-
cpp/src/gandiva/local_bitmaps_holder.h | 2 +-
cpp/src/gandiva/selection_vector_impl.h | 2 +-
cpp/src/parquet/bloom_filter.h | 4 ++--
cpp/src/parquet/column_reader.cc | 4 ++--
cpp/src/parquet/column_writer.cc | 4 ++--
cpp/src/parquet/encoding.cc | 4 ++--
cpp/src/parquet/level_conversion_inc.h | 2 +-
23 files changed, 54 insertions(+), 38 deletions(-)
diff --git a/cpp/build-support/lint_cpp_cli.py
b/cpp/build-support/lint_cpp_cli.py
index a0eb8f0efe..47abd53fe9 100755
--- a/cpp/build-support/lint_cpp_cli.py
+++ b/cpp/build-support/lint_cpp_cli.py
@@ -31,6 +31,7 @@ _STRIP_COMMENT_REGEX = re.compile('(.+)?(?=//)')
_NULLPTR_REGEX = re.compile(r'.*\bnullptr\b.*')
_RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*')
_ASSIGN_OR_RAISE_REGEX = re.compile(r'.*\sASSIGN_OR_RAISE.*')
+_DCHECK_REGEX = re.compile(r'.*\sDCHECK.*')
def _paths(paths):
@@ -54,14 +55,12 @@ def lint_file(path):
(lambda x: re.match(_RETURN_NOT_OK_REGEX, x),
'Use ARROW_RETURN_NOT_OK in header files', _paths('''\
arrow/status.h
- test
- arrow/util/hash.h
arrow/python/util''')),
(lambda x: re.match(_ASSIGN_OR_RAISE_REGEX, x),
- 'Use ARROW_ASSIGN_OR_RAISE in header files', _paths('''\
- arrow/result_internal.h
- test
- '''))
+ 'Use ARROW_ASSIGN_OR_RAISE in header files', []),
+ (lambda x: re.match(_DCHECK_REGEX, x),
+ 'Use ARROW_DCHECK in header files', _paths('''\
+ arrow/util/logging.h'''))
]
diff --git a/cpp/src/arrow/acero/asof_join_node.cc
b/cpp/src/arrow/acero/asof_join_node.cc
index 848cbdf750..2248362241 100644
--- a/cpp/src/arrow/acero/asof_join_node.cc
+++ b/cpp/src/arrow/acero/asof_join_node.cc
@@ -32,7 +32,7 @@
#include "arrow/acero/exec_plan.h"
#include "arrow/acero/options.h"
-#include "arrow/acero/unmaterialized_table.h"
+#include "arrow/acero/unmaterialized_table_internal.h"
#ifndef NDEBUG
#include "arrow/acero/options_internal.h"
#endif
diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc
b/cpp/src/arrow/acero/sorted_merge_node.cc
index a71ac79efc..2845383cee 100644
--- a/cpp/src/arrow/acero/sorted_merge_node.cc
+++ b/cpp/src/arrow/acero/sorted_merge_node.cc
@@ -28,7 +28,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/query_context.h"
#include "arrow/acero/time_series_util.h"
-#include "arrow/acero/unmaterialized_table.h"
+#include "arrow/acero/unmaterialized_table_internal.h"
#include "arrow/acero/util.h"
#include "arrow/array/builder_base.h"
#include "arrow/result.h"
diff --git a/cpp/src/arrow/acero/unmaterialized_table.h
b/cpp/src/arrow/acero/unmaterialized_table_internal.h
similarity index 100%
rename from cpp/src/arrow/acero/unmaterialized_table.h
rename to cpp/src/arrow/acero/unmaterialized_table_internal.h
diff --git a/cpp/src/arrow/acero/util.h b/cpp/src/arrow/acero/util.h
index 0eb9f4c87e..ee46e85274 100644
--- a/cpp/src/arrow/acero/util.h
+++ b/cpp/src/arrow/acero/util.h
@@ -65,7 +65,7 @@ class ARROW_ACERO_EXPORT AtomicCounter {
// return true if the counter is complete
bool Increment() {
- DCHECK_NE(count_.load(), total_.load());
+ ARROW_DCHECK_NE(count_.load(), total_.load());
int count = count_.fetch_add(1) + 1;
if (count != total_.load()) return false;
return DoneOnce();
diff --git a/cpp/src/arrow/util/bit_stream_utils.h
b/cpp/src/arrow/util/bit_stream_utils_internal.h
similarity index 100%
rename from cpp/src/arrow/util/bit_stream_utils.h
rename to cpp/src/arrow/util/bit_stream_utils_internal.h
diff --git a/cpp/src/arrow/util/bit_util_test.cc
b/cpp/src/arrow/util/bit_util_test.cc
index e026dfec24..c7674af57f 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -43,7 +43,7 @@
#include "arrow/testing/util.h"
#include "arrow/type_fwd.h"
#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bitmap.h"
#include "arrow/util/bitmap_generate.h"
#include "arrow/util/bitmap_ops.h"
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 2a2175ec0f..be73c020c0 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -138,14 +138,31 @@ enum class ArrowLogLevel : int {
#endif // NDEBUG
+// These are internal-use macros and should not be used in public headers.
+#ifndef DCHECK
#define DCHECK ARROW_DCHECK
+#endif
+#ifndef DCHECK_OK
#define DCHECK_OK ARROW_DCHECK_OK
+#endif
+#ifndef DCHECK_EQ
#define DCHECK_EQ ARROW_DCHECK_EQ
+#endif
+#ifndef DCHECK_NE
#define DCHECK_NE ARROW_DCHECK_NE
+#endif
+#ifndef DCHECK_LE
#define DCHECK_LE ARROW_DCHECK_LE
+#endif
+#ifndef DCHECK_LT
#define DCHECK_LT ARROW_DCHECK_LT
+#endif
+#ifndef DCHECK_GE
#define DCHECK_GE ARROW_DCHECK_GE
+#endif
+#ifndef DCHECK_GT
#define DCHECK_GT ARROW_DCHECK_GT
+#endif
// This code is adapted from
// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
diff --git a/cpp/src/arrow/util/rle_encoding.h
b/cpp/src/arrow/util/rle_encoding_internal.h
similarity index 99%
rename from cpp/src/arrow/util/rle_encoding.h
rename to cpp/src/arrow/util/rle_encoding_internal.h
index e0f5690062..4575320659 100644
--- a/cpp/src/arrow/util/rle_encoding.h
+++ b/cpp/src/arrow/util/rle_encoding_internal.h
@@ -27,7 +27,7 @@
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
diff --git a/cpp/src/arrow/util/rle_encoding_test.cc
b/cpp/src/arrow/util/rle_encoding_test.cc
index 26984e5f77..0cc0a276a2 100644
--- a/cpp/src/arrow/util/rle_encoding_test.cc
+++ b/cpp/src/arrow/util/rle_encoding_test.cc
@@ -28,10 +28,10 @@
#include "arrow/buffer.h"
#include "arrow/testing/random.h"
#include "arrow/type.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/io_util.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
namespace arrow {
namespace util {
diff --git a/cpp/src/arrow/util/tdigest.h b/cpp/src/arrow/util/tdigest.h
index 308df46884..ea033ed696 100644
--- a/cpp/src/arrow/util/tdigest.h
+++ b/cpp/src/arrow/util/tdigest.h
@@ -56,7 +56,7 @@ class ARROW_EXPORT TDigest {
// this function is intensively called and performance critical
// call it only if you are sure no NAN exists in input data
void Add(double value) {
- DCHECK(!std::isnan(value)) << "cannot add NAN";
+ ARROW_DCHECK(!std::isnan(value)) << "cannot add NAN";
if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) {
MergeInput();
}
diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h
index 74b6a2403a..e77d713a44 100644
--- a/cpp/src/arrow/util/vector.h
+++ b/cpp/src/arrow/util/vector.h
@@ -31,8 +31,8 @@ namespace internal {
template <typename T>
std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index)
{
- DCHECK(!values.empty());
- DCHECK_LT(index, values.size());
+ ARROW_DCHECK(!values.empty());
+ ARROW_DCHECK_LT(index, values.size());
std::vector<T> out;
out.reserve(values.size() - 1);
for (size_t i = 0; i < index; ++i) {
@@ -47,7 +47,7 @@ std::vector<T> DeleteVectorElement(const std::vector<T>&
values, size_t index) {
template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
- DCHECK_LE(index, values.size());
+ ARROW_DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size() + 1);
for (size_t i = 0; i < index; ++i) {
@@ -63,7 +63,7 @@ std::vector<T> AddVectorElement(const std::vector<T>& values,
size_t index,
template <typename T>
std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
- DCHECK_LE(index, values.size());
+ ARROW_DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size());
for (size_t i = 0; i < index; ++i) {
diff --git a/cpp/src/gandiva/dex_visitor.h b/cpp/src/gandiva/dex_visitor.h
index 5d160bb22c..4115df7ffb 100644
--- a/cpp/src/gandiva/dex_visitor.h
+++ b/cpp/src/gandiva/dex_visitor.h
@@ -70,7 +70,7 @@ class GANDIVA_EXPORT DexVisitor {
/// Default implementation with only DCHECK().
#define VISIT_DCHECK(DEX_CLASS) \
- void Visit(const DEX_CLASS& dex) override { DCHECK(0); }
+ void Visit(const DEX_CLASS& dex) override { ARROW_DCHECK(0); }
class GANDIVA_EXPORT DexDefaultVisitor : public DexVisitor {
VISIT_DCHECK(VectorReadValidityDex)
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index 565c3f1425..3a69500e38 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -67,7 +67,7 @@ class GANDIVA_EXPORT Engine {
/// Add the function to the list of IR functions that need to be compiled.
/// Compiling only the functions that are used by the module saves time.
void AddFunctionToCompile(const std::string& fname) {
- DCHECK(!module_finalized_);
+ ARROW_DCHECK(!module_finalized_);
functions_to_compile_.push_back(fname);
}
diff --git a/cpp/src/gandiva/eval_batch.h b/cpp/src/gandiva/eval_batch.h
index 9644010b72..feb4cdc975 100644
--- a/cpp/src/gandiva/eval_batch.h
+++ b/cpp/src/gandiva/eval_batch.h
@@ -53,22 +53,22 @@ class EvalBatch {
int GetNumBuffers() const { return num_buffers_; }
const uint8_t* GetBuffer(int idx) const {
- DCHECK(idx <= num_buffers_);
+ ARROW_DCHECK(idx <= num_buffers_);
return (buffers_array_.get())[idx];
}
uint8_t* GetBuffer(int idx) {
- DCHECK(idx <= num_buffers_);
+ ARROW_DCHECK(idx <= num_buffers_);
return (buffers_array_.get())[idx];
}
int64_t GetBufferOffset(int idx) const {
- DCHECK(idx <= num_buffers_);
+ ARROW_DCHECK(idx <= num_buffers_);
return (buffer_offsets_array_.get())[idx];
}
void SetBuffer(int idx, uint8_t* buffer, int64_t offset) {
- DCHECK(idx <= num_buffers_);
+ ARROW_DCHECK(idx <= num_buffers_);
(buffers_array_.get())[idx] = buffer;
(buffer_offsets_array_.get())[idx] = offset;
}
@@ -80,11 +80,11 @@ class EvalBatch {
}
const uint8_t* GetLocalBitMap(int idx) const {
- DCHECK(idx <= GetNumLocalBitMaps());
+ ARROW_DCHECK(idx <= GetNumLocalBitMaps());
return local_bitmaps_holder_->GetLocalBitMap(idx);
}
uint8_t* GetLocalBitMap(int idx) {
- DCHECK(idx <= GetNumLocalBitMaps());
+ ARROW_DCHECK(idx <= GetNumLocalBitMaps());
return local_bitmaps_holder_->GetLocalBitMap(idx);
}
diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h
index d6f0952713..7768a7f7e4 100644
--- a/cpp/src/gandiva/llvm_types.h
+++ b/cpp/src/gandiva/llvm_types.h
@@ -97,7 +97,7 @@ class GANDIVA_EXPORT LLVMTypes {
} else if (type->isFloatingPointTy()) {
return llvm::ConstantFP::get(type, 0);
} else {
- DCHECK(type->isPointerTy());
+ ARROW_DCHECK(type->isPointerTy());
return llvm::ConstantPointerNull::getNullValue(type);
}
}
diff --git a/cpp/src/gandiva/local_bitmaps_holder.h
b/cpp/src/gandiva/local_bitmaps_holder.h
index a172fb973c..dc24a32e7c 100644
--- a/cpp/src/gandiva/local_bitmaps_holder.h
+++ b/cpp/src/gandiva/local_bitmaps_holder.h
@@ -40,7 +40,7 @@ class LocalBitMapsHolder {
uint8_t** GetLocalBitMapArray() const { return local_bitmaps_array_.get(); }
uint8_t* GetLocalBitMap(int idx) const {
- DCHECK(idx <= GetNumLocalBitMaps());
+ ARROW_DCHECK(idx <= GetNumLocalBitMaps());
return local_bitmaps_array_.get()[idx];
}
diff --git a/cpp/src/gandiva/selection_vector_impl.h
b/cpp/src/gandiva/selection_vector_impl.h
index dc9724ca86..234298daf5 100644
--- a/cpp/src/gandiva/selection_vector_impl.h
+++ b/cpp/src/gandiva/selection_vector_impl.h
@@ -60,7 +60,7 @@ class SelectionVectorImpl : public SelectionVector {
int64_t GetNumSlots() const override { return num_slots_; }
void SetNumSlots(int64_t num_slots) override {
- DCHECK_LE(num_slots, max_slots_);
+ ARROW_DCHECK_LE(num_slots, max_slots_);
num_slots_ = num_slots;
}
diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h
index 909563d013..82172f363b 100644
--- a/cpp/src/parquet/bloom_filter.h
+++ b/cpp/src/parquet/bloom_filter.h
@@ -221,7 +221,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public
BloomFilter {
/// kMaximumBloomFilterBytes, and the return value is always a power of 2
static uint32_t OptimalNumOfBytes(uint32_t ndv, double fpp) {
uint32_t optimal_num_of_bits = OptimalNumOfBits(ndv, fpp);
- DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits));
+ ARROW_DCHECK(::arrow::bit_util::IsMultipleOf8(optimal_num_of_bits));
return optimal_num_of_bits >> 3;
}
@@ -233,7 +233,7 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public
BloomFilter {
/// @return it always return a value between kMinimumBloomFilterBytes * 8 and
/// kMaximumBloomFilterBytes * 8, and the return value is always a power of
16
static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) {
- DCHECK(fpp > 0.0 && fpp < 1.0);
+ ARROW_DCHECK(fpp > 0.0 && fpp < 1.0);
const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8));
uint32_t num_bits;
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index ebf9515f27..05ee6a16c5 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -36,14 +36,14 @@
#include "arrow/array/builder_primitive.h"
#include "arrow/chunked_array.h"
#include "arrow/type.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/compression.h"
#include "arrow/util/crc32.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
#include "arrow/util/unreachable.h"
#include "parquet/column_page.h"
#include "parquet/encoding.h"
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index c9f6e48298..90e0102b42 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -32,7 +32,7 @@
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/checked_cast.h"
@@ -41,7 +41,7 @@
#include "arrow/util/endian.h"
#include "arrow/util/float16.h"
#include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
#include "arrow/util/type_traits.h"
#include "arrow/visit_array_inline.h"
#include "parquet/column_page.h"
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 54e1e00004..c3f2b79629 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -33,7 +33,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_run_reader.h"
-#include "arrow/util/bit_stream_utils.h"
+#include "arrow/util/bit_stream_utils_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_writer.h"
@@ -42,7 +42,7 @@
#include "arrow/util/hashing.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/logging.h"
-#include "arrow/util/rle_encoding.h"
+#include "arrow/util/rle_encoding_internal.h"
#include "arrow/util/ubsan.h"
#include "arrow/visit_data_inline.h"
#include "parquet/exception.h"
diff --git a/cpp/src/parquet/level_conversion_inc.h
b/cpp/src/parquet/level_conversion_inc.h
index d1ccedabfd..3accb154e6 100644
--- a/cpp/src/parquet/level_conversion_inc.h
+++ b/cpp/src/parquet/level_conversion_inc.h
@@ -296,7 +296,7 @@ template <bool has_repeated_parent>
int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t
batch_size,
int64_t upper_bound_remaining, LevelInfo
level_info,
::arrow::internal::FirstTimeBitmapWriter*
writer) {
- DCHECK_LE(batch_size, kExtractBitsSize);
+ ARROW_DCHECK_LE(batch_size, kExtractBitsSize);
// Greater than level_info.def_level - 1 implies >= the def_level
auto defined_bitmap = static_cast<extract_bitmap_t>(