This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new a78760f995 GH-41114: [C++] Add is_validity_defined_by_bitmap()
predicate (#41115)
a78760f995 is described below
commit a78760f995b11d3f14c035696fc567e019321243
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Tue Apr 23 15:03:49 2024 -0300
GH-41114: [C++] Add is_validity_defined_by_bitmap() predicate (#41115)
### Rationale for this change
To make it easier to find bugs that are very likely to be silent in the
codebas because users rarely use unions and REE types.
### What changes are included in this PR?
Adding the type predicate and two usages in `builder_nested.h`.
### Are these changes tested?
By the compilation process, since they are both `static_asserts`.
* GitHub Issue: #41114
Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/array/array_test.cc | 7 ++++---
cpp/src/arrow/array/builder_nested.h | 18 +++++++-----------
cpp/src/arrow/array/concatenate.cc | 2 +-
cpp/src/arrow/array/data.cc | 6 +++---
cpp/src/arrow/array/data.h | 1 +
cpp/src/arrow/array/util.cc | 2 +-
cpp/src/arrow/array/validate.cc | 2 +-
cpp/src/arrow/c/bridge.cc | 2 +-
cpp/src/arrow/c/bridge_test.cc | 2 +-
cpp/src/arrow/compute/exec.cc | 2 +-
cpp/src/arrow/integration/json_internal.cc | 2 +-
cpp/src/arrow/ipc/metadata_internal.cc | 5 +++--
cpp/src/arrow/type.h | 5 ++++-
13 files changed, 29 insertions(+), 27 deletions(-)
diff --git a/cpp/src/arrow/array/array_test.cc
b/cpp/src/arrow/array/array_test.cc
index b0d7fe740a..af64908b59 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -604,11 +604,11 @@ void AssertAppendScalar(MemoryPool* pool, const
std::shared_ptr<Scalar>& scalar)
ASSERT_EQ(out->length(), 9);
auto out_type_id = out->type()->id();
- const bool has_validity = internal::HasValidityBitmap(out_type_id);
+ const bool can_check_nulls = internal::may_have_validity_bitmap(out_type_id);
// For a dictionary builder, the output dictionary won't necessarily be the
same
const bool can_check_values = !is_dictionary(out_type_id);
- if (has_validity) {
+ if (can_check_nulls) {
ASSERT_EQ(out->null_count(), 4);
} else {
ASSERT_EQ(out->null_count(), 0);
@@ -891,7 +891,8 @@ TEST_F(TestArray, TestAppendArraySlice) {
span.SetMembers(*nulls->data());
ASSERT_OK(builder->AppendArraySlice(span, 0, 4));
ASSERT_EQ(12, builder->length());
- const bool has_validity_bitmap =
internal::HasValidityBitmap(scalar->type->id());
+ const bool has_validity_bitmap =
+ internal::may_have_validity_bitmap(scalar->type->id());
if (has_validity_bitmap) {
ASSERT_EQ(4, builder->null_count());
}
diff --git a/cpp/src/arrow/array/builder_nested.h
b/cpp/src/arrow/array/builder_nested.h
index 2c8c41c365..9f7b0fcdbc 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -181,13 +181,11 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public
ArrayBuilder {
if constexpr (is_list_view(TYPE::type_id)) {
sizes = array.GetValues<offset_type>(2);
}
- const bool all_valid = !array.MayHaveLogicalNulls();
- const uint8_t* validity = array.HasValidityBitmap() ?
array.buffers[0].data : NULLPTR;
+ static_assert(internal::may_have_validity_bitmap(TYPE::type_id));
+ const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data :
NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t row = offset; row < offset + length; row++) {
- const bool is_valid =
- all_valid || (validity && bit_util::GetBit(validity, array.offset +
row)) ||
- array.IsValid(row);
+ const bool is_valid = !validity || bit_util::GetBit(validity,
array.offset + row);
int64_t size = 0;
if (is_valid) {
if constexpr (is_list_view(TYPE::type_id)) {
@@ -569,13 +567,11 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
- const int32_t* offsets = array.GetValues<int32_t>(1);
- const bool all_valid = !array.MayHaveLogicalNulls();
- const uint8_t* validity = array.HasValidityBitmap() ?
array.buffers[0].data : NULLPTR;
+ const auto* offsets = array.GetValues<int32_t>(1);
+ static_assert(internal::may_have_validity_bitmap(MapType::type_id));
+ const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data :
NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
- const bool is_valid =
- all_valid || (validity && bit_util::GetBit(validity, array.offset +
row)) ||
- array.IsValid(row);
+ const bool is_valid = !validity || bit_util::GetBit(validity,
array.offset + row);
if (is_valid) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
diff --git a/cpp/src/arrow/array/concatenate.cc
b/cpp/src/arrow/array/concatenate.cc
index ff9ed66d11..44d58cc0bd 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -317,7 +317,7 @@ class ConcatenateImpl {
}
Status Concatenate(std::shared_ptr<ArrayData>* out) && {
- if (out_->null_count != 0 &&
internal::HasValidityBitmap(out_->type->id())) {
+ if (out_->null_count != 0 &&
internal::may_have_validity_bitmap(out_->type->id())) {
RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
}
RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index ff3112ec1f..ac828a9c35 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -53,7 +53,7 @@ static inline void AdjustNonNullable(Type::type type_id,
int64_t length,
if (type_id == Type::NA) {
*null_count = length;
(*buffers)[0] = nullptr;
- } else if (internal::HasValidityBitmap(type_id)) {
+ } else if (internal::may_have_validity_bitmap(type_id)) {
if (*null_count == 0) {
// In case there are no nulls, don't keep an allocated null bitmap around
(*buffers)[0] = nullptr;
@@ -335,7 +335,7 @@ void FillZeroLengthArray(const DataType* type, ArraySpan*
span) {
span->buffers[i].size = 0;
}
- if (!HasValidityBitmap(type->id())) {
+ if (!may_have_validity_bitmap(type->id())) {
span->buffers[0] = {};
}
@@ -370,7 +370,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
if (type_id == Type::NA) {
this->null_count = 1;
- } else if (!internal::HasValidityBitmap(type_id)) {
+ } else if (!internal::may_have_validity_bitmap(type_id)) {
this->null_count = 0;
} else {
// Populate null count and validity bitmap
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index d8a6663cec..beec29789a 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -46,6 +46,7 @@ ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data,
int64_t i);
ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data);
ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data);
ARROW_EXPORT bool DictionaryMayHaveLogicalNulls(const ArrayData& data);
+
} // namespace internal
// When slicing, we do not know the null count of the sliced range without
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index 86e2ffcae4..bdba92c9a1 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -95,7 +95,7 @@ class ArrayDataEndianSwapper {
Status SwapType(const DataType& type) {
RETURN_NOT_OK(VisitTypeInline(type, this));
RETURN_NOT_OK(SwapChildren(type.fields()));
- if (internal::HasValidityBitmap(type.id())) {
+ if (internal::may_have_validity_bitmap(type.id())) {
// Copy null bitmap
out_->buffers[0] = data_->buffers[0];
}
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 8dd3eb3f90..0d940d3bc8 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -550,7 +550,7 @@ struct ValidateArrayImpl {
if (full_validation) {
if (data.null_count != kUnknownNullCount) {
int64_t actual_null_count;
- if (HasValidityBitmap(data.type->id()) && data.buffers[0]) {
+ if (may_have_validity_bitmap(data.type->id()) && data.buffers[0]) {
// Do not call GetNullCount() as it would also set the `null_count`
member
actual_null_count = data.length -
CountSetBits(data.buffers[0]->data(),
data.offset,
data.length);
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index d004de7a2e..8a530b3798 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -576,7 +576,7 @@ struct ArrayExporter {
// Store buffer pointers
size_t n_buffers = data->buffers.size();
auto buffers_begin = data->buffers.begin();
- if (n_buffers > 0 && !internal::HasValidityBitmap(data->type->id())) {
+ if (n_buffers > 0 &&
!internal::may_have_validity_bitmap(data->type->id())) {
--n_buffers;
++buffers_begin;
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index dba6e4736b..d64fe67acc 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -565,7 +565,7 @@ struct ArrayExportChecker {
auto expected_n_buffers =
static_cast<int64_t>(expected_data.buffers.size());
auto expected_buffers = expected_data.buffers.data();
- if (!internal::HasValidityBitmap(expected_data.type->id())) {
+ if (!internal::may_have_validity_bitmap(expected_data.type->id())) {
--expected_n_buffers;
++expected_buffers;
}
diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 28dcf493fa..f2e4578383 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -480,7 +480,7 @@ struct NullGeneralization {
if (dtype_id == Type::NA) {
return ALL_NULL;
}
- if (!arrow::internal::HasValidityBitmap(dtype_id)) {
+ if (!arrow::internal::may_have_validity_bitmap(dtype_id)) {
return ALL_VALID;
}
if (value.is_scalar()) {
diff --git a/cpp/src/arrow/integration/json_internal.cc
b/cpp/src/arrow/integration/json_internal.cc
index 64eb342d5b..4b75e84bfc 100644
--- a/cpp/src/arrow/integration/json_internal.cc
+++ b/cpp/src/arrow/integration/json_internal.cc
@@ -1849,7 +1849,7 @@ class ArrayReader {
Result<std::shared_ptr<ArrayData>> Parse() {
ARROW_ASSIGN_OR_RAISE(length_, GetMemberInt<int32_t>(obj_, "count"));
- if (::arrow::internal::HasValidityBitmap(type_->id())) {
+ if (::arrow::internal::may_have_validity_bitmap(type_->id())) {
// Null and union types don't have a validity bitmap
RETURN_NOT_OK(ParseValidityBitmap());
}
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc
b/cpp/src/arrow/ipc/metadata_internal.cc
index 4154b594d9..e20b352d18 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -109,8 +109,9 @@ flatbuf::MetadataVersion
MetadataVersionToFlatbuffer(MetadataVersion version) {
bool HasValidityBitmap(Type::type type_id, MetadataVersion version) {
// In V4, null types have no validity bitmap
// In V5 and later, null and union types have no validity bitmap
- return (version < MetadataVersion::V5) ? (type_id != Type::NA)
- :
::arrow::internal::HasValidityBitmap(type_id);
+ return (version < MetadataVersion::V5)
+ ? (type_id != Type::NA)
+ : ::arrow::internal::may_have_validity_bitmap(type_id);
}
namespace {
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 5629cade42..58c9df04ec 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -2488,7 +2488,7 @@ Result<std::shared_ptr<Schema>> UnifySchemas(
namespace internal {
-constexpr bool HasValidityBitmap(Type::type id) {
+constexpr bool may_have_validity_bitmap(Type::type id) {
switch (id) {
case Type::NA:
case Type::DENSE_UNION:
@@ -2500,6 +2500,9 @@ constexpr bool HasValidityBitmap(Type::type id) {
}
}
+ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap()
instead.")
+constexpr bool HasValidityBitmap(Type::type id) { return
may_have_validity_bitmap(id); }
+
ARROW_EXPORT
std::string ToString(Type::type id);