This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a78760f995 GH-41114: [C++] Add is_validity_defined_by_bitmap() 
predicate (#41115)
a78760f995 is described below

commit a78760f995b11d3f14c035696fc567e019321243
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Tue Apr 23 15:03:49 2024 -0300

    GH-41114: [C++] Add is_validity_defined_by_bitmap() predicate (#41115)
    
    ### Rationale for this change
    
    To make it easier to find bugs that are very likely to be silent in the 
codebas because users rarely use unions and REE types.
    
    ### What changes are included in this PR?
    
    Adding the type predicate and two usages in `builder_nested.h`.
    
    ### Are these changes tested?
    
    By the compilation process, since they are both `static_asserts`.
    * GitHub Issue: #41114
    
    Authored-by: Felipe Oliveira Carvalho <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/array/array_test.cc          |  7 ++++---
 cpp/src/arrow/array/builder_nested.h       | 18 +++++++-----------
 cpp/src/arrow/array/concatenate.cc         |  2 +-
 cpp/src/arrow/array/data.cc                |  6 +++---
 cpp/src/arrow/array/data.h                 |  1 +
 cpp/src/arrow/array/util.cc                |  2 +-
 cpp/src/arrow/array/validate.cc            |  2 +-
 cpp/src/arrow/c/bridge.cc                  |  2 +-
 cpp/src/arrow/c/bridge_test.cc             |  2 +-
 cpp/src/arrow/compute/exec.cc              |  2 +-
 cpp/src/arrow/integration/json_internal.cc |  2 +-
 cpp/src/arrow/ipc/metadata_internal.cc     |  5 +++--
 cpp/src/arrow/type.h                       |  5 ++++-
 13 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/cpp/src/arrow/array/array_test.cc 
b/cpp/src/arrow/array/array_test.cc
index b0d7fe740a..af64908b59 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -604,11 +604,11 @@ void AssertAppendScalar(MemoryPool* pool, const 
std::shared_ptr<Scalar>& scalar)
   ASSERT_EQ(out->length(), 9);
 
   auto out_type_id = out->type()->id();
-  const bool has_validity = internal::HasValidityBitmap(out_type_id);
+  const bool can_check_nulls = internal::may_have_validity_bitmap(out_type_id);
   // For a dictionary builder, the output dictionary won't necessarily be the 
same
   const bool can_check_values = !is_dictionary(out_type_id);
 
-  if (has_validity) {
+  if (can_check_nulls) {
     ASSERT_EQ(out->null_count(), 4);
   } else {
     ASSERT_EQ(out->null_count(), 0);
@@ -891,7 +891,8 @@ TEST_F(TestArray, TestAppendArraySlice) {
     span.SetMembers(*nulls->data());
     ASSERT_OK(builder->AppendArraySlice(span, 0, 4));
     ASSERT_EQ(12, builder->length());
-    const bool has_validity_bitmap = 
internal::HasValidityBitmap(scalar->type->id());
+    const bool has_validity_bitmap =
+        internal::may_have_validity_bitmap(scalar->type->id());
     if (has_validity_bitmap) {
       ASSERT_EQ(4, builder->null_count());
     }
diff --git a/cpp/src/arrow/array/builder_nested.h 
b/cpp/src/arrow/array/builder_nested.h
index 2c8c41c365..9f7b0fcdbc 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -181,13 +181,11 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public 
ArrayBuilder {
     if constexpr (is_list_view(TYPE::type_id)) {
       sizes = array.GetValues<offset_type>(2);
     }
-    const bool all_valid = !array.MayHaveLogicalNulls();
-    const uint8_t* validity = array.HasValidityBitmap() ? 
array.buffers[0].data : NULLPTR;
+    static_assert(internal::may_have_validity_bitmap(TYPE::type_id));
+    const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : 
NULLPTR;
     ARROW_RETURN_NOT_OK(Reserve(length));
     for (int64_t row = offset; row < offset + length; row++) {
-      const bool is_valid =
-          all_valid || (validity && bit_util::GetBit(validity, array.offset + 
row)) ||
-          array.IsValid(row);
+      const bool is_valid = !validity || bit_util::GetBit(validity, 
array.offset + row);
       int64_t size = 0;
       if (is_valid) {
         if constexpr (is_list_view(TYPE::type_id)) {
@@ -569,13 +567,11 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
 
   Status AppendArraySlice(const ArraySpan& array, int64_t offset,
                           int64_t length) override {
-    const int32_t* offsets = array.GetValues<int32_t>(1);
-    const bool all_valid = !array.MayHaveLogicalNulls();
-    const uint8_t* validity = array.HasValidityBitmap() ? 
array.buffers[0].data : NULLPTR;
+    const auto* offsets = array.GetValues<int32_t>(1);
+    static_assert(internal::may_have_validity_bitmap(MapType::type_id));
+    const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : 
NULLPTR;
     for (int64_t row = offset; row < offset + length; row++) {
-      const bool is_valid =
-          all_valid || (validity && bit_util::GetBit(validity, array.offset + 
row)) ||
-          array.IsValid(row);
+      const bool is_valid = !validity || bit_util::GetBit(validity, 
array.offset + row);
       if (is_valid) {
         ARROW_RETURN_NOT_OK(Append());
         const int64_t slot_length = offsets[row + 1] - offsets[row];
diff --git a/cpp/src/arrow/array/concatenate.cc 
b/cpp/src/arrow/array/concatenate.cc
index ff9ed66d11..44d58cc0bd 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -317,7 +317,7 @@ class ConcatenateImpl {
   }
 
   Status Concatenate(std::shared_ptr<ArrayData>* out) && {
-    if (out_->null_count != 0 && 
internal::HasValidityBitmap(out_->type->id())) {
+    if (out_->null_count != 0 && 
internal::may_have_validity_bitmap(out_->type->id())) {
       RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
     }
     RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index ff3112ec1f..ac828a9c35 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -53,7 +53,7 @@ static inline void AdjustNonNullable(Type::type type_id, 
int64_t length,
   if (type_id == Type::NA) {
     *null_count = length;
     (*buffers)[0] = nullptr;
-  } else if (internal::HasValidityBitmap(type_id)) {
+  } else if (internal::may_have_validity_bitmap(type_id)) {
     if (*null_count == 0) {
       // In case there are no nulls, don't keep an allocated null bitmap around
       (*buffers)[0] = nullptr;
@@ -335,7 +335,7 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* 
span) {
     span->buffers[i].size = 0;
   }
 
-  if (!HasValidityBitmap(type->id())) {
+  if (!may_have_validity_bitmap(type->id())) {
     span->buffers[0] = {};
   }
 
@@ -370,7 +370,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
 
   if (type_id == Type::NA) {
     this->null_count = 1;
-  } else if (!internal::HasValidityBitmap(type_id)) {
+  } else if (!internal::may_have_validity_bitmap(type_id)) {
     this->null_count = 0;
   } else {
     // Populate null count and validity bitmap
diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h
index d8a6663cec..beec29789a 100644
--- a/cpp/src/arrow/array/data.h
+++ b/cpp/src/arrow/array/data.h
@@ -46,6 +46,7 @@ ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data, 
int64_t i);
 ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data);
 ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data);
 ARROW_EXPORT bool DictionaryMayHaveLogicalNulls(const ArrayData& data);
+
 }  // namespace internal
 
 // When slicing, we do not know the null count of the sliced range without
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index 86e2ffcae4..bdba92c9a1 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -95,7 +95,7 @@ class ArrayDataEndianSwapper {
   Status SwapType(const DataType& type) {
     RETURN_NOT_OK(VisitTypeInline(type, this));
     RETURN_NOT_OK(SwapChildren(type.fields()));
-    if (internal::HasValidityBitmap(type.id())) {
+    if (internal::may_have_validity_bitmap(type.id())) {
       // Copy null bitmap
       out_->buffers[0] = data_->buffers[0];
     }
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 8dd3eb3f90..0d940d3bc8 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -550,7 +550,7 @@ struct ValidateArrayImpl {
     if (full_validation) {
       if (data.null_count != kUnknownNullCount) {
         int64_t actual_null_count;
-        if (HasValidityBitmap(data.type->id()) && data.buffers[0]) {
+        if (may_have_validity_bitmap(data.type->id()) && data.buffers[0]) {
           // Do not call GetNullCount() as it would also set the `null_count` 
member
           actual_null_count = data.length - 
CountSetBits(data.buffers[0]->data(),
                                                          data.offset, 
data.length);
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index d004de7a2e..8a530b3798 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -576,7 +576,7 @@ struct ArrayExporter {
     // Store buffer pointers
     size_t n_buffers = data->buffers.size();
     auto buffers_begin = data->buffers.begin();
-    if (n_buffers > 0 && !internal::HasValidityBitmap(data->type->id())) {
+    if (n_buffers > 0 && 
!internal::may_have_validity_bitmap(data->type->id())) {
       --n_buffers;
       ++buffers_begin;
     }
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index dba6e4736b..d64fe67acc 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -565,7 +565,7 @@ struct ArrayExportChecker {
 
     auto expected_n_buffers = 
static_cast<int64_t>(expected_data.buffers.size());
     auto expected_buffers = expected_data.buffers.data();
-    if (!internal::HasValidityBitmap(expected_data.type->id())) {
+    if (!internal::may_have_validity_bitmap(expected_data.type->id())) {
       --expected_n_buffers;
       ++expected_buffers;
     }
diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index 28dcf493fa..f2e4578383 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -480,7 +480,7 @@ struct NullGeneralization {
     if (dtype_id == Type::NA) {
       return ALL_NULL;
     }
-    if (!arrow::internal::HasValidityBitmap(dtype_id)) {
+    if (!arrow::internal::may_have_validity_bitmap(dtype_id)) {
       return ALL_VALID;
     }
     if (value.is_scalar()) {
diff --git a/cpp/src/arrow/integration/json_internal.cc 
b/cpp/src/arrow/integration/json_internal.cc
index 64eb342d5b..4b75e84bfc 100644
--- a/cpp/src/arrow/integration/json_internal.cc
+++ b/cpp/src/arrow/integration/json_internal.cc
@@ -1849,7 +1849,7 @@ class ArrayReader {
   Result<std::shared_ptr<ArrayData>> Parse() {
     ARROW_ASSIGN_OR_RAISE(length_, GetMemberInt<int32_t>(obj_, "count"));
 
-    if (::arrow::internal::HasValidityBitmap(type_->id())) {
+    if (::arrow::internal::may_have_validity_bitmap(type_->id())) {
       // Null and union types don't have a validity bitmap
       RETURN_NOT_OK(ParseValidityBitmap());
     }
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc 
b/cpp/src/arrow/ipc/metadata_internal.cc
index 4154b594d9..e20b352d18 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -109,8 +109,9 @@ flatbuf::MetadataVersion 
MetadataVersionToFlatbuffer(MetadataVersion version) {
 bool HasValidityBitmap(Type::type type_id, MetadataVersion version) {
   // In V4, null types have no validity bitmap
   // In V5 and later, null and union types have no validity bitmap
-  return (version < MetadataVersion::V5) ? (type_id != Type::NA)
-                                         : 
::arrow::internal::HasValidityBitmap(type_id);
+  return (version < MetadataVersion::V5)
+             ? (type_id != Type::NA)
+             : ::arrow::internal::may_have_validity_bitmap(type_id);
 }
 
 namespace {
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 5629cade42..58c9df04ec 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -2488,7 +2488,7 @@ Result<std::shared_ptr<Schema>> UnifySchemas(
 
 namespace internal {
 
-constexpr bool HasValidityBitmap(Type::type id) {
+constexpr bool may_have_validity_bitmap(Type::type id) {
   switch (id) {
     case Type::NA:
     case Type::DENSE_UNION:
@@ -2500,6 +2500,9 @@ constexpr bool HasValidityBitmap(Type::type id) {
   }
 }
 
+ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap() 
instead.")
+constexpr bool HasValidityBitmap(Type::type id) { return 
may_have_validity_bitmap(id); }
+
 ARROW_EXPORT
 std::string ToString(Type::type id);
 

Reply via email to