This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new ac505c986 KUDU-1261 use empty validity vector when no nulls in array
ac505c986 is described below

commit ac505c986987e9079acf9cbd74d3ef56f728a94d
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Oct 20 21:35:36 2025 -0700

    KUDU-1261 use empty validity vector when no nulls in array
    
    This changelist updates conventions on supplying and consuming the
    validity information on array elements in the Kudu C++ client API.
    With this changelist, an empty validity vector is accepted if all
    elements in data array to be written are non-null (i.e. valid).
    The same convention is now used for array data returned as a result
    of read/scan operation.
    
    The Python client has been updated correspondingly.
    
    This patch also contains a few new test scenarios to cover the updated
    functionality.
    
    Change-Id: I6f0284a1cdf23234d30d18ae4b9f6ee3a9bd82f4
    Reviewed-on: http://gerrit.cloudera.org:8080/23568
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Abhishek Chennaka <[email protected]>
---
 python/kudu/client.pyx                    | 29 +++++-----
 src/kudu/cfile/cfile-test-base.h          | 16 +++---
 src/kudu/cfile/cfile_writer.cc            |  2 +-
 src/kudu/client/array_cell-internal.cc    |  4 ++
 src/kudu/client/array_cell.h              |  8 ++-
 src/kudu/client/scan_batch.cc             |  8 ++-
 src/kudu/common/array_cell_view.h         | 90 +++++++++++++++++++++++++++----
 src/kudu/common/array_type_serdes-test.cc | 63 +++++++++++++++++++++-
 src/kudu/common/array_type_serdes.h       | 11 ++--
 src/kudu/common/partial_row-test.cc       | 47 ++++++++++++++++
 src/kudu/common/partial_row.cc            | 10 ++--
 src/kudu/common/row_operations.cc         |  6 +--
 src/kudu/common/types.h                   | 19 ++++---
 src/kudu/util/bitmap-test.cc              | 27 ++++++++++
 src/kudu/util/bitmap.cc                   |  1 +
 src/kudu/util/bitmap.h                    | 13 +++--
 16 files changed, 294 insertions(+), 60 deletions(-)

diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 041405d54..b2ed6db9c 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -1975,74 +1975,75 @@ cdef class Row:
             size_t j
             list result
 
+        # An empty validity vector means all the elements are non-null/valid.
         if elem_type == KUDU_INT8:
             check_status(self.row.GetArrayInt8(i, &cpp_data_int8, 
&cpp_validity))
             result = []
             for j in range(cpp_data_int8.size()):
-                result.append(cpp_data_int8[j] if cpp_validity[j] else None)
+                result.append(cpp_data_int8[j] if cpp_validity.empty() or 
cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_INT16:
             check_status(self.row.GetArrayInt16(i, &cpp_data_int16, 
&cpp_validity))
             result = []
             for j in range(cpp_data_int16.size()):
-                result.append(cpp_data_int16[j] if cpp_validity[j] else None)
+                result.append(cpp_data_int16[j] if cpp_validity.empty() or 
cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_INT32:
             check_status(self.row.GetArrayInt32(i, &cpp_data_int32, 
&cpp_validity))
             result = []
             for j in range(cpp_data_int32.size()):
-                result.append(cpp_data_int32[j] if cpp_validity[j] else None)
+                result.append(cpp_data_int32[j] if cpp_validity.empty() or 
cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_INT64:
             check_status(self.row.GetArrayInt64(i, &cpp_data_int64, 
&cpp_validity))
             result = []
             for j in range(cpp_data_int64.size()):
-                result.append(cpp_data_int64[j] if cpp_validity[j] else None)
+                result.append(cpp_data_int64[j] if cpp_validity.empty() or 
cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_FLOAT:
             check_status(self.row.GetArrayFloat(i, &cpp_data_float, 
&cpp_validity))
             result = []
             for j in range(cpp_data_float.size()):
-                result.append(cpp_data_float[j] if cpp_validity[j] else None)
+                result.append(cpp_data_float[j] if cpp_validity.empty() or 
cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_DOUBLE:
             check_status(self.row.GetArrayDouble(i, &cpp_data_double, 
&cpp_validity))
             result = []
             for j in range(cpp_data_double.size()):
-                result.append(cpp_data_double[j] if cpp_validity[j] else None)
+                result.append(cpp_data_double[j] if cpp_validity.empty() or 
cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_BOOL:
             check_status(self.row.GetArrayBool(i, &cpp_data_bool, 
&cpp_validity))
             result = []
             for j in range(cpp_data_bool.size()):
-                result.append(bool(cpp_data_bool[j]) if cpp_validity[j] else 
None)
+                result.append(bool(cpp_data_bool[j]) if cpp_validity.empty() 
or cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_STRING:
             check_status(self.row.GetArrayString(i, &cpp_data_slice, 
&cpp_validity))
             result = []
             for j in range(cpp_data_slice.size()):
-                result.append(frombytes(cpp_data_slice[j].ToString()) if 
cpp_validity[j] else None)
+                result.append(frombytes(cpp_data_slice[j].ToString()) if 
cpp_validity.empty() or cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_BINARY:
             check_status(self.row.GetArrayBinary(i, &cpp_data_slice, 
&cpp_validity))
             result = []
             for j in range(cpp_data_slice.size()):
-                
result.append(cpp_data_slice[j].data()[:cpp_data_slice[j].size()] if 
cpp_validity[j] else None)
+                
result.append(cpp_data_slice[j].data()[:cpp_data_slice[j].size()] if 
cpp_validity.empty() or cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_VARCHAR:
             check_status(self.row.GetArrayVarchar(i, &cpp_data_slice, 
&cpp_validity))
             result = []
             for j in range(cpp_data_slice.size()):
-                result.append(frombytes(cpp_data_slice[j].ToString()) if 
cpp_validity[j] else None)
+                result.append(frombytes(cpp_data_slice[j].ToString()) if 
cpp_validity.empty() or cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_DECIMAL:
@@ -2054,13 +2055,13 @@ cdef class Row:
                 check_status(self.row.GetArrayUnscaledDecimal(i, 
&cpp_data_int32, &cpp_validity))
                 result = []
                 for j in range(cpp_data_int32.size()):
-                    result.append(from_unscaled_decimal(cpp_data_int32[j], 
scale) if cpp_validity[j] else None)
+                    result.append(from_unscaled_decimal(cpp_data_int32[j], 
scale) if cpp_validity.empty() or cpp_validity[j] else None)
                 return result
             elif precision <= 18:
                 check_status(self.row.GetArrayUnscaledDecimal(i, 
&cpp_data_int64, &cpp_validity))
                 result = []
                 for j in range(cpp_data_int64.size()):
-                    result.append(from_unscaled_decimal(cpp_data_int64[j], 
scale) if cpp_validity[j] else None)
+                    result.append(from_unscaled_decimal(cpp_data_int64[j], 
scale) if cpp_validity.empty() or cpp_validity[j] else None)
                 return result
             else:
                 raise TypeError("Unsupported DECIMAL array precision: 
{0}".format(precision))
@@ -2069,14 +2070,14 @@ cdef class Row:
             check_status(self.row.GetArrayUnixTimeMicros(i, &cpp_data_int64, 
&cpp_validity))
             result = []
             for j in range(cpp_data_int64.size()):
-                result.append(from_unixtime_micros(cpp_data_int64[j]) if 
cpp_validity[j] else None)
+                result.append(from_unixtime_micros(cpp_data_int64[j]) if 
cpp_validity.empty() or cpp_validity[j] else None)
             return result
 
         elif elem_type == KUDU_DATE:
             check_status(self.row.GetArrayDate(i, &cpp_data_int32, 
&cpp_validity))
             result = []
             for j in range(cpp_data_int32.size()):
-                result.append(unix_epoch_days_to_date(cpp_data_int32[j]) if 
cpp_validity[j] else None)
+                result.append(unix_epoch_days_to_date(cpp_data_int32[j]) if 
cpp_validity.empty() or cpp_validity[j] else None)
             return result
 
         else:
diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h
index c4387bef5..8f18fc081 100644
--- a/src/kudu/cfile/cfile-test-base.h
+++ b/src/kudu/cfile/cfile-test-base.h
@@ -874,11 +874,14 @@ Status TimeReadFileForArrayDataType(
 
       const auto elem_num = view.elem_num();
       total_array_elements += elem_num;
-      BitmapIterator bit(view.not_null_bitmap(), elem_num);
-      bool is_not_null = false;
-      while (size_t elem_count = bit.Next(&is_not_null)) {
-        if (!is_not_null) {
-          null_array_elements += elem_count;
+      if (view.not_null_bitmap()) {
+        DCHECK(view.has_nulls());
+        BitmapIterator bit(view.not_null_bitmap(), elem_num);
+        bool is_not_null = false;
+        while (size_t elem_count = bit.Next(&is_not_null)) {
+          if (!is_not_null) {
+            null_array_elements += elem_count;
+          }
         }
       }
 
@@ -890,9 +893,8 @@ Status TimeReadFileForArrayDataType(
         const Slice* data = reinterpret_cast<const Slice*>(view.data_as(Type));
         DCHECK(data);
         const auto* bm = view.not_null_bitmap();
-        DCHECK(bm);
         for (size_t i = 0; i < elem_num; ++i, ++data) {
-          if (BitmapTest(bm, i)) {
+          if (!bm || BitmapTest(bm, i)) {
             total_elem_str_size += data->size();
           }
         }
diff --git a/src/kudu/cfile/cfile_writer.cc b/src/kudu/cfile/cfile_writer.cc
index 444f4fdf5..4f5004f50 100644
--- a/src/kudu/cfile/cfile_writer.cc
+++ b/src/kudu/cfile/cfile_writer.cc
@@ -540,7 +540,7 @@ Status CFileWriter::AppendNullableArrayEntries(const 
uint8_t* bitmap,
         }
 
         const uint8_t* cell_non_null_bitmap = view.not_null_bitmap();
-        DCHECK(cell_non_null_bitmap);
+        DCHECK(cell_non_null_bitmap || !view.has_nulls());
         BitmapIterator elem_bitmap_iter(cell_non_null_bitmap,
                                         cell_elem_num);
         const uint8_t* data = view.data_as(elem_type_info->type());
diff --git a/src/kudu/client/array_cell-internal.cc 
b/src/kudu/client/array_cell-internal.cc
index a101f3bdb..40d5c75f1 100644
--- a/src/kudu/client/array_cell-internal.cc
+++ b/src/kudu/client/array_cell-internal.cc
@@ -69,6 +69,10 @@ bool KuduArrayCellView::empty() const {
   return data_->view_.empty();
 }
 
+bool KuduArrayCellView::has_nulls() const {
+  return data_->view_.has_nulls();
+}
+
 const uint8_t* KuduArrayCellView::not_null_bitmap() const {
   return data_->view_.not_null_bitmap();
 }
diff --git a/src/kudu/client/array_cell.h b/src/kudu/client/array_cell.h
index 353fcecf7..4710ad5b3 100644
--- a/src/kudu/client/array_cell.h
+++ b/src/kudu/client/array_cell.h
@@ -52,7 +52,13 @@ class KUDU_EXPORT KuduArrayCellView {
   // Whether the array cell is empty, i.e. does not contain any elements.
   bool empty() const;
 
-  // Get non-null (a.k.a. validity) bitmap for the array elements.
+  // Whether at least one element in the array is null/invalid. If !has_nulls()
+  // holds true, it helps avoiding calls to 'not_null_bitmap()' and evaluating
+  // non-nullness/validity per element in particular usage scenarios.
+  bool has_nulls() const;
+
+  // Get non-null (a.k.a. validity) bitmap for the array elements. This returns
+  // null if has_nulls() == false.
   const uint8_t* not_null_bitmap() const;
 
   // Accessor for the cell's raw data in the format similar to what
diff --git a/src/kudu/client/scan_batch.cc b/src/kudu/client/scan_batch.cc
index 6cbdac6cd..9b4f2313e 100644
--- a/src/kudu/client/scan_batch.cc
+++ b/src/kudu/client/scan_batch.cc
@@ -327,8 +327,12 @@ Status KuduScanBatch::RowPtr::GetArray(int col_idx,
   if (validity_out) {
     validity_out->resize(elem_num);
     if (!view.empty()) {
-      DCHECK(view.not_null_bitmap());
-      *validity_out = BitmapToVector(view.not_null_bitmap(), elem_num);
+      if (view.not_null_bitmap()) {
+        *validity_out = BitmapToVector(view.not_null_bitmap(), elem_num);
+      } else {
+        // All elements are valid: the validity array is empty.
+        validity_out->clear();
+      }
     }
   }
   return Status::OK();
diff --git a/src/kudu/common/array_cell_view.h 
b/src/kudu/common/array_cell_view.h
index fdbd60b4f..663cc8390 100644
--- a/src/kudu/common/array_cell_view.h
+++ b/src/kudu/common/array_cell_view.h
@@ -90,7 +90,8 @@ class ArrayCellMetadataView final {
        : data_(buf),
          size_(size),
          content_(nullptr),
-         is_initialized_(false) {
+         is_initialized_(false),
+         has_nulls_(false) {
   }
 
   Status Init() {
@@ -98,6 +99,7 @@ class ArrayCellMetadataView final {
     if (size_ == 0) {
       content_ = nullptr;
       is_initialized_ = true;
+      has_nulls_ = false;
       return Status::OK();
     }
 
@@ -139,15 +141,33 @@ class ArrayCellMetadataView final {
       return Status::IllegalState("null flatbuffers of non-zero size");
     }
 
-    DCHECK(content_);
-    if (const size_t bit_num = content_->validity()->size(); bit_num != 0) {
+    const size_t values_size = elem_num_impl();
+    const size_t validity_size = content_->validity() ? 
content_->validity()->size() : 0;
+    if (validity_size != 0 && values_size != validity_size) {
+      return Status::Corruption("number of data and validity elements differ");
+    }
+
+    has_nulls_ = false;
+    if (validity_size != 0) {
+      // If the validity vector is supplied and with all its elements non-zero.
+      const auto& validity = *(DCHECK_NOTNULL(content_->validity()));
+      has_nulls_ = std::any_of(validity.cbegin(), validity.cend(),
+                               [&](uint8_t e) { return e == 0; });
+    }
+
+    if (has_nulls_) {
+      const size_t bit_num = values_size;
+      DCHECK_GT(bit_num, 0);
       bitmap_.reset(new uint8_t[BitmapSize(bit_num)]);
+      // Assuming the most of the elements are non-null/valid, it's usually 
less
+      // calls to flip particular bits from 1 to 0 than flipping them from 0 
to 1.
+      // TODO(aserbin): consider counting alternating strategy based on the 
number
+      //                of non-zero values that comes from computing 
'has_nulls_'
       auto* bm = bitmap_.get();
-      const auto* v = content_->validity();
+      memset(bm, 0xff, BitmapSize(bit_num));
+      const auto& v = *(DCHECK_NOTNULL(content_->validity()));
       for (size_t idx = 0; idx < bit_num; ++idx) {
-        if (v->Get(idx) != 0) {
-          BitmapSet(bm, idx);
-        } else {
+        if (v.Get(idx) == 0) {
           BitmapClear(bm, idx);
         }
       }
@@ -189,20 +209,26 @@ class ArrayCellMetadataView final {
   // Number of elements in the array.
   size_t elem_num() const {
     DCHECK(is_initialized_);
-    return content_ ? content_->validity()->size() : 0;
+    return elem_num_impl();
   }
 
   bool empty() const {
-    DCHECK(is_initialized_);
-    return content_ ? content_->validity()->empty() : true;
+    return elem_num() == 0;
   }
 
   // Non-null (a.k.a. validity) bitmap for the array elements.
   const uint8_t* not_null_bitmap() const {
     DCHECK(is_initialized_);
+    DCHECK((has_nulls_ && bitmap_) || (!has_nulls_ && !bitmap_));
     return bitmap_.get();
   }
 
+  bool has_nulls() const {
+    DCHECK(is_initialized_);
+    DCHECK((has_nulls_ && bitmap_) || (!has_nulls_ && !bitmap_));
+    return has_nulls_;
+  }
+
   const uint8_t* data_as(DataType data_type) const {
     DCHECK(is_initialized_);
     if (empty()) {
@@ -258,6 +284,47 @@ class ArrayCellMetadataView final {
     return content_ ? content_->data_as<T>()->values()->Data() : nullptr;
   }
 
+  size_t elem_num_impl() const {
+    if (!content_) {
+      return 0;
+    }
+    if (!content_->data()) {
+      return 0;
+    }
+    const auto data_type = content_->data_type();
+    switch (data_type) {
+      case serdes::ScalarArray::Int8Array:
+        return content_->data_as<serdes::Int8Array>()->values()->size();
+      case serdes::ScalarArray::UInt8Array:
+        return content_->data_as<serdes::UInt8Array>()->values()->size();
+      case serdes::ScalarArray::Int16Array:
+        return content_->data_as<serdes::Int16Array>()->values()->size();
+      case serdes::ScalarArray::UInt16Array:
+        return content_->data_as<serdes::UInt16Array>()->values()->size();
+      case serdes::ScalarArray::Int32Array:
+        return content_->data_as<serdes::Int32Array>()->values()->size();
+      case serdes::ScalarArray::UInt32Array:
+        return content_->data_as<serdes::UInt32Array>()->values()->size();
+      case serdes::ScalarArray::Int64Array:
+        return content_->data_as<serdes::Int64Array>()->values()->size();
+      case serdes::ScalarArray::UInt64Array:
+        return content_->data_as<serdes::UInt64Array>()->values()->size();
+      case serdes::ScalarArray::FloatArray:
+        return content_->data_as<serdes::FloatArray>()->values()->size();
+      case serdes::ScalarArray::DoubleArray:
+        return content_->data_as<serdes::DoubleArray>()->values()->size();
+      case serdes::ScalarArray::StringArray:
+        return content_->data_as<serdes::StringArray>()->values()->size();
+      case serdes::ScalarArray::BinaryArray:
+        return content_->data_as<serdes::BinaryArray>()->values()->size();
+      default:
+        LOG(DFATAL) << "unknown ScalarArray type: " << 
static_cast<size_t>(data_type);
+        break;
+    }
+
+    return 0;
+  }
+
   // Flatbuffer-encoded data; a non-owning raw pointer.
   const uint8_t* const data_;
 
@@ -279,6 +346,9 @@ class ArrayCellMetadataView final {
 
   // Whether the Init() method has been successfully run for this object.
   bool is_initialized_;
+
+  // Whether the underlying array has at least one null/invalid element.
+  bool has_nulls_;
 };
 
 } // namespace kudu
diff --git a/src/kudu/common/array_type_serdes-test.cc 
b/src/kudu/common/array_type_serdes-test.cc
index 34de4c064..7e8284489 100644
--- a/src/kudu/common/array_type_serdes-test.cc
+++ b/src/kudu/common/array_type_serdes-test.cc
@@ -19,6 +19,7 @@
 
 #include <cstdint>
 #include <cstring>
+#include <initializer_list>
 #include <memory>
 #include <string>
 #include <vector>
@@ -78,7 +79,8 @@ TEST(ArrayTypeSerdesTest, Basic) {
   ASSERT_OK(view.Init());
   ASSERT_EQ(val.size(), view.elem_num());
   const auto* view_validity_bitmap = view.not_null_bitmap();
-  ASSERT_TRUE(BitmapEquals(validity_bitmap, view_validity_bitmap, 
view.elem_num()));
+  ASSERT_NE(nullptr, view_validity_bitmap);
+  ASSERT_TRUE(view.has_nulls());
 
   // Verify the data matches the source.
   {
@@ -99,4 +101,63 @@ TEST(ArrayTypeSerdesTest, Basic) {
   }
 }
 
+// When all the elements in the array are non-null/valid, the validity bitmap
+// accessed via ArrayCellMetadataView is null in both two cases:
+//   * the supplied validity vector have all the elements set to 'true'
+//   * the supplied validity vector is empty
+TEST(ArrayTypeSerdesTest, AllNonNullElements) {
+  constexpr const uint8_t kThreeOnes[] = { 0b00000111 };
+  constexpr const uint8_t* kNull = nullptr;
+  const vector<bool> kEmpty{};
+
+  for (const uint8_t* validity_bitmap : { kNull, kThreeOnes }) {
+    const vector<int16_t> val{ 0, 1, 2, };
+    const vector<bool>& validity_vector =
+        validity_bitmap ? BitmapToVector(validity_bitmap, val.size()) : kEmpty;
+    if (validity_bitmap) {
+      ASSERT_EQ(val.size(), validity_vector.size());
+    }
+
+    unique_ptr<uint8_t[]> buf_data;
+    size_t buf_data_size = 0;
+    ASSERT_OK(serdes::Serialize(GetTypeInfo(INT16),
+                                reinterpret_cast<const uint8_t*>(val.data()),
+                                val.size(),
+                                validity_vector,
+                                &buf_data,
+                                &buf_data_size));
+    ASSERT_TRUE(buf_data);
+    const Slice cell(buf_data.get(), buf_data_size);
+
+    Arena arena(128);
+    Slice arena_cell;
+    ASSERT_OK(serdes::SerializeIntoArena(
+        GetTypeInfo(INT16),
+        reinterpret_cast<const uint8_t*>(val.data()),
+        validity_bitmap,
+        val.size(),
+        &arena,
+        &arena_cell));
+
+    // Make sure Serialize() an SerializeInfoArena() produce the same data.
+    ASSERT_EQ(cell, arena_cell);
+
+    // Peek into the serialized buffer using ArrayCellMetadataView and compare
+    // the source data with the view into the serialized buffer.
+    ArrayCellMetadataView view(cell.data(), cell.size());
+    ASSERT_OK(view.Init());
+    ASSERT_EQ(val.size(), view.elem_num());
+    const auto* view_validity_bitmap = view.not_null_bitmap();
+    ASSERT_EQ(nullptr, view_validity_bitmap);
+    ASSERT_FALSE(view.has_nulls());
+
+    // Verify the data matches the source.
+    {
+      const uint8_t* data_view = view.data_as(INT16);
+      ASSERT_NE(nullptr, data_view);
+      ASSERT_EQ(0, memcmp(data_view, val.data(), sizeof(int16_t) * 
val.size()));
+    }
+  }
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/array_type_serdes.h 
b/src/kudu/common/array_type_serdes.h
index 7e8890925..9acd64b4e 100644
--- a/src/kudu/common/array_type_serdes.h
+++ b/src/kudu/common/array_type_serdes.h
@@ -55,7 +55,7 @@ void BuildFlatbuffers(
       static const Slice kEmptySlice(static_cast<uint8_t*>(nullptr), 0);
       const Slice* ptr = reinterpret_cast<const Slice*>(column_data);
       for (size_t idx = 0; idx < nrows; ++idx) {
-        val[idx] = validity[idx] ? *(ptr + idx) : kEmptySlice;
+        val[idx] = (validity.empty() || validity[idx]) ? *(ptr + idx) : 
kEmptySlice;
       }
     } else {
       static_assert(!std::is_same<Slice, ElementType>::value,
@@ -64,7 +64,8 @@ void BuildFlatbuffers(
     }
   }
 
-  auto validity_vector = builder.CreateVector(validity);
+  const auto validity_vector =
+      !validity.empty() ? builder.CreateVector(validity) : 0;
   if constexpr (KUDU_DATA_TYPE == STRING) {
     auto values = FB_TYPE::Traits::Create(
         builder, builder.CreateVectorOfStrings<ElementType>(val));
@@ -107,7 +108,7 @@ Status Serialize(
 
   DCHECK(out_buf);
   DCHECK(out_buf_size);
-  DCHECK_EQ(validity.size(), nrows);
+  DCHECK(validity.empty() || validity.size() == nrows);
 
   if (PREDICT_FALSE(column_data == nullptr && nrows > 0)) {
     return Status::InvalidArgument("inconsistent data and validity info for 
array");
@@ -141,6 +142,7 @@ Status SerializeIntoArena(
     size_t nrows,
     Arena* arena,
     Slice* out) {
+  static const std::vector<bool> kAllValid{};
   typedef typename DataTypeTraits<KUDU_DATA_TYPE>::cpp_type ElementType;
 
   DCHECK(arena);
@@ -148,7 +150,8 @@ Status SerializeIntoArena(
 
   flatbuffers::FlatBufferBuilder builder(
       nrows * sizeof(ElementType) + nrows + FLATBUFFERS_MIN_BUFFER_SIZE);
-  const std::vector<bool>& validity = BitmapToVector(validity_bitmap, nrows);
+  const std::vector<bool>& validity =
+      validity_bitmap ? BitmapToVector(validity_bitmap, nrows) : kAllValid;
   BuildFlatbuffers<KUDU_DATA_TYPE, FB_TYPE>(column_data, nrows, validity, 
&builder);
 
   // Copy the serialized data into the arena.
diff --git a/src/kudu/common/partial_row-test.cc 
b/src/kudu/common/partial_row-test.cc
index 8545a893b..5fb02e935 100644
--- a/src/kudu/common/partial_row-test.cc
+++ b/src/kudu/common/partial_row-test.cc
@@ -422,6 +422,53 @@ TEST_F(PartialRowTest, DecimalArray) {
   }
 }
 
+// An empty validity vector means all the elements in the array are valid.
+TEST_F(PartialRowTest, EmptyValidityVector) {
+  const Schema schema(
+      { ColumnSchema("key", INT32),
+        ColumnSchemaBuilder()
+           .name("bool_arr")
+           .type(BOOL)
+           .array(true)
+           .nullable(true),
+        ColumnSchemaBuilder()
+           .name("int32_arr")
+           .type(INT32)
+           .array(true)
+           .nullable(true),
+        ColumnSchemaBuilder()
+           .name("string_arr")
+           .type(STRING)
+           .array(true)
+           .nullable(true),
+      }, 1);
+
+  {
+    KuduPartialRow row(&schema);
+    ASSERT_OK(row.SetArrayBool(1, { false, true, }, {}));
+    ASSERT_TRUE(row.IsColumnSet(1));
+    ASSERT_FALSE(row.IsNull(1));
+    ASSERT_EQ("bool 1d-array bool_arr=[false, true]", row.ToString());
+  }
+  {
+    KuduPartialRow row(&schema);
+    ASSERT_OK(row.SetArrayInt32(2, {}, {}));
+    ASSERT_TRUE(row.IsColumnSet(2));
+    ASSERT_FALSE(row.IsNull(2));
+    ASSERT_EQ("int32 1d-array int32_arr=[]", row.ToString());
+  }
+  {
+    KuduPartialRow row(&schema);
+    const auto s = row.SetArrayInt32(2, { 0, 1 }, { true });
+    ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+    ASSERT_STR_CONTAINS(s.ToString(),
+                        "data and validity arrays must be the same length "
+                        "if the latter is non-empty");
+    ASSERT_FALSE(row.IsColumnSet(2));
+    ASSERT_FALSE(row.IsNull(2));
+  }
+}
+
 TEST_F(PartialRowTest, TestCopy) {
   KuduPartialRow row(&schema_);
 
diff --git a/src/kudu/common/partial_row.cc b/src/kudu/common/partial_row.cc
index 1d6ef0f8d..cafba09f7 100644
--- a/src/kudu/common/partial_row.cc
+++ b/src/kudu/common/partial_row.cc
@@ -209,9 +209,10 @@ template<typename T>
 Status KuduPartialRow::SetArray(int col_idx,
                                 const vector<typename T::element_cpp_type>& 
val,
                                 const vector<bool>& validity) {
-  if (PREDICT_FALSE(val.size() != validity.size())) {
+  // An empty validity vector means all the elements in the array are valid.
+  if (PREDICT_FALSE(!validity.empty() && val.size() != validity.size())) {
     return Status::InvalidArgument(
-        "data and validity arrays must be the same length");
+        "data and validity arrays must be the same length if the latter is 
non-empty");
   }
 
   const ColumnSchema& col = schema_->column(col_idx);
@@ -255,9 +256,10 @@ Status KuduPartialRow::SetArray<ArrayTypeTraits<BOOL>>(
     int col_idx,
     const vector<bool>& val_bool,
     const vector<bool>& validity) {
-  if (PREDICT_FALSE(val_bool.size() != validity.size())) {
+  // An empty validity vector means all the elements in the array are valid.
+  if (PREDICT_FALSE(!validity.empty() && val_bool.size() != validity.size())) {
     return Status::InvalidArgument(
-        "data and validity arrays must be the same length");
+        "data and validity arrays must be the same length if the latter is 
non-empty");
   }
   vector<uint8_t> val;
   val.reserve(val_bool.size());
diff --git a/src/kudu/common/row_operations.cc 
b/src/kudu/common/row_operations.cc
index 3059cb9d6..e424aef7b 100644
--- a/src/kudu/common/row_operations.cc
+++ b/src/kudu/common/row_operations.cc
@@ -43,6 +43,7 @@
 #include "kudu/util/safe_math.h"
 #include "kudu/util/slice.h"
 
+using kudu::ArrayCellMetadataView;
 using std::string;
 using std::vector;
 using strings::Substitute;
@@ -60,12 +61,11 @@ DEFINE_uint32(array_cell_max_elem_num, 1024,
 TAG_FLAG(array_cell_max_elem_num, advanced);
 TAG_FLAG(array_cell_max_elem_num, runtime);
 
-// Validate that log_min_segments_to_retain >= 1
 static bool ValidateArrayCellMaxElemNum(const char* flagname, uint32_t value) {
-  if (value > kudu::ArrayCellMetadataView::kArrayMaxElemNum) {
+  if (value > ArrayCellMetadataView::kArrayMaxElemNum) {
     LOG(ERROR) << Substitute(
         "'$0' set to invalid value $1; must not be greater than $2",
-        flagname, value, kudu::ArrayCellMetadataView::kArrayMaxElemNum);
+        flagname, value, ArrayCellMetadataView::kArrayMaxElemNum);
     return false;
   }
   return true;
diff --git a/src/kudu/common/types.h b/src/kudu/common/types.h
index 7790867f5..c9d3c771c 100644
--- a/src/kudu/common/types.h
+++ b/src/kudu/common/types.h
@@ -957,10 +957,11 @@ struct ArrayTypeTraits : public 
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
       if (idx != 0) {
         str->append(", ");
       }
-      if (BitmapTest(validity, idx)) {
-        
DataTypeTraits<ARRAY_ELEMENT_TYPE>::AppendDebugStringForValue(data_ptr, str);
-      } else {
+      // Null validity bitmap means all elements are valid.
+      if (validity && !BitmapTest(validity, idx)) {
         str->append("NULL");
+      } else {
+        
DataTypeTraits<ARRAY_ELEMENT_TYPE>::AppendDebugStringForValue(data_ptr, str);
       }
       data_ptr += sizeof(typename TypeTraits<ARRAY_ELEMENT_TYPE>::cpp_type);
     }
@@ -1030,7 +1031,7 @@ struct ArrayTypeTraits : public 
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
     }
 
     const uint8_t* b_not_null_bitmap = b_view.not_null_bitmap();
-    if (BitmapTest(b_not_null_bitmap, a_elem_num)) {
+    if (!b_not_null_bitmap || BitmapTest(b_not_null_bitmap, a_elem_num)) {
       // The trailing extra element in 'b' must be NULL if 'b' goes
       // consecutively after 'a'.
       return false;
@@ -1051,7 +1052,7 @@ struct ArrayTypeTraits : public 
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
  private:
   // Compare array represented by the specified array views facades up to
   // the specified number of elements. The comparison goes element-by-element,
-  // using Compare() method for the corresponding element scalar type.
+  // using Compare() method for the corresponding scalar type elements.
   static int Compare(const ArrayCellMetadataView& lhs,
                      const ArrayCellMetadataView& rhs,
                      size_t num_elems_to_compare) {
@@ -1072,18 +1073,16 @@ struct ArrayTypeTraits : public 
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
     const uint8_t* lhs_data_ptr = lhs.data_as(ARRAY_ELEMENT_TYPE);
     DCHECK(lhs_data_ptr || lhs_elems == 0);
     const uint8_t* lhs_not_null_bitmap = lhs.not_null_bitmap();
-    DCHECK(lhs_not_null_bitmap || lhs_elems == 0);
 
     const uint8_t* rhs_data_ptr = rhs.data_as(ARRAY_ELEMENT_TYPE);
     DCHECK(rhs_data_ptr || rhs_elems == 0);
     const uint8_t* rhs_not_null_bitmap = rhs.not_null_bitmap();
-    DCHECK(rhs_not_null_bitmap || rhs_elems == 0);
 
     for (size_t idx = 0; idx < cap_num_elems; ++idx,
         lhs_data_ptr += sizeof(typename 
TypeTraits<ARRAY_ELEMENT_TYPE>::cpp_type),
         rhs_data_ptr += sizeof(typename 
TypeTraits<ARRAY_ELEMENT_TYPE>::cpp_type)) {
-      if (!BitmapTest(lhs_not_null_bitmap, idx)) {
-        if (!BitmapTest(rhs_not_null_bitmap, idx)) {
+      if (lhs_not_null_bitmap && !BitmapTest(lhs_not_null_bitmap, idx)) {
+        if (rhs_not_null_bitmap && !BitmapTest(rhs_not_null_bitmap, idx)) {
           // Both elements are NULL: continue with next elements in the arrays.
           continue;
         }
@@ -1092,7 +1091,7 @@ struct ArrayTypeTraits : public 
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
         // same numbers (or two NULLs) up to the 'idx' position.
         return -1;
       } else {
-        if (!BitmapTest(rhs_not_null_bitmap, idx)) {
+        if (rhs_not_null_bitmap && !BitmapTest(rhs_not_null_bitmap, idx)) {
           // lhs > rhs: a non-NULL element in lhs at idx, but rhs non-NULL
           // at idx, while all the pairs of elements in the array contain
           // same numbers (or two NULLs) up to the 'idx' position.
diff --git a/src/kudu/util/bitmap-test.cc b/src/kudu/util/bitmap-test.cc
index 948bf1bc0..e2a33e17b 100644
--- a/src/kudu/util/bitmap-test.cc
+++ b/src/kudu/util/bitmap-test.cc
@@ -21,6 +21,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
+#include <initializer_list>
 #include <set>
 #include <string>
 #include <vector>
@@ -428,6 +429,32 @@ TEST(TestBitMap, BitmapToVector) {
   }
 }
 
+TEST(TestBitMap, EmptyBitset) {
+  const uint8_t* kNull = nullptr;
+  const uint8_t kEmpty[] = {};
+  for (const uint8_t* buf : { kNull, kEmpty }) {
+    for (bool initial_value : { false, true }) {
+      BitmapIterator bit(buf, 0);
+      bool value = initial_value;
+      const auto next = bit.Next(&value);
+      ASSERT_EQ(0, next);
+      // The output parameter is untouched.
+      ASSERT_EQ(initial_value, value);
+    }
+  }
+}
+
+TEST(TestBitMap, BitmapIteratorOnNullBitmap) {
+  for (const size_t num_bits : { 1, 5, 8, 10, 64 }) {
+    SCOPED_TRACE(std::to_string(num_bits));
+    BitmapIterator bit(nullptr, num_bits);
+    bool value = false;
+    const auto next = bit.Next(&value);
+    ASSERT_EQ(num_bits, next);
+    ASSERT_TRUE(value);
+  }
+}
+
 #ifndef NDEBUG
 TEST(TestBitMapDeathTest, TestCopyOverlap) {
   uint8_t bm[2] = { 0 };
diff --git a/src/kudu/util/bitmap.cc b/src/kudu/util/bitmap.cc
index 9de95ca72..17c6e268c 100644
--- a/src/kudu/util/bitmap.cc
+++ b/src/kudu/util/bitmap.cc
@@ -155,6 +155,7 @@ void BitmapCopy(uint8_t* dst, size_t dst_offset,
 }
 
 vector<bool> BitmapToVector(const uint8_t* bitmap, size_t num_bits) {
+  DCHECK(bitmap);
   BitmapIterator it(bitmap, num_bits);
   vector<bool> result(num_bits);
   bool is_set = false;
diff --git a/src/kudu/util/bitmap.h b/src/kudu/util/bitmap.h
index 96b078b24..c3feb92f8 100644
--- a/src/kudu/util/bitmap.h
+++ b/src/kudu/util/bitmap.h
@@ -141,8 +141,8 @@ class BitmapIterator {
  public:
   BitmapIterator(const uint8_t* map, size_t num_bits)
       : map_(map),
-        offset_(0),
-        num_bits_(num_bits) {
+        num_bits_(num_bits),
+        offset_(0) {
   }
 
   size_t Next(bool* value) {
@@ -152,6 +152,13 @@ class BitmapIterator {
       return 0;
     }
 
+    if (!map_) {
+      // Null bitmap means all the elements are valid.
+      *value = true;
+      offset_ = num_bits_;
+      return num_bits_;
+    }
+
     *value = BitmapTest(map_, offset_);
 
     size_t index;
@@ -167,8 +174,8 @@ class BitmapIterator {
 
  private:
   const uint8_t* const map_;
+  const size_t num_bits_;
   size_t offset_;
-  size_t num_bits_;
 };
 
 // Iterate over the bits in 'bitmap' and call 'func' for each set bit.

Reply via email to