This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new ac505c986 KUDU-1261 use empty validity vector when no nulls in array
ac505c986 is described below
commit ac505c986987e9079acf9cbd74d3ef56f728a94d
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Oct 20 21:35:36 2025 -0700
KUDU-1261 use empty validity vector when no nulls in array
This changelist updates conventions on supplying and consuming the
validity information on array elements in the Kudu C++ client API.
With this changelist, an empty validity vector is accepted if all
elements in data array to be written are non-null (i.e. valid).
The same convention is now used for array data returned as a result
of read/scan operation.
The Python client has been updated correspondingly.
This patch also contains a few new test scenarios to cover the updated
functionality.
Change-Id: I6f0284a1cdf23234d30d18ae4b9f6ee3a9bd82f4
Reviewed-on: http://gerrit.cloudera.org:8080/23568
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Abhishek Chennaka <[email protected]>
---
python/kudu/client.pyx | 29 +++++-----
src/kudu/cfile/cfile-test-base.h | 16 +++---
src/kudu/cfile/cfile_writer.cc | 2 +-
src/kudu/client/array_cell-internal.cc | 4 ++
src/kudu/client/array_cell.h | 8 ++-
src/kudu/client/scan_batch.cc | 8 ++-
src/kudu/common/array_cell_view.h | 90 +++++++++++++++++++++++++++----
src/kudu/common/array_type_serdes-test.cc | 63 +++++++++++++++++++++-
src/kudu/common/array_type_serdes.h | 11 ++--
src/kudu/common/partial_row-test.cc | 47 ++++++++++++++++
src/kudu/common/partial_row.cc | 10 ++--
src/kudu/common/row_operations.cc | 6 +--
src/kudu/common/types.h | 19 ++++---
src/kudu/util/bitmap-test.cc | 27 ++++++++++
src/kudu/util/bitmap.cc | 1 +
src/kudu/util/bitmap.h | 13 +++--
16 files changed, 294 insertions(+), 60 deletions(-)
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 041405d54..b2ed6db9c 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -1975,74 +1975,75 @@ cdef class Row:
size_t j
list result
+ # An empty validity vector means all the elements are non-null/valid.
if elem_type == KUDU_INT8:
check_status(self.row.GetArrayInt8(i, &cpp_data_int8,
&cpp_validity))
result = []
for j in range(cpp_data_int8.size()):
- result.append(cpp_data_int8[j] if cpp_validity[j] else None)
+ result.append(cpp_data_int8[j] if cpp_validity.empty() or
cpp_validity[j] else None)
return result
elif elem_type == KUDU_INT16:
check_status(self.row.GetArrayInt16(i, &cpp_data_int16,
&cpp_validity))
result = []
for j in range(cpp_data_int16.size()):
- result.append(cpp_data_int16[j] if cpp_validity[j] else None)
+ result.append(cpp_data_int16[j] if cpp_validity.empty() or
cpp_validity[j] else None)
return result
elif elem_type == KUDU_INT32:
check_status(self.row.GetArrayInt32(i, &cpp_data_int32,
&cpp_validity))
result = []
for j in range(cpp_data_int32.size()):
- result.append(cpp_data_int32[j] if cpp_validity[j] else None)
+ result.append(cpp_data_int32[j] if cpp_validity.empty() or
cpp_validity[j] else None)
return result
elif elem_type == KUDU_INT64:
check_status(self.row.GetArrayInt64(i, &cpp_data_int64,
&cpp_validity))
result = []
for j in range(cpp_data_int64.size()):
- result.append(cpp_data_int64[j] if cpp_validity[j] else None)
+ result.append(cpp_data_int64[j] if cpp_validity.empty() or
cpp_validity[j] else None)
return result
elif elem_type == KUDU_FLOAT:
check_status(self.row.GetArrayFloat(i, &cpp_data_float,
&cpp_validity))
result = []
for j in range(cpp_data_float.size()):
- result.append(cpp_data_float[j] if cpp_validity[j] else None)
+ result.append(cpp_data_float[j] if cpp_validity.empty() or
cpp_validity[j] else None)
return result
elif elem_type == KUDU_DOUBLE:
check_status(self.row.GetArrayDouble(i, &cpp_data_double,
&cpp_validity))
result = []
for j in range(cpp_data_double.size()):
- result.append(cpp_data_double[j] if cpp_validity[j] else None)
+ result.append(cpp_data_double[j] if cpp_validity.empty() or
cpp_validity[j] else None)
return result
elif elem_type == KUDU_BOOL:
check_status(self.row.GetArrayBool(i, &cpp_data_bool,
&cpp_validity))
result = []
for j in range(cpp_data_bool.size()):
- result.append(bool(cpp_data_bool[j]) if cpp_validity[j] else
None)
+ result.append(bool(cpp_data_bool[j]) if cpp_validity.empty()
or cpp_validity[j] else None)
return result
elif elem_type == KUDU_STRING:
check_status(self.row.GetArrayString(i, &cpp_data_slice,
&cpp_validity))
result = []
for j in range(cpp_data_slice.size()):
- result.append(frombytes(cpp_data_slice[j].ToString()) if
cpp_validity[j] else None)
+ result.append(frombytes(cpp_data_slice[j].ToString()) if
cpp_validity.empty() or cpp_validity[j] else None)
return result
elif elem_type == KUDU_BINARY:
check_status(self.row.GetArrayBinary(i, &cpp_data_slice,
&cpp_validity))
result = []
for j in range(cpp_data_slice.size()):
-
result.append(cpp_data_slice[j].data()[:cpp_data_slice[j].size()] if
cpp_validity[j] else None)
+
result.append(cpp_data_slice[j].data()[:cpp_data_slice[j].size()] if
cpp_validity.empty() or cpp_validity[j] else None)
return result
elif elem_type == KUDU_VARCHAR:
check_status(self.row.GetArrayVarchar(i, &cpp_data_slice,
&cpp_validity))
result = []
for j in range(cpp_data_slice.size()):
- result.append(frombytes(cpp_data_slice[j].ToString()) if
cpp_validity[j] else None)
+ result.append(frombytes(cpp_data_slice[j].ToString()) if
cpp_validity.empty() or cpp_validity[j] else None)
return result
elif elem_type == KUDU_DECIMAL:
@@ -2054,13 +2055,13 @@ cdef class Row:
check_status(self.row.GetArrayUnscaledDecimal(i,
&cpp_data_int32, &cpp_validity))
result = []
for j in range(cpp_data_int32.size()):
- result.append(from_unscaled_decimal(cpp_data_int32[j],
scale) if cpp_validity[j] else None)
+ result.append(from_unscaled_decimal(cpp_data_int32[j],
scale) if cpp_validity.empty() or cpp_validity[j] else None)
return result
elif precision <= 18:
check_status(self.row.GetArrayUnscaledDecimal(i,
&cpp_data_int64, &cpp_validity))
result = []
for j in range(cpp_data_int64.size()):
- result.append(from_unscaled_decimal(cpp_data_int64[j],
scale) if cpp_validity[j] else None)
+ result.append(from_unscaled_decimal(cpp_data_int64[j],
scale) if cpp_validity.empty() or cpp_validity[j] else None)
return result
else:
raise TypeError("Unsupported DECIMAL array precision:
{0}".format(precision))
@@ -2069,14 +2070,14 @@ cdef class Row:
check_status(self.row.GetArrayUnixTimeMicros(i, &cpp_data_int64,
&cpp_validity))
result = []
for j in range(cpp_data_int64.size()):
- result.append(from_unixtime_micros(cpp_data_int64[j]) if
cpp_validity[j] else None)
+ result.append(from_unixtime_micros(cpp_data_int64[j]) if
cpp_validity.empty() or cpp_validity[j] else None)
return result
elif elem_type == KUDU_DATE:
check_status(self.row.GetArrayDate(i, &cpp_data_int32,
&cpp_validity))
result = []
for j in range(cpp_data_int32.size()):
- result.append(unix_epoch_days_to_date(cpp_data_int32[j]) if
cpp_validity[j] else None)
+ result.append(unix_epoch_days_to_date(cpp_data_int32[j]) if
cpp_validity.empty() or cpp_validity[j] else None)
return result
else:
diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h
index c4387bef5..8f18fc081 100644
--- a/src/kudu/cfile/cfile-test-base.h
+++ b/src/kudu/cfile/cfile-test-base.h
@@ -874,11 +874,14 @@ Status TimeReadFileForArrayDataType(
const auto elem_num = view.elem_num();
total_array_elements += elem_num;
- BitmapIterator bit(view.not_null_bitmap(), elem_num);
- bool is_not_null = false;
- while (size_t elem_count = bit.Next(&is_not_null)) {
- if (!is_not_null) {
- null_array_elements += elem_count;
+ if (view.not_null_bitmap()) {
+ DCHECK(view.has_nulls());
+ BitmapIterator bit(view.not_null_bitmap(), elem_num);
+ bool is_not_null = false;
+ while (size_t elem_count = bit.Next(&is_not_null)) {
+ if (!is_not_null) {
+ null_array_elements += elem_count;
+ }
}
}
@@ -890,9 +893,8 @@ Status TimeReadFileForArrayDataType(
const Slice* data = reinterpret_cast<const Slice*>(view.data_as(Type));
DCHECK(data);
const auto* bm = view.not_null_bitmap();
- DCHECK(bm);
for (size_t i = 0; i < elem_num; ++i, ++data) {
- if (BitmapTest(bm, i)) {
+ if (!bm || BitmapTest(bm, i)) {
total_elem_str_size += data->size();
}
}
diff --git a/src/kudu/cfile/cfile_writer.cc b/src/kudu/cfile/cfile_writer.cc
index 444f4fdf5..4f5004f50 100644
--- a/src/kudu/cfile/cfile_writer.cc
+++ b/src/kudu/cfile/cfile_writer.cc
@@ -540,7 +540,7 @@ Status CFileWriter::AppendNullableArrayEntries(const
uint8_t* bitmap,
}
const uint8_t* cell_non_null_bitmap = view.not_null_bitmap();
- DCHECK(cell_non_null_bitmap);
+ DCHECK(cell_non_null_bitmap || !view.has_nulls());
BitmapIterator elem_bitmap_iter(cell_non_null_bitmap,
cell_elem_num);
const uint8_t* data = view.data_as(elem_type_info->type());
diff --git a/src/kudu/client/array_cell-internal.cc
b/src/kudu/client/array_cell-internal.cc
index a101f3bdb..40d5c75f1 100644
--- a/src/kudu/client/array_cell-internal.cc
+++ b/src/kudu/client/array_cell-internal.cc
@@ -69,6 +69,10 @@ bool KuduArrayCellView::empty() const {
return data_->view_.empty();
}
+bool KuduArrayCellView::has_nulls() const {
+ return data_->view_.has_nulls();
+}
+
const uint8_t* KuduArrayCellView::not_null_bitmap() const {
return data_->view_.not_null_bitmap();
}
diff --git a/src/kudu/client/array_cell.h b/src/kudu/client/array_cell.h
index 353fcecf7..4710ad5b3 100644
--- a/src/kudu/client/array_cell.h
+++ b/src/kudu/client/array_cell.h
@@ -52,7 +52,13 @@ class KUDU_EXPORT KuduArrayCellView {
// Whether the array cell is empty, i.e. does not contain any elements.
bool empty() const;
- // Get non-null (a.k.a. validity) bitmap for the array elements.
+ // Whether at least one element in the array is null/invalid. If !has_nulls()
+ // holds true, it helps avoiding calls to 'not_null_bitmap()' and evaluating
+ // non-nullness/validity per element in particular usage scenarios.
+ bool has_nulls() const;
+
+ // Get non-null (a.k.a. validity) bitmap for the array elements. This returns
+ // null if has_nulls() == false.
const uint8_t* not_null_bitmap() const;
// Accessor for the cell's raw data in the format similar to what
diff --git a/src/kudu/client/scan_batch.cc b/src/kudu/client/scan_batch.cc
index 6cbdac6cd..9b4f2313e 100644
--- a/src/kudu/client/scan_batch.cc
+++ b/src/kudu/client/scan_batch.cc
@@ -327,8 +327,12 @@ Status KuduScanBatch::RowPtr::GetArray(int col_idx,
if (validity_out) {
validity_out->resize(elem_num);
if (!view.empty()) {
- DCHECK(view.not_null_bitmap());
- *validity_out = BitmapToVector(view.not_null_bitmap(), elem_num);
+ if (view.not_null_bitmap()) {
+ *validity_out = BitmapToVector(view.not_null_bitmap(), elem_num);
+ } else {
+ // All elements are valid: the validity array is empty.
+ validity_out->clear();
+ }
}
}
return Status::OK();
diff --git a/src/kudu/common/array_cell_view.h
b/src/kudu/common/array_cell_view.h
index fdbd60b4f..663cc8390 100644
--- a/src/kudu/common/array_cell_view.h
+++ b/src/kudu/common/array_cell_view.h
@@ -90,7 +90,8 @@ class ArrayCellMetadataView final {
: data_(buf),
size_(size),
content_(nullptr),
- is_initialized_(false) {
+ is_initialized_(false),
+ has_nulls_(false) {
}
Status Init() {
@@ -98,6 +99,7 @@ class ArrayCellMetadataView final {
if (size_ == 0) {
content_ = nullptr;
is_initialized_ = true;
+ has_nulls_ = false;
return Status::OK();
}
@@ -139,15 +141,33 @@ class ArrayCellMetadataView final {
return Status::IllegalState("null flatbuffers of non-zero size");
}
- DCHECK(content_);
- if (const size_t bit_num = content_->validity()->size(); bit_num != 0) {
+ const size_t values_size = elem_num_impl();
+ const size_t validity_size = content_->validity() ?
content_->validity()->size() : 0;
+ if (validity_size != 0 && values_size != validity_size) {
+ return Status::Corruption("number of data and validity elements differ");
+ }
+
+ has_nulls_ = false;
+ if (validity_size != 0) {
+ // If the validity vector is supplied and with all its elements non-zero.
+ const auto& validity = *(DCHECK_NOTNULL(content_->validity()));
+ has_nulls_ = std::any_of(validity.cbegin(), validity.cend(),
+ [&](uint8_t e) { return e == 0; });
+ }
+
+ if (has_nulls_) {
+ const size_t bit_num = values_size;
+ DCHECK_GT(bit_num, 0);
bitmap_.reset(new uint8_t[BitmapSize(bit_num)]);
+ // Assuming the most of the elements are non-null/valid, it's usually
less
+ // calls to flip particular bits from 1 to 0 than flipping them from 0
to 1.
+ // TODO(aserbin): consider counting alternating strategy based on the
number
+ // of non-zero values that comes from computing
'has_nulls_'
auto* bm = bitmap_.get();
- const auto* v = content_->validity();
+ memset(bm, 0xff, BitmapSize(bit_num));
+ const auto& v = *(DCHECK_NOTNULL(content_->validity()));
for (size_t idx = 0; idx < bit_num; ++idx) {
- if (v->Get(idx) != 0) {
- BitmapSet(bm, idx);
- } else {
+ if (v.Get(idx) == 0) {
BitmapClear(bm, idx);
}
}
@@ -189,20 +209,26 @@ class ArrayCellMetadataView final {
// Number of elements in the array.
size_t elem_num() const {
DCHECK(is_initialized_);
- return content_ ? content_->validity()->size() : 0;
+ return elem_num_impl();
}
bool empty() const {
- DCHECK(is_initialized_);
- return content_ ? content_->validity()->empty() : true;
+ return elem_num() == 0;
}
// Non-null (a.k.a. validity) bitmap for the array elements.
const uint8_t* not_null_bitmap() const {
DCHECK(is_initialized_);
+ DCHECK((has_nulls_ && bitmap_) || (!has_nulls_ && !bitmap_));
return bitmap_.get();
}
+ bool has_nulls() const {
+ DCHECK(is_initialized_);
+ DCHECK((has_nulls_ && bitmap_) || (!has_nulls_ && !bitmap_));
+ return has_nulls_;
+ }
+
const uint8_t* data_as(DataType data_type) const {
DCHECK(is_initialized_);
if (empty()) {
@@ -258,6 +284,47 @@ class ArrayCellMetadataView final {
return content_ ? content_->data_as<T>()->values()->Data() : nullptr;
}
+ size_t elem_num_impl() const {
+ if (!content_) {
+ return 0;
+ }
+ if (!content_->data()) {
+ return 0;
+ }
+ const auto data_type = content_->data_type();
+ switch (data_type) {
+ case serdes::ScalarArray::Int8Array:
+ return content_->data_as<serdes::Int8Array>()->values()->size();
+ case serdes::ScalarArray::UInt8Array:
+ return content_->data_as<serdes::UInt8Array>()->values()->size();
+ case serdes::ScalarArray::Int16Array:
+ return content_->data_as<serdes::Int16Array>()->values()->size();
+ case serdes::ScalarArray::UInt16Array:
+ return content_->data_as<serdes::UInt16Array>()->values()->size();
+ case serdes::ScalarArray::Int32Array:
+ return content_->data_as<serdes::Int32Array>()->values()->size();
+ case serdes::ScalarArray::UInt32Array:
+ return content_->data_as<serdes::UInt32Array>()->values()->size();
+ case serdes::ScalarArray::Int64Array:
+ return content_->data_as<serdes::Int64Array>()->values()->size();
+ case serdes::ScalarArray::UInt64Array:
+ return content_->data_as<serdes::UInt64Array>()->values()->size();
+ case serdes::ScalarArray::FloatArray:
+ return content_->data_as<serdes::FloatArray>()->values()->size();
+ case serdes::ScalarArray::DoubleArray:
+ return content_->data_as<serdes::DoubleArray>()->values()->size();
+ case serdes::ScalarArray::StringArray:
+ return content_->data_as<serdes::StringArray>()->values()->size();
+ case serdes::ScalarArray::BinaryArray:
+ return content_->data_as<serdes::BinaryArray>()->values()->size();
+ default:
+ LOG(DFATAL) << "unknown ScalarArray type: " <<
static_cast<size_t>(data_type);
+ break;
+ }
+
+ return 0;
+ }
+
// Flatbuffer-encoded data; a non-owning raw pointer.
const uint8_t* const data_;
@@ -279,6 +346,9 @@ class ArrayCellMetadataView final {
// Whether the Init() method has been successfully run for this object.
bool is_initialized_;
+
+ // Whether the underlying array has at least one null/invalid element.
+ bool has_nulls_;
};
} // namespace kudu
diff --git a/src/kudu/common/array_type_serdes-test.cc
b/src/kudu/common/array_type_serdes-test.cc
index 34de4c064..7e8284489 100644
--- a/src/kudu/common/array_type_serdes-test.cc
+++ b/src/kudu/common/array_type_serdes-test.cc
@@ -19,6 +19,7 @@
#include <cstdint>
#include <cstring>
+#include <initializer_list>
#include <memory>
#include <string>
#include <vector>
@@ -78,7 +79,8 @@ TEST(ArrayTypeSerdesTest, Basic) {
ASSERT_OK(view.Init());
ASSERT_EQ(val.size(), view.elem_num());
const auto* view_validity_bitmap = view.not_null_bitmap();
- ASSERT_TRUE(BitmapEquals(validity_bitmap, view_validity_bitmap,
view.elem_num()));
+ ASSERT_NE(nullptr, view_validity_bitmap);
+ ASSERT_TRUE(view.has_nulls());
// Verify the data matches the source.
{
@@ -99,4 +101,63 @@ TEST(ArrayTypeSerdesTest, Basic) {
}
}
+// When all the elements in the array are non-null/valid, the validity bitmap
+// accessed via ArrayCellMetadataView is null in both two cases:
+// * the supplied validity vector have all the elements set to 'true'
+// * the supplied validity vector is empty
+TEST(ArrayTypeSerdesTest, AllNonNullElements) {
+ constexpr const uint8_t kThreeOnes[] = { 0b00000111 };
+ constexpr const uint8_t* kNull = nullptr;
+ const vector<bool> kEmpty{};
+
+ for (const uint8_t* validity_bitmap : { kNull, kThreeOnes }) {
+ const vector<int16_t> val{ 0, 1, 2, };
+ const vector<bool>& validity_vector =
+ validity_bitmap ? BitmapToVector(validity_bitmap, val.size()) : kEmpty;
+ if (validity_bitmap) {
+ ASSERT_EQ(val.size(), validity_vector.size());
+ }
+
+ unique_ptr<uint8_t[]> buf_data;
+ size_t buf_data_size = 0;
+ ASSERT_OK(serdes::Serialize(GetTypeInfo(INT16),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ val.size(),
+ validity_vector,
+ &buf_data,
+ &buf_data_size));
+ ASSERT_TRUE(buf_data);
+ const Slice cell(buf_data.get(), buf_data_size);
+
+ Arena arena(128);
+ Slice arena_cell;
+ ASSERT_OK(serdes::SerializeIntoArena(
+ GetTypeInfo(INT16),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ validity_bitmap,
+ val.size(),
+ &arena,
+ &arena_cell));
+
+ // Make sure Serialize() an SerializeInfoArena() produce the same data.
+ ASSERT_EQ(cell, arena_cell);
+
+ // Peek into the serialized buffer using ArrayCellMetadataView and compare
+ // the source data with the view into the serialized buffer.
+ ArrayCellMetadataView view(cell.data(), cell.size());
+ ASSERT_OK(view.Init());
+ ASSERT_EQ(val.size(), view.elem_num());
+ const auto* view_validity_bitmap = view.not_null_bitmap();
+ ASSERT_EQ(nullptr, view_validity_bitmap);
+ ASSERT_FALSE(view.has_nulls());
+
+ // Verify the data matches the source.
+ {
+ const uint8_t* data_view = view.data_as(INT16);
+ ASSERT_NE(nullptr, data_view);
+ ASSERT_EQ(0, memcmp(data_view, val.data(), sizeof(int16_t) *
val.size()));
+ }
+ }
+}
+
} // namespace kudu
diff --git a/src/kudu/common/array_type_serdes.h
b/src/kudu/common/array_type_serdes.h
index 7e8890925..9acd64b4e 100644
--- a/src/kudu/common/array_type_serdes.h
+++ b/src/kudu/common/array_type_serdes.h
@@ -55,7 +55,7 @@ void BuildFlatbuffers(
static const Slice kEmptySlice(static_cast<uint8_t*>(nullptr), 0);
const Slice* ptr = reinterpret_cast<const Slice*>(column_data);
for (size_t idx = 0; idx < nrows; ++idx) {
- val[idx] = validity[idx] ? *(ptr + idx) : kEmptySlice;
+ val[idx] = (validity.empty() || validity[idx]) ? *(ptr + idx) :
kEmptySlice;
}
} else {
static_assert(!std::is_same<Slice, ElementType>::value,
@@ -64,7 +64,8 @@ void BuildFlatbuffers(
}
}
- auto validity_vector = builder.CreateVector(validity);
+ const auto validity_vector =
+ !validity.empty() ? builder.CreateVector(validity) : 0;
if constexpr (KUDU_DATA_TYPE == STRING) {
auto values = FB_TYPE::Traits::Create(
builder, builder.CreateVectorOfStrings<ElementType>(val));
@@ -107,7 +108,7 @@ Status Serialize(
DCHECK(out_buf);
DCHECK(out_buf_size);
- DCHECK_EQ(validity.size(), nrows);
+ DCHECK(validity.empty() || validity.size() == nrows);
if (PREDICT_FALSE(column_data == nullptr && nrows > 0)) {
return Status::InvalidArgument("inconsistent data and validity info for
array");
@@ -141,6 +142,7 @@ Status SerializeIntoArena(
size_t nrows,
Arena* arena,
Slice* out) {
+ static const std::vector<bool> kAllValid{};
typedef typename DataTypeTraits<KUDU_DATA_TYPE>::cpp_type ElementType;
DCHECK(arena);
@@ -148,7 +150,8 @@ Status SerializeIntoArena(
flatbuffers::FlatBufferBuilder builder(
nrows * sizeof(ElementType) + nrows + FLATBUFFERS_MIN_BUFFER_SIZE);
- const std::vector<bool>& validity = BitmapToVector(validity_bitmap, nrows);
+ const std::vector<bool>& validity =
+ validity_bitmap ? BitmapToVector(validity_bitmap, nrows) : kAllValid;
BuildFlatbuffers<KUDU_DATA_TYPE, FB_TYPE>(column_data, nrows, validity,
&builder);
// Copy the serialized data into the arena.
diff --git a/src/kudu/common/partial_row-test.cc
b/src/kudu/common/partial_row-test.cc
index 8545a893b..5fb02e935 100644
--- a/src/kudu/common/partial_row-test.cc
+++ b/src/kudu/common/partial_row-test.cc
@@ -422,6 +422,53 @@ TEST_F(PartialRowTest, DecimalArray) {
}
}
+// An empty validity vector means all the elements in the array are valid.
+TEST_F(PartialRowTest, EmptyValidityVector) {
+ const Schema schema(
+ { ColumnSchema("key", INT32),
+ ColumnSchemaBuilder()
+ .name("bool_arr")
+ .type(BOOL)
+ .array(true)
+ .nullable(true),
+ ColumnSchemaBuilder()
+ .name("int32_arr")
+ .type(INT32)
+ .array(true)
+ .nullable(true),
+ ColumnSchemaBuilder()
+ .name("string_arr")
+ .type(STRING)
+ .array(true)
+ .nullable(true),
+ }, 1);
+
+ {
+ KuduPartialRow row(&schema);
+ ASSERT_OK(row.SetArrayBool(1, { false, true, }, {}));
+ ASSERT_TRUE(row.IsColumnSet(1));
+ ASSERT_FALSE(row.IsNull(1));
+ ASSERT_EQ("bool 1d-array bool_arr=[false, true]", row.ToString());
+ }
+ {
+ KuduPartialRow row(&schema);
+ ASSERT_OK(row.SetArrayInt32(2, {}, {}));
+ ASSERT_TRUE(row.IsColumnSet(2));
+ ASSERT_FALSE(row.IsNull(2));
+ ASSERT_EQ("int32 1d-array int32_arr=[]", row.ToString());
+ }
+ {
+ KuduPartialRow row(&schema);
+ const auto s = row.SetArrayInt32(2, { 0, 1 }, { true });
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(),
+ "data and validity arrays must be the same length "
+ "if the latter is non-empty");
+ ASSERT_FALSE(row.IsColumnSet(2));
+ ASSERT_FALSE(row.IsNull(2));
+ }
+}
+
TEST_F(PartialRowTest, TestCopy) {
KuduPartialRow row(&schema_);
diff --git a/src/kudu/common/partial_row.cc b/src/kudu/common/partial_row.cc
index 1d6ef0f8d..cafba09f7 100644
--- a/src/kudu/common/partial_row.cc
+++ b/src/kudu/common/partial_row.cc
@@ -209,9 +209,10 @@ template<typename T>
Status KuduPartialRow::SetArray(int col_idx,
const vector<typename T::element_cpp_type>&
val,
const vector<bool>& validity) {
- if (PREDICT_FALSE(val.size() != validity.size())) {
+ // An empty validity vector means all the elements in the array are valid.
+ if (PREDICT_FALSE(!validity.empty() && val.size() != validity.size())) {
return Status::InvalidArgument(
- "data and validity arrays must be the same length");
+ "data and validity arrays must be the same length if the latter is
non-empty");
}
const ColumnSchema& col = schema_->column(col_idx);
@@ -255,9 +256,10 @@ Status KuduPartialRow::SetArray<ArrayTypeTraits<BOOL>>(
int col_idx,
const vector<bool>& val_bool,
const vector<bool>& validity) {
- if (PREDICT_FALSE(val_bool.size() != validity.size())) {
+ // An empty validity vector means all the elements in the array are valid.
+ if (PREDICT_FALSE(!validity.empty() && val_bool.size() != validity.size())) {
return Status::InvalidArgument(
- "data and validity arrays must be the same length");
+ "data and validity arrays must be the same length if the latter is
non-empty");
}
vector<uint8_t> val;
val.reserve(val_bool.size());
diff --git a/src/kudu/common/row_operations.cc
b/src/kudu/common/row_operations.cc
index 3059cb9d6..e424aef7b 100644
--- a/src/kudu/common/row_operations.cc
+++ b/src/kudu/common/row_operations.cc
@@ -43,6 +43,7 @@
#include "kudu/util/safe_math.h"
#include "kudu/util/slice.h"
+using kudu::ArrayCellMetadataView;
using std::string;
using std::vector;
using strings::Substitute;
@@ -60,12 +61,11 @@ DEFINE_uint32(array_cell_max_elem_num, 1024,
TAG_FLAG(array_cell_max_elem_num, advanced);
TAG_FLAG(array_cell_max_elem_num, runtime);
-// Validate that log_min_segments_to_retain >= 1
static bool ValidateArrayCellMaxElemNum(const char* flagname, uint32_t value) {
- if (value > kudu::ArrayCellMetadataView::kArrayMaxElemNum) {
+ if (value > ArrayCellMetadataView::kArrayMaxElemNum) {
LOG(ERROR) << Substitute(
"'$0' set to invalid value $1; must not be greater than $2",
- flagname, value, kudu::ArrayCellMetadataView::kArrayMaxElemNum);
+ flagname, value, ArrayCellMetadataView::kArrayMaxElemNum);
return false;
}
return true;
diff --git a/src/kudu/common/types.h b/src/kudu/common/types.h
index 7790867f5..c9d3c771c 100644
--- a/src/kudu/common/types.h
+++ b/src/kudu/common/types.h
@@ -957,10 +957,11 @@ struct ArrayTypeTraits : public
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
if (idx != 0) {
str->append(", ");
}
- if (BitmapTest(validity, idx)) {
-
DataTypeTraits<ARRAY_ELEMENT_TYPE>::AppendDebugStringForValue(data_ptr, str);
- } else {
+ // Null validity bitmap means all elements are valid.
+ if (validity && !BitmapTest(validity, idx)) {
str->append("NULL");
+ } else {
+
DataTypeTraits<ARRAY_ELEMENT_TYPE>::AppendDebugStringForValue(data_ptr, str);
}
data_ptr += sizeof(typename TypeTraits<ARRAY_ELEMENT_TYPE>::cpp_type);
}
@@ -1030,7 +1031,7 @@ struct ArrayTypeTraits : public
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
}
const uint8_t* b_not_null_bitmap = b_view.not_null_bitmap();
- if (BitmapTest(b_not_null_bitmap, a_elem_num)) {
+ if (!b_not_null_bitmap || BitmapTest(b_not_null_bitmap, a_elem_num)) {
// The trailing extra element in 'b' must be NULL if 'b' goes
// consecutively after 'a'.
return false;
@@ -1051,7 +1052,7 @@ struct ArrayTypeTraits : public
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
private:
// Compare array represented by the specified array views facades up to
// the specified number of elements. The comparison goes element-by-element,
- // using Compare() method for the corresponding element scalar type.
+ // using Compare() method for the corresponding scalar type elements.
static int Compare(const ArrayCellMetadataView& lhs,
const ArrayCellMetadataView& rhs,
size_t num_elems_to_compare) {
@@ -1072,18 +1073,16 @@ struct ArrayTypeTraits : public
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
const uint8_t* lhs_data_ptr = lhs.data_as(ARRAY_ELEMENT_TYPE);
DCHECK(lhs_data_ptr || lhs_elems == 0);
const uint8_t* lhs_not_null_bitmap = lhs.not_null_bitmap();
- DCHECK(lhs_not_null_bitmap || lhs_elems == 0);
const uint8_t* rhs_data_ptr = rhs.data_as(ARRAY_ELEMENT_TYPE);
DCHECK(rhs_data_ptr || rhs_elems == 0);
const uint8_t* rhs_not_null_bitmap = rhs.not_null_bitmap();
- DCHECK(rhs_not_null_bitmap || rhs_elems == 0);
for (size_t idx = 0; idx < cap_num_elems; ++idx,
lhs_data_ptr += sizeof(typename
TypeTraits<ARRAY_ELEMENT_TYPE>::cpp_type),
rhs_data_ptr += sizeof(typename
TypeTraits<ARRAY_ELEMENT_TYPE>::cpp_type)) {
- if (!BitmapTest(lhs_not_null_bitmap, idx)) {
- if (!BitmapTest(rhs_not_null_bitmap, idx)) {
+ if (lhs_not_null_bitmap && !BitmapTest(lhs_not_null_bitmap, idx)) {
+ if (rhs_not_null_bitmap && !BitmapTest(rhs_not_null_bitmap, idx)) {
// Both elements are NULL: continue with next elements in the arrays.
continue;
}
@@ -1092,7 +1091,7 @@ struct ArrayTypeTraits : public
ArrayDataTypeTraits<ARRAY_ELEMENT_TYPE> {
// same numbers (or two NULLs) up to the 'idx' position.
return -1;
} else {
- if (!BitmapTest(rhs_not_null_bitmap, idx)) {
+ if (rhs_not_null_bitmap && !BitmapTest(rhs_not_null_bitmap, idx)) {
// lhs > rhs: a non-NULL element in lhs at idx, but rhs non-NULL
// at idx, while all the pairs of elements in the array contain
// same numbers (or two NULLs) up to the 'idx' position.
diff --git a/src/kudu/util/bitmap-test.cc b/src/kudu/util/bitmap-test.cc
index 948bf1bc0..e2a33e17b 100644
--- a/src/kudu/util/bitmap-test.cc
+++ b/src/kudu/util/bitmap-test.cc
@@ -21,6 +21,7 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
+#include <initializer_list>
#include <set>
#include <string>
#include <vector>
@@ -428,6 +429,32 @@ TEST(TestBitMap, BitmapToVector) {
}
}
+TEST(TestBitMap, EmptyBitset) {
+ const uint8_t* kNull = nullptr;
+ const uint8_t kEmpty[] = {};
+ for (const uint8_t* buf : { kNull, kEmpty }) {
+ for (bool initial_value : { false, true }) {
+ BitmapIterator bit(buf, 0);
+ bool value = initial_value;
+ const auto next = bit.Next(&value);
+ ASSERT_EQ(0, next);
+ // The output parameter is untouched.
+ ASSERT_EQ(initial_value, value);
+ }
+ }
+}
+
+TEST(TestBitMap, BitmapIteratorOnNullBitmap) {
+ for (const size_t num_bits : { 1, 5, 8, 10, 64 }) {
+ SCOPED_TRACE(std::to_string(num_bits));
+ BitmapIterator bit(nullptr, num_bits);
+ bool value = false;
+ const auto next = bit.Next(&value);
+ ASSERT_EQ(num_bits, next);
+ ASSERT_TRUE(value);
+ }
+}
+
#ifndef NDEBUG
TEST(TestBitMapDeathTest, TestCopyOverlap) {
uint8_t bm[2] = { 0 };
diff --git a/src/kudu/util/bitmap.cc b/src/kudu/util/bitmap.cc
index 9de95ca72..17c6e268c 100644
--- a/src/kudu/util/bitmap.cc
+++ b/src/kudu/util/bitmap.cc
@@ -155,6 +155,7 @@ void BitmapCopy(uint8_t* dst, size_t dst_offset,
}
vector<bool> BitmapToVector(const uint8_t* bitmap, size_t num_bits) {
+ DCHECK(bitmap);
BitmapIterator it(bitmap, num_bits);
vector<bool> result(num_bits);
bool is_set = false;
diff --git a/src/kudu/util/bitmap.h b/src/kudu/util/bitmap.h
index 96b078b24..c3feb92f8 100644
--- a/src/kudu/util/bitmap.h
+++ b/src/kudu/util/bitmap.h
@@ -141,8 +141,8 @@ class BitmapIterator {
public:
BitmapIterator(const uint8_t* map, size_t num_bits)
: map_(map),
- offset_(0),
- num_bits_(num_bits) {
+ num_bits_(num_bits),
+ offset_(0) {
}
size_t Next(bool* value) {
@@ -152,6 +152,13 @@ class BitmapIterator {
return 0;
}
+ if (!map_) {
+ // Null bitmap means all the elements are valid.
+ *value = true;
+ offset_ = num_bits_;
+ return num_bits_;
+ }
+
*value = BitmapTest(map_, offset_);
size_t index;
@@ -167,8 +174,8 @@ class BitmapIterator {
private:
const uint8_t* const map_;
+ const size_t num_bits_;
size_t offset_;
- size_t num_bits_;
};
// Iterate over the bits in 'bitmap' and call 'func' for each set bit.