This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 60f5e5267 KUDU-1261 make ArrayCellMetadataView::Init() more robust
60f5e5267 is described below
commit 60f5e5267b92c39485a66121d3ce3cc7ef57b0e0
Author: Alexey Serbin <[email protected]>
AuthorDate: Sat Oct 25 00:41:05 2025 -0700
KUDU-1261 make ArrayCellMetadataView::Init() more robust
This makes ArrayCellMetadataView::Init() implementation more robust.
In particular, it no longer crashes on flatbuffers with empty Content
and type set to String or Binary. Neither C++ nor Java client side
generates such flatbuffers content as of now, but the code should be
robust enough to handle any input.
New test scenarios to cover the regression are present as well.
In addition, I moved all the code in array_type_serdes-test.cc
into kudu::serdes namespace.
Change-Id: I1f93e91b0a178fd3701f7ef3c1da855e56bb9f57
Reviewed-on: http://gerrit.cloudera.org:8080/23601
Tested-by: Marton Greber <[email protected]>
Reviewed-by: Marton Greber <[email protected]>
Reviewed-by: Abhishek Chennaka <[email protected]>
---
src/kudu/common/array_cell_view.h | 37 ++++--
src/kudu/common/array_type_serdes-test.cc | 195 +++++++++++++++++++++++++-----
2 files changed, 190 insertions(+), 42 deletions(-)
diff --git a/src/kudu/common/array_cell_view.h
b/src/kudu/common/array_cell_view.h
index 5699c4ee5..724206de8 100644
--- a/src/kudu/common/array_cell_view.h
+++ b/src/kudu/common/array_cell_view.h
@@ -35,6 +35,10 @@
namespace kudu {
+namespace serdes {
+class ArrayTypeSerdesTest_Basic_Test;
+}
+
constexpr serdes::ScalarArray KuduToScalarArrayType(DataType data_type) {
switch (data_type) {
case INT8:
@@ -100,8 +104,8 @@ class ArrayCellMetadataView final {
if (size_ == 0) {
content_ = nullptr;
elem_num_ = 0;
- is_initialized_ = true;
has_nulls_ = false;
+ is_initialized_ = true;
return Status::OK();
}
@@ -143,6 +147,14 @@ class ArrayCellMetadataView final {
return Status::IllegalState("null flatbuffers of non-zero size");
}
+ // Short-curciut for the case when no fields are present.
+ if (PREDICT_FALSE(!content_->data() && !content_->validity())) {
+ elem_num_ = 0;
+ has_nulls_ = false;
+ is_initialized_ = true;
+ return Status::OK();
+ }
+
elem_num_ = GetElemNum();
const size_t validity_size = content_->validity() ?
content_->validity()->size() : 0;
if (validity_size != 0 && BitmapSize(elem_num_) != validity_size) {
@@ -173,21 +185,22 @@ class ArrayCellMetadataView final {
// Build the metadata on the spans of binary/string elements
// in the buffer.
+ DCHECK(content_->data());
if (data_type == serdes::ScalarArray::StringArray) {
- const auto* values = content_->data_as<serdes::StringArray>()->values();
+ const auto* values = DCHECK_NOTNULL(
+ content_->data_as<serdes::StringArray>())->values();
binary_data_spans_.reserve(values->size());
- for (auto cit = values->cbegin(); cit != values->end(); ++cit) {
- const auto* str = *cit;
- DCHECK(str);
+ for (auto cit = values->cbegin(); cit != values->cend(); ++cit) {
+ const auto* str = DCHECK_NOTNULL(*cit);
binary_data_spans_.emplace_back(str->c_str(), str->size());
}
} else {
DCHECK(serdes::ScalarArray::BinaryArray == data_type);
- const auto* values = content_->data_as<serdes::BinaryArray>()->values();
+ const auto* values = DCHECK_NOTNULL(
+ content_->data_as<serdes::BinaryArray>())->values();
binary_data_spans_.reserve(values->size());
- for (auto cit = values->cbegin(); cit != values->end(); ++cit) {
- const auto* byte_seq = cit->values();
- DCHECK(byte_seq);
+ for (auto cit = values->cbegin(); cit != values->cend(); ++cit) {
+ const auto* byte_seq = DCHECK_NOTNULL(cit->values());
binary_data_spans_.emplace_back(byte_seq->Data(), byte_seq->size());
}
}
@@ -267,7 +280,7 @@ class ArrayCellMetadataView final {
}
private:
- FRIEND_TEST(ArrayTypeSerdesTest, Basic);
+ FRIEND_TEST(serdes::ArrayTypeSerdesTest, Basic);
template<typename T>
const uint8_t* data() const {
@@ -276,10 +289,10 @@ class ArrayCellMetadataView final {
}
size_t GetElemNum() const {
- if (!content_) {
+ if (PREDICT_FALSE(!content_)) {
return 0;
}
- if (!content_->data()) {
+ if (PREDICT_FALSE(!content_->data())) {
return 0;
}
const auto data_type = content_->data_type();
diff --git a/src/kudu/common/array_type_serdes-test.cc
b/src/kudu/common/array_type_serdes-test.cc
index 1345799b5..5e9c74c20 100644
--- a/src/kudu/common/array_type_serdes-test.cc
+++ b/src/kudu/common/array_type_serdes-test.cc
@@ -24,24 +24,25 @@
#include <string>
#include <vector>
+#include <flatbuffers/base.h>
+#include <flatbuffers/flatbuffer_builder.h>
#include <gtest/gtest.h>
#include "kudu/common/array_cell_view.h"
#include "kudu/common/common.pb.h"
+#include "kudu/common/serdes/array1d.fb.h"
#include "kudu/common/types.h"
#include "kudu/util/bitmap.h"
#include "kudu/util/memory/arena.h"
#include "kudu/util/slice.h"
+#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
using std::unique_ptr;
using std::vector;
namespace kudu {
-
namespace serdes {
-struct Int32Array;
-} // namespace serdes
TEST(ArrayTypeSerdesTest, Basic) {
const vector<int32_t> val{ 0, 1, 12, 5, 26, 42, };
@@ -51,24 +52,23 @@ TEST(ArrayTypeSerdesTest, Basic) {
unique_ptr<uint8_t[]> buf_data;
size_t buf_data_size = 0;
- ASSERT_OK(serdes::Serialize(GetTypeInfo(INT32),
- reinterpret_cast<const uint8_t*>(val.data()),
- val.size(),
- validity_vector,
- &buf_data,
- &buf_data_size));
+ ASSERT_OK(Serialize(GetTypeInfo(INT32),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ val.size(),
+ validity_vector,
+ &buf_data,
+ &buf_data_size));
ASSERT_TRUE(buf_data);
const Slice cell(buf_data.get(), buf_data_size);
Arena arena(128);
Slice arena_cell;
- ASSERT_OK(serdes::SerializeIntoArena(
- GetTypeInfo(INT32),
- reinterpret_cast<const uint8_t*>(val.data()),
- validity_bitmap,
- val.size(),
- &arena,
- &arena_cell));
+ ASSERT_OK(SerializeIntoArena(GetTypeInfo(INT32),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ validity_bitmap,
+ val.size(),
+ &arena,
+ &arena_cell));
// Make sure Serialize() an SerializeInfoArena() produce the same data.
ASSERT_EQ(cell, arena_cell);
@@ -90,7 +90,7 @@ TEST(ArrayTypeSerdesTest, Basic) {
ASSERT_EQ(0, memcmp(data_view, val.data(), sizeof(int32_t) * val.size()));
}
{
- const uint8_t* data_view = view.data<serdes::Int32Array>();
+ const uint8_t* data_view = view.data<Int32Array>();
ASSERT_NE(nullptr, data_view);
ASSERT_EQ(0, memcmp(data_view, val.data(), sizeof(int32_t) * val.size()));
}
@@ -122,24 +122,23 @@ TEST(ArrayTypeSerdesTest, AllNonNullElements) {
unique_ptr<uint8_t[]> buf_data;
size_t buf_data_size = 0;
- ASSERT_OK(serdes::Serialize(GetTypeInfo(INT16),
- reinterpret_cast<const uint8_t*>(val.data()),
- val.size(),
- validity_vector,
- &buf_data,
- &buf_data_size));
+ ASSERT_OK(Serialize(GetTypeInfo(INT16),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ val.size(),
+ validity_vector,
+ &buf_data,
+ &buf_data_size));
ASSERT_TRUE(buf_data);
const Slice cell(buf_data.get(), buf_data_size);
Arena arena(128);
Slice arena_cell;
- ASSERT_OK(serdes::SerializeIntoArena(
- GetTypeInfo(INT16),
- reinterpret_cast<const uint8_t*>(val.data()),
- validity_bitmap,
- val.size(),
- &arena,
- &arena_cell));
+ ASSERT_OK(SerializeIntoArena(GetTypeInfo(INT16),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ validity_bitmap,
+ val.size(),
+ &arena,
+ &arena_cell));
// Make sure Serialize() an SerializeInfoArena() produce the same data.
ASSERT_EQ(cell, arena_cell);
@@ -162,4 +161,140 @@ TEST(ArrayTypeSerdesTest, AllNonNullElements) {
}
}
+TEST(ArrayTypeSerdesTest, EmptyData) {
+ for (const auto array_type : EnumValuesScalarArray()) {
+ SCOPED_TRACE(EnumNameScalarArray(array_type));
+ const vector<int32_t> val{};
+ unique_ptr<uint8_t[]> buf_data;
+ size_t buf_data_size = 0;
+ ASSERT_OK(Serialize(GetTypeInfo(INT8),
+ reinterpret_cast<const uint8_t*>(val.data()),
+ val.size(),
+ {},
+ &buf_data,
+ &buf_data_size));
+ ASSERT_TRUE(buf_data);
+ const Slice cell(buf_data.get(), buf_data_size);
+
+ ArrayCellMetadataView view(cell.data(), cell.size());
+ ASSERT_OK(view.Init());
+ ASSERT_EQ(val.size(), view.elem_num());
+ ASSERT_TRUE(view.empty());
+ ASSERT_FALSE(view.has_nulls());
+ const auto* view_validity_bitmap = view.not_null_bitmap();
+ ASSERT_EQ(nullptr, view_validity_bitmap);
+ }
+}
+
+static const DataType kScalarColumnTypes[] = {
+ UINT8,
+ INT8,
+ UINT16,
+ INT16,
+ UINT32,
+ INT32,
+ UINT64,
+ INT64,
+ STRING,
+ BOOL,
+ FLOAT,
+ DOUBLE,
+ BINARY,
+ UNIXTIME_MICROS,
+ INT128,
+ DECIMAL32,
+ DECIMAL64,
+ DECIMAL128,
+ VARCHAR,
+ DATE,
+};
+
+// Verify how ArrayCellMetadataView works with empty content, where when
neither
+// 'data' nor 'validity' field is present, and other combinations when one
field
+// is present, but empty, and another is absent or empty. It shouldn't crash
+// and should successfully parse the flatbuffers data as expected.
+TEST(ArrayTypeSerdesTest, EmptyContent) {
+ constexpr int NO_DATA_NO_VALIDITY = 0;
+ constexpr int NO_DATA_EMPTY_VALIDITY = 1;
+ constexpr int EMPTY_DATA_NO_VALIDITY = 2;
+ constexpr int EMPTY_DATA_EMPTY_VALIDITY = 3;
+
+ constexpr int kFieldOptions[] = {
+ NO_DATA_NO_VALIDITY,
+ NO_DATA_EMPTY_VALIDITY,
+ EMPTY_DATA_NO_VALIDITY,
+ EMPTY_DATA_EMPTY_VALIDITY,
+ };
+
+ // Try various types that correspond to different code paths in the
+ // implementation of ArrayCellMetadataView::Init().
+ for (auto opt : kFieldOptions) {
+ for (const auto array_type : EnumValuesScalarArray()) {
+ SCOPED_TRACE(EnumNameScalarArray(array_type));
+ flatbuffers::FlatBufferBuilder bld;
+ switch (opt) {
+ case NO_DATA_NO_VALIDITY:
+ bld.Finish(CreateContent(bld, array_type, 0, 0));
+ break;
+ case NO_DATA_EMPTY_VALIDITY:
+ bld.Finish(CreateContent(bld, array_type, 0, {}));
+ break;
+ case EMPTY_DATA_NO_VALIDITY:
+ bld.Finish(CreateContent(bld, array_type, {}, 0));
+ break;
+ case EMPTY_DATA_EMPTY_VALIDITY:
+ bld.Finish(CreateContent(bld, array_type, {}, {}));
+ break;
+ default:
+ FAIL();
+ break;
+ }
+
+ size_t buf_size = 0;
+ size_t buf_offset = 0;
+ unique_ptr<uint8_t[]> buf(bld.ReleaseRaw(buf_size, buf_offset));
+ ASSERT_NE(nullptr, buf.get());
+ ASSERT_LT(buf_offset, buf_size);
+ const Slice cell(buf.get() + buf_offset, buf_size);
+ ASSERT_GE(buf_size - buf_offset, FLATBUFFERS_MIN_BUFFER_SIZE);
+
+ ArrayCellMetadataView view(cell.data(), cell.size());
+ ASSERT_OK(view.Init());
+ ASSERT_TRUE(view.empty());
+ ASSERT_EQ(0, view.elem_num());
+ ASSERT_FALSE(view.has_nulls());
+ ASSERT_EQ(nullptr, view.not_null_bitmap());
+ for (auto data_type : kScalarColumnTypes) {
+ SCOPED_TRACE(DataType_Name(data_type));
+ ASSERT_EQ(nullptr, view.data_as(data_type));
+ }
+ }
+ }
+}
+
+TEST(ArrayTypeSerdesTest, NoDataButNonEmptyValidity) {
+ const vector<uint8_t> validity{ 0x01 };
+ // Try various types that correspond to different code paths in the
+ // implementation of ArrayCellMetadataView::Init().
+ for (const auto array_type : EnumValuesScalarArray()) {
+ SCOPED_TRACE(EnumNameScalarArray(array_type));
+ flatbuffers::FlatBufferBuilder bld;
+ const auto validity_fb = bld.CreateVector(validity);
+ bld.Finish(CreateContent(bld, array_type, 0, validity_fb));
+
+ size_t buf_size = 0;
+ size_t buf_offset = 0;
+ unique_ptr<uint8_t[]> buf(bld.ReleaseRaw(buf_size, buf_offset));
+ ASSERT_NE(nullptr, buf.get());
+ ASSERT_LT(buf_offset, buf_size);
+ const Slice cell(buf.get() + buf_offset, buf_size);
+
+ ArrayCellMetadataView view(cell.data(), cell.size());
+ const auto s = view.Init();
+ ASSERT_TRUE(s.IsCorruption()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "'validity' and 'data' fields not in
sync");
+ }
+}
+
+} // namespace serdes
} // namespace kudu