This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new aabe09da6 KUDU-1261 introduce 1D arrays into ColumnSchema
aabe09da6 is described below
commit aabe09da6ffbeaafa040bb00657ec285426c8eaa
Author: Alexey Serbin <[email protected]>
AuthorDate: Mon Jun 9 20:07:30 2025 -0700
KUDU-1261 introduce 1D arrays into ColumnSchema
This changelist introduces support for one-dimensional arrays into
ColumnSchema, ColumnSchemaBuilder, and ColumnSchemaBuilderFromPB()
utility function. The newly introduced functionality comes with
basic test coverage.
Change-Id: I9aeff9a78039651bb727ca65af4e071a2c887270
Reviewed-on: http://gerrit.cloudera.org:8080/23002
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Abhishek Chennaka <[email protected]>
---
src/kudu/client/client-unittest.cc | 51 ++++++-
src/kudu/client/schema-internal.h | 23 +++
src/kudu/client/schema.cc | 197 +++++++++++++++++++++++--
src/kudu/client/schema.h | 83 ++++++++++-
src/kudu/common/row.h | 1 -
src/kudu/common/row_operations.cc | 13 +-
src/kudu/common/row_operations.h | 6 +-
src/kudu/common/row_operations.proto | 2 +-
src/kudu/common/schema-test.cc | 171 ++++++++++++++++++++-
src/kudu/common/schema.h | 63 +++++++-
src/kudu/common/wire_protocol-test.cc | 125 ++++++++++++++++
src/kudu/common/wire_protocol.cc | 66 ++++++++-
src/kudu/integration-tests/alter_table-test.cc | 66 ++++++++-
src/kudu/master/catalog_manager.cc | 17 +++
src/kudu/master/master-test.cc | 97 ++++++++++++
15 files changed, 939 insertions(+), 42 deletions(-)
diff --git a/src/kudu/client/client-unittest.cc
b/src/kudu/client/client-unittest.cc
index 63352c3ce..97cecf311 100644
--- a/src/kudu/client/client-unittest.cc
+++ b/src/kudu/client/client-unittest.cc
@@ -599,11 +599,58 @@ TEST(KuduColumnSchemaTest, TestEquals) {
ASSERT_NE(a32, a16);
const int kDefaultOf7 = 7;
- KuduColumnSchema a32_dflt("a", KuduColumnSchema::INT32,
/*is_nullable=*/false,
- /*is_immutable=*/false,
/*is_auto_incrementing=*/false,
+ KuduColumnSchema a32_dflt("a",
+ KuduColumnSchema::INT32,
+ /*is_nullable=*/false,
+ /*is_immutable=*/false,
+ /*is_auto_incrementing=*/false,
+ /*is_array=*/false,
/*default_value=*/&kDefaultOf7);
ASSERT_NE(a32, a32_dflt);
}
+TEST(ClientUnitTest, NestedTypeColumnBasic) {
+ const KuduColumnSchema::KuduArrayTypeDescriptor
desc(KuduColumnSchema::STRING);
+
+ // Specifying just NestedType() for a nested type column is OK.
+ KuduSchema schema_a;
+ {
+ KuduSchemaBuilder b;
+ b.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+ b.AddColumn("nested")->
+ NestedType(KuduColumnSchema::KuduNestedTypeDescriptor(desc));
+ ASSERT_OK(b.Build(&schema_a));
+ }
+
+ // Calling Type(NESTED) with follow-up NestedType(...) is equivalent to
+ // calling just NestedType(...).
+ KuduSchema schema_b;
+ {
+ // Type(KuduColumnSchema::NESTED)->
+ KuduSchemaBuilder b;
+ b.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+ b.AddColumn("nested")->
+ Type(KuduColumnSchema::NESTED)->
+ NestedType(KuduColumnSchema::KuduNestedTypeDescriptor(desc));
+ ASSERT_OK(b.Build(&schema_b));
+ }
+ ASSERT_EQ(schema_a, schema_b);
+}
+
+
+// Verify that omitted nested type information leads to an error when trying
+// to build Kudu schema out of such column specification.
+TEST(ClientUnitTest, MissingNestedTypeColumnInfo) {
+ KuduSchemaBuilder b;
+ b.AddColumn("key")->Type(KuduColumnSchema::INT32)->NotNull()->PrimaryKey();
+ b.AddColumn("nested")->Type(KuduColumnSchema::NESTED);
+
+ KuduSchema schema;
+ const auto s = b.Build(&schema);
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(),
+ "missing information on a NESTED type for column:
nested");
+}
+
} // namespace client
} // namespace kudu
diff --git a/src/kudu/client/schema-internal.h
b/src/kudu/client/schema-internal.h
index 09aa6dca6..2018cecd2 100644
--- a/src/kudu/client/schema-internal.h
+++ b/src/kudu/client/schema-internal.h
@@ -75,6 +75,7 @@ class KuduColumnSpec::Data {
const std::string name;
std::optional<KuduColumnSchema::DataType> type;
+ std::optional<KuduColumnSchema::KuduNestedTypeDescriptor> nested_type;
std::optional<int8_t> precision;
std::optional<int8_t> scale;
std::optional<uint16_t> length;
@@ -92,5 +93,27 @@ class KuduColumnSpec::Data {
std::optional<std::string> comment;
};
+class KuduColumnSchema::KuduNestedTypeDescriptor::Data final {
+ public:
+ explicit Data(const KuduArrayTypeDescriptor& desc)
+ : kind_(ARRAY),
+ descriptor(desc) {
+ }
+
+ ~Data() = default;
+
+ const KuduNestedType kind_;
+
+ union Descriptor {
+ explicit Descriptor(const KuduArrayTypeDescriptor& desc)
+ : array(desc) {
+ }
+ ~Descriptor() = default;
+
+ const KuduArrayTypeDescriptor array;
+ } descriptor;
+};
+
+
} // namespace client
} // namespace kudu
diff --git a/src/kudu/client/schema.cc b/src/kudu/client/schema.cc
index 27e150c66..9decb0f81 100644
--- a/src/kudu/client/schema.cc
+++ b/src/kudu/client/schema.cc
@@ -140,6 +140,7 @@ kudu::DataType
ToInternalDataType(KuduColumnSchema::DataType type,
case KuduColumnSchema::STRING: return kudu::STRING;
case KuduColumnSchema::BINARY: return kudu::BINARY;
case KuduColumnSchema::BOOL: return kudu::BOOL;
+ case KuduColumnSchema::NESTED: return kudu::NESTED;
case KuduColumnSchema::DECIMAL:
if (attributes.precision() <= kMaxDecimal32Precision) {
return kudu::DECIMAL32;
@@ -173,6 +174,7 @@ KuduColumnSchema::DataType
FromInternalDataType(kudu::DataType type) {
case kudu::DECIMAL64:
case kudu::DECIMAL128:
return KuduColumnSchema::DECIMAL;
+ case kudu::NESTED: return KuduColumnSchema::NESTED;
default: LOG(FATAL) << "Unexpected internal data type: " << type;
}
}
@@ -296,6 +298,13 @@ KuduColumnSpec*
KuduColumnSpec::Type(KuduColumnSchema::DataType type) {
return this;
}
+KuduColumnSpec* KuduColumnSpec::NestedType(
+ const KuduColumnSchema::KuduNestedTypeDescriptor& type_info) {
+ data_->type = KuduColumnSchema::DataType::NESTED;
+ data_->nested_type.emplace(type_info);
+ return this;
+}
+
KuduColumnSpec* KuduColumnSpec::Default(KuduValue* value) {
if (value == nullptr) return this;
if (data_->default_val) {
@@ -467,6 +476,24 @@ Status KuduColumnSpec::ToColumnSchema(KuduColumnSchema*
col) const {
case KuduColumnSchema::VARCHAR:
RETURN_NOT_OK(validate_varchar());
break;
+ case KuduColumnSchema::NESTED:
+ if (!data_->nested_type.has_value()) {
+ return Status::InvalidArgument("missing information on a NESTED type
for column",
+ data_->name);
+ }
+ if (const auto& desc = data_->nested_type.value(); desc.is_array()) {
+ const auto* array_info = desc.array();
+ DCHECK(array_info);
+ if (array_info->type() == KuduColumnSchema::DECIMAL) {
+ RETURN_NOT_OK(validate_decimal());
+ break;
+ }
+ if (array_info->type() == KuduColumnSchema::VARCHAR) {
+ RETURN_NOT_OK(validate_varchar());
+ break;
+ }
+ }
+ [[fallthrough]]; // rest of NESTED are validated in the 'default' case
default:
if (data_->precision) {
return Status::InvalidArgument(
@@ -516,13 +543,30 @@ Status KuduColumnSpec::ToColumnSchema(KuduColumnSchema*
col) const {
// BlockSize: '0' signifies server-side default.
const int32_t block_size = data_->block_size ? data_->block_size.value() : 0;
+ KuduColumnSchema::DataType type = data_->type.value();
+ if (type == KuduColumnSchema::DataType::NESTED) {
+ DCHECK(data_->nested_type.has_value());
+ const auto& nested_descriptor = data_->nested_type.value();
+ if (!nested_descriptor.is_array()) {
+ return Status::NotSupported("only scalar arrays supported yet");
+ }
+ const auto* array_descriptor = nested_descriptor.array();
+ DCHECK(array_descriptor);
+ // Only 1D scalar array are supported yet.
+ DCHECK(!array_descriptor->nested_type());
+ // Use array element type for KuduColumnSchema constructor below
+ // in case of arrays.
+ type = array_descriptor->type();
+ }
+
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
*col = KuduColumnSchema(data_->name,
- column_type,
+ type,
nullable,
immutable,
data_->auto_incrementing,
+ data_->nested_type.has_value(),
default_val,
KuduColumnStorageAttributes(encoding, compression,
block_size),
type_attrs,
@@ -795,6 +839,8 @@ string KuduColumnSchema::DataTypeToString(DataType type) {
return "VARCHAR";
case SERIAL:
return "SERIAL";
+ case NESTED:
+ return "NESTED";
}
LOG(FATAL) << "Unhandled type " << type;
}
@@ -831,6 +877,8 @@ string KuduColumnSchema::DataTypeToString(DataType type) {
*type = DATE;
} else if (type_uc == "SERIAL") {
*type = SERIAL;
+ } else if (type_uc == "NESTED") {
+ *type = NESTED;
} else {
return Status::InvalidArgument(Substitute(
"data type $0 is not supported", type_str));
@@ -843,6 +891,7 @@ KuduColumnSchema::KuduColumnSchema(const string &name,
bool is_nullable,
bool is_immutable,
bool is_auto_incrementing,
+ bool is_array,
const void* default_value,
const KuduColumnStorageAttributes&
storage_attributes,
const KuduColumnTypeAttributes&
type_attributes,
@@ -855,29 +904,43 @@ KuduColumnSchema::KuduColumnSchema(const string &name,
type_attr_private.precision = type_attributes.precision();
type_attr_private.scale = type_attributes.scale();
type_attr_private.length = type_attributes.length();
+ const kudu::DataType internal_type = ToInternalDataType(type,
type_attributes);
col_ = ColumnSchemaBuilder()
.name(name)
- .type(ToInternalDataType(type, type_attributes))
+ .type(internal_type)
.nullable(is_nullable)
.immutable(is_immutable)
.auto_incrementing(is_auto_incrementing)
+ .array(is_array)
.read_default(default_value)
.write_default(default_value)
.storage_attributes(attr_private)
.type_attributes(type_attr_private)
.comment(comment)
.New().release();
+ // Construct an instance of KuduNestedTypeDescriptor for 'ndesc_'.
+ if (!is_array) {
+ ndesc_ = nullptr;
+ } else {
+ // Only 1D arrays are supported yet.
+ DCHECK_NE(KuduColumnSchema::NESTED, type);
+ ndesc_ = new KuduNestedTypeDescriptor(KuduArrayTypeDescriptor(type));
+ }
}
KuduColumnSchema::KuduColumnSchema(const KuduColumnSchema& other)
- : col_(nullptr) {
+ : col_(nullptr),
+ ndesc_(nullptr) {
CopyFrom(other);
}
-KuduColumnSchema::KuduColumnSchema() : col_(nullptr) {
+KuduColumnSchema::KuduColumnSchema()
+ : col_(nullptr),
+ ndesc_(nullptr) {
}
KuduColumnSchema::~KuduColumnSchema() {
+ delete ndesc_;
delete col_;
}
@@ -895,6 +958,12 @@ void KuduColumnSchema::CopyFrom(const KuduColumnSchema&
other) {
} else {
col_ = nullptr;
}
+ delete ndesc_;
+ if (other.ndesc_) {
+ ndesc_ = new KuduNestedTypeDescriptor(*other.ndesc_);
+ } else {
+ ndesc_ = nullptr;
+ }
}
bool KuduColumnSchema::Equals(const KuduColumnSchema& other) const {
@@ -902,8 +971,12 @@ bool KuduColumnSchema::Equals(const KuduColumnSchema&
other) const {
}
bool KuduColumnSchema::operator==(const KuduColumnSchema& rhs) const {
- return this == &rhs || col_ == rhs.col_ ||
- (col_ != nullptr && col_->Equals(*rhs.col_, ColumnSchema::COMPARE_ALL));
+ return this == &rhs || (col_ == rhs.col_ && ndesc_ == rhs.ndesc_) || (
+ (col_ != nullptr && col_->Equals(*rhs.col_, ColumnSchema::COMPARE_ALL)) &&
+ ((ndesc_ == nullptr && rhs.ndesc_ == nullptr) ||
+ (ndesc_ != nullptr && rhs.ndesc_ != nullptr &&
+ ndesc_->is_array() && rhs.ndesc_->is_array() &&
+ ndesc_->array()->type() == rhs.ndesc_->array()->type())));
}
bool KuduColumnSchema::operator!=(const KuduColumnSchema& rhs) const {
@@ -926,6 +999,10 @@ KuduColumnSchema::DataType KuduColumnSchema::type() const {
return FromInternalDataType(DCHECK_NOTNULL(col_)->type_info()->type());
}
+const KuduColumnSchema::KuduNestedTypeDescriptor*
KuduColumnSchema::nested_type() const {
+ return ndesc_;
+}
+
KuduColumnTypeAttributes KuduColumnSchema::type_attributes() const {
ColumnTypeAttributes type_attributes =
DCHECK_NOTNULL(col_)->type_attributes();
return KuduColumnTypeAttributes(type_attributes.precision,
type_attributes.scale,
@@ -954,6 +1031,92 @@ const string& KuduColumnSchema::comment() const {
return DCHECK_NOTNULL(col_)->comment();
}
+KuduColumnSchema::KuduArrayTypeDescriptor::KuduArrayTypeDescriptor(
+ KuduColumnSchema::DataType element_type)
+ : type_(element_type) {
+}
+
+KuduColumnSchema::DataType
+KuduColumnSchema::KuduArrayTypeDescriptor::type() const {
+ return type_;
+}
+
+const KuduColumnSchema::KuduNestedTypeDescriptor*
+KuduColumnSchema::KuduArrayTypeDescriptor::nested_type() const {
+ // This version supports only scalar one-dimensional arrays.
+ return nullptr;
+}
+
+KuduColumnSchema::KuduNestedTypeDescriptor::KuduNestedTypeDescriptor(
+ const KuduArrayTypeDescriptor& desc)
+ : data_(new Data(desc)) {
+}
+
+KuduColumnSchema::KuduNestedTypeDescriptor::~KuduNestedTypeDescriptor() {
+ if (data_) {
+ delete data_;
+ }
+}
+
+KuduColumnSchema::KuduNestedTypeDescriptor::KuduNestedTypeDescriptor(
+ const KuduColumnSchema::KuduNestedTypeDescriptor& other) {
+ DCHECK(other.is_array());
+ if (other.is_array()) {
+ data_ = new Data(*other.array());
+ }
+}
+
+KuduColumnSchema::KuduNestedTypeDescriptor&
+KuduColumnSchema::KuduNestedTypeDescriptor::operator=(
+ const KuduColumnSchema::KuduNestedTypeDescriptor& other) {
+ if (this == &other) {
+ return *this;
+ }
+ delete data_;
+ data_ = nullptr;
+ DCHECK(other.is_array());
+ if (other.is_array()) {
+ data_ = new Data(*other.array());
+ }
+ return *this;
+}
+
+KuduColumnSchema::KuduNestedTypeDescriptor::KuduNestedTypeDescriptor(
+ KuduColumnSchema::KuduNestedTypeDescriptor&& other) noexcept {
+ DCHECK(other.data_);
+ DCHECK(other.is_array());
+ data_ = other.data_;
+ other.data_ = nullptr;
+}
+
+KuduColumnSchema::KuduNestedTypeDescriptor&
+KuduColumnSchema::KuduNestedTypeDescriptor::operator=(
+ KuduColumnSchema::KuduNestedTypeDescriptor&& other) noexcept {
+ if (this == &other) {
+ return *this;
+ }
+ delete data_;
+ data_ = nullptr;
+
+ DCHECK(other.data_);
+ data_ = other.data_;
+ other.data_ = nullptr;
+ return *this;
+}
+
+bool KuduColumnSchema::KuduNestedTypeDescriptor::is_array() const {
+ return data_->kind_ == ARRAY;
+}
+
+const KuduColumnSchema::KuduArrayTypeDescriptor*
+KuduColumnSchema::KuduNestedTypeDescriptor::array() const {
+ if (is_array()) {
+ DCHECK(data_);
+ return &data_->descriptor.array;
+ }
+ return nullptr;
+}
+
////////////////////////////////////////////////////////////
// KuduSchema
////////////////////////////////////////////////////////////
@@ -1026,9 +1189,25 @@ KuduColumnSchema KuduSchema::Column(size_t idx) const {
#pragma GCC diagnostic pop
KuduColumnTypeAttributes type_attrs(col.type_attributes().precision,
col.type_attributes().scale,
col.type_attributes().length);
- return KuduColumnSchema(col.name(),
FromInternalDataType(col.type_info()->type()),
- col.is_nullable(), col.is_immutable(),
col.is_auto_incrementing(),
- col.read_default_value(), attrs, type_attrs,
col.comment());
+
+ KuduColumnSchema::DataType col_type =
FromInternalDataType(col.type_info()->type());
+ if (const auto* type_info = col.type_info(); type_info->is_array()) {
+ const auto* nested_type_info = type_info->nested_type_info();
+ DCHECK(nested_type_info);
+ DCHECK(nested_type_info->is_array());
+ const auto& array_info = nested_type_info->array();
+ col_type = FromInternalDataType(array_info.elem_type_info()->type());
+ }
+ return KuduColumnSchema(col.name(),
+ col_type,
+ col.is_nullable(),
+ col.is_immutable(),
+ col.is_auto_incrementing(),
+ col.is_array(),
+ col.read_default_value(),
+ attrs,
+ type_attrs,
+ col.comment());
}
bool KuduSchema::HasColumn(const std::string& col_name, KuduColumnSchema*
col_schema) const {
diff --git a/src/kudu/client/schema.h b/src/kudu/client/schema.h
index 3ccf3beec..e877a3de1 100644
--- a/src/kudu/client/schema.h
+++ b/src/kudu/client/schema.h
@@ -230,7 +230,70 @@ class KUDU_EXPORT KuduColumnSchema {
VARCHAR = 11,
TIMESTAMP = UNIXTIME_MICROS, //!< deprecated, use UNIXTIME_MICROS
DATE = 12,
- SERIAL = 13
+ SERIAL = 13,
+ NESTED = 14, //<! A nested (non-scalar) type: comes with extra info
+ };
+
+ /// @brief Enumeration for supported nested types.
+ enum KuduNestedType {
+ ARRAY = 0,
+ };
+
+ // forward declaration needed for KuduArrayTypeDescriptor
+ class KuduNestedTypeDescriptor;
+
+ /// @brief ArrayTypeDescriptor encapsulates information for array data types.
+ class KuduArrayTypeDescriptor {
+ public:
+ /// @param [in] element_type
+ /// Array element type.
+ ///
+ explicit KuduArrayTypeDescriptor(KuduColumnSchema::DataType element_type);
+
+ /// @return Array element type; if it's @c DataType::NESTED, then
additional
+ /// information on the type is provided by the descriptor returned by
+ /// @c KuduArrayTypeDescriptor::nested_type()
+ KuduColumnSchema::DataType type() const;
+
+ /// @brief Get information on the array elements' nested type.
+ ///
+ /// @return pointer to the descriptor of the array elements' nested type
+ /// (i.e. @c DataType::NESTED type details); @c NULL if the array's
+ /// elements are of a scalar (i.e. other than @c DataType::NESTED) type
+ const KuduNestedTypeDescriptor* nested_type() const;
+
+ private:
+ KuduColumnSchema::DataType type_;
+ };
+
+ /// @brief This class provides additional information for NESTED data type
+ class KuduNestedTypeDescriptor {
+ public:
+ /// @brief Produce a descriptor for an array.
+ ///
+ /// @param [in] desc
+ /// Array-specific descriptor
+ explicit KuduNestedTypeDescriptor(const KuduArrayTypeDescriptor& desc);
+
+ ~KuduNestedTypeDescriptor();
+
+ /// @return @c true if the NESTED type is an array, @c false otherwise
+ bool is_array() const;
+
+ /// @return descriptor if the NESTED type is an array, @c NULL otherwise
+ const KuduArrayTypeDescriptor* array() const;
+
+ KuduNestedTypeDescriptor(const KuduNestedTypeDescriptor& other);
+ KuduNestedTypeDescriptor& operator=(const KuduNestedTypeDescriptor& desc);
+#if __cplusplus >= 201103
+ KuduNestedTypeDescriptor(KuduNestedTypeDescriptor&& other) noexcept;
+ KuduNestedTypeDescriptor& operator=(KuduNestedTypeDescriptor&& other)
noexcept;
+#endif
+
+ private:
+ class KUDU_NO_EXPORT Data;
+ // Owned.
+ Data* data_;
};
/// @param [in] type
@@ -302,6 +365,12 @@ class KUDU_EXPORT KuduColumnSchema {
/// @return Type of the column schema.
DataType type() const;
+ /// Get information on the column's nested type.
+ ///
+ /// @return nested type information for a scalar column;
+ /// @c NULL if the column isn't of a nested type.
+ const KuduNestedTypeDescriptor* nested_type() const;
+
/// @return @c true iff the column schema has the nullable attribute set.
bool is_nullable() const;
@@ -346,6 +415,7 @@ class KUDU_EXPORT KuduColumnSchema {
bool is_nullable = false,
bool is_immutable = false,
bool is_auto_incrementing = false,
+ bool is_array = false,
const void* default_value = NULL, //NOLINT(modernize-use-nullptr)
const KuduColumnStorageAttributes& storage_attributes =
KuduColumnStorageAttributes(),
const KuduColumnTypeAttributes& type_attributes =
KuduColumnTypeAttributes(),
@@ -357,6 +427,7 @@ class KUDU_EXPORT KuduColumnSchema {
// Owned.
ColumnSchema* col_;
+ KuduNestedTypeDescriptor* ndesc_;
};
/// @brief Builder API for specifying or altering a column
@@ -548,6 +619,16 @@ class KUDU_EXPORT KuduColumnSpec {
KuduColumnSpec* Type(KuduColumnSchema::DataType type);
///@}
+ /// Specify information on the column of a nested (non-scalar) type.
+ ///
+ /// @note Column data types may not be changed once a table is created.
+ ///
+ /// @param [in] type_info
+ /// The information on the nested data type.
+ /// @return Pointer to the modified object.
+ KuduColumnSpec* NestedType(const KuduColumnSchema::KuduNestedTypeDescriptor&
type_info);
+ ///@}
+
/// @name Operations only relevant for Alter Table
///@{
diff --git a/src/kudu/common/row.h b/src/kudu/common/row.h
index 4820188bb..ef904dd50 100644
--- a/src/kudu/common/row.h
+++ b/src/kudu/common/row.h
@@ -564,7 +564,6 @@ void* ContiguousRowCell<ConstContiguousRow>::mutable_ptr()
const;
template<>
void ContiguousRowCell<ConstContiguousRow>::set_null(bool null) const;
-
// Utility class for building rows corresponding to a given schema.
// This is used only by tests.
// TODO(todd): move it into a test utility.
diff --git a/src/kudu/common/row_operations.cc
b/src/kudu/common/row_operations.cc
index f79c6a02e..074a7959e 100644
--- a/src/kudu/common/row_operations.cc
+++ b/src/kudu/common/row_operations.cc
@@ -261,18 +261,21 @@ Status RowOperationsPBDecoder::ReadNonNullBitmap(const
uint8_t** bitmap) {
Status RowOperationsPBDecoder::GetColumnSlice(const ColumnSchema& col,
Slice* slice,
Status* row_status) {
- size_t size = col.type_info()->size();
+ const size_t size = col.type_info()->size();
if (PREDICT_FALSE(src_.size() < size)) {
return Status::Corruption("Not enough data for column", col.ToString());
}
// Find the data
- if (col.type_info()->physical_type() == BINARY) {
+ if (col.type_info()->physical_type() != BINARY) {
+ *slice = Slice(src_.data(), size);
+ } else {
// The Slice in the protobuf has a pointer relative to the indirect data,
// not a real pointer. Need to fix that.
- auto ptr_slice = reinterpret_cast<const Slice*>(src_.data());
+ auto* ptr_slice = reinterpret_cast<const Slice*>(src_.data());
auto offset_in_indirect = reinterpret_cast<uintptr_t>(ptr_slice->data());
bool overflowed = false;
- size_t max_offset = AddWithOverflowCheck(offset_in_indirect,
ptr_slice->size(), &overflowed);
+ const size_t max_offset = AddWithOverflowCheck(
+ offset_in_indirect, ptr_slice->size(), &overflowed);
if (PREDICT_FALSE(overflowed || max_offset > pb_->indirect_data().size()))
{
return Status::Corruption("Bad indirect slice");
}
@@ -287,8 +290,6 @@ Status RowOperationsPBDecoder::GetColumnSlice(const
ColumnSchema& col,
// validate subsequent columns and rows.
}
*slice = Slice(&pb_->indirect_data()[offset_in_indirect],
ptr_slice->size());
- } else {
- *slice = Slice(src_.data(), size);
}
src_.remove_prefix(size);
return Status::OK();
diff --git a/src/kudu/common/row_operations.h b/src/kudu/common/row_operations.h
index 5c4c6d23f..75c539c8c 100644
--- a/src/kudu/common/row_operations.h
+++ b/src/kudu/common/row_operations.h
@@ -131,9 +131,9 @@ class RowOperationsPBDecoder {
Status ReadNonNullBitmap(const uint8_t** bitmap);
// Read one row's column data from 'src_', read result is stored in 'slice'.
// Return bad Status if data is corrupt.
- // NOTE: If 'row_status' is not nullptr, column data validate will be
performed,
- // and if column data validate error (i.e. column size exceed the limit),
only
- // set bad Status to 'row_status', and return Status::OK.
+ // NOTE: If 'row_status' is not nullptr, column data validation is performed,
+ // and if the data is invalid (i.e. column size exceeds the limit),
+ // the 'row_status' is set to non-OK, and the function returns Status::OK.
Status GetColumnSlice(const ColumnSchema& col, Slice* slice, Status*
row_status);
// Same as above, but store result in 'dst'.
Status ReadColumn(const ColumnSchema& col, uint8_t* dst, Status* row_status);
diff --git a/src/kudu/common/row_operations.proto
b/src/kudu/common/row_operations.proto
index d5dea532b..efad65500 100644
--- a/src/kudu/common/row_operations.proto
+++ b/src/kudu/common/row_operations.proto
@@ -76,7 +76,7 @@ message RowOperationsPB {
// [column data]
// For each column which is set and not NULL, the column's data follows.
The data
// format of each cell is the canonical in-memory format (eg little
endian).
- // For string data, the pointers are relative to 'indirect_data'.
+ // For string/binary and nested type columns, the pointers are relative to
'indirect_data'.
//
// The rows are concatenated end-to-end with no padding/alignment.
optional bytes rows = 2 [(kudu.REDACT) = true];
diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index 787425562..1bba4f301 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -100,7 +100,32 @@ static Status CopyRowToArena(const Slice& row,
return RelocateIndirectDataToArena(copied, dst_arena);
}
-class TestSchema : public KuduTest {};
+class TestSchema : public KuduTest {
+ public:
+ // Scalar data types for non-virtual columns.
+ static constexpr const DataType kScalarTypes[] = {
+ DataType::UINT8,
+ DataType::INT8,
+ DataType::UINT16,
+ DataType::INT16,
+ DataType::UINT32,
+ DataType::INT32,
+ DataType::UINT64,
+ DataType::INT64,
+ DataType::STRING,
+ DataType::BOOL,
+ DataType::FLOAT,
+ DataType::DOUBLE,
+ DataType::BINARY,
+ DataType::UNIXTIME_MICROS,
+ DataType::INT128,
+ DataType::DECIMAL32,
+ DataType::DECIMAL64,
+ DataType::DECIMAL128,
+ DataType::VARCHAR,
+ DataType::DATE
+ };
+};
// Test basic functionality of Schema definition
TEST_F(TestSchema, TestSchema) {
@@ -303,6 +328,150 @@ TEST_F(TestSchema, TestSchemaEqualsWithDecimal) {
EXPECT_NE(schema_18_10, schema_17_9);
}
+TEST_F(TestSchema, ArrayAndNonArrayColumnComparison) {
+ for (auto scalar_type : kScalarTypes) {
+ SCOPED_TRACE(DataType_Name(scalar_type));
+ const auto col_array = ColumnSchemaBuilder()
+ .name("col")
+ .type(scalar_type)
+ .Build();
+ const auto col_scalar = ColumnSchemaBuilder()
+ .name("col")
+ .type(scalar_type)
+ .array(true)
+ .Build();
+ ASSERT_FALSE(col_array.Equals(col_scalar, ColumnSchema::COMPARE_TYPE));
+ ASSERT_FALSE(col_array.Equals(col_scalar, ColumnSchema::COMPARE_ALL));
+ ASSERT_TRUE(col_array.Equals(col_scalar, ColumnSchema::COMPARE_NAME));
+ ASSERT_TRUE(col_array.Equals(col_scalar, ColumnSchema::COMPARE_OTHER));
+ }
+}
+
+TEST_F(TestSchema, ColumnSchemaForScalarType) {
+ for (auto scalar_type : kScalarTypes) {
+ SCOPED_TRACE(DataType_Name(scalar_type));
+ const auto schema = ColumnSchemaBuilder()
+ .name("col")
+ .type(scalar_type)
+ .Build();
+ ASSERT_FALSE(schema.is_array());
+ const auto* type_info = schema.type_info();
+ ASSERT_FALSE(type_info->is_array());
+ ASSERT_EQ(scalar_type, type_info->type());
+ const auto* descriptor = type_info->nested_type_info();
+ ASSERT_EQ(nullptr, descriptor);
+ }
+}
+
+TEST_F(TestSchema, ColumnSchemaForScalarArrays) {
+ for (auto scalar_type : kScalarTypes) {
+ SCOPED_TRACE(DataType_Name(scalar_type));
+ const auto schema = ColumnSchemaBuilder()
+ .name("col")
+ .type(scalar_type)
+ .array(true)
+ .Build();
+ ASSERT_TRUE(schema.is_array());
+ const auto* type_info = schema.type_info();
+ ASSERT_TRUE(type_info->is_array());
+ ASSERT_EQ(DataType::NESTED, type_info->type());
+ const auto* descriptor = type_info->nested_type_info();
+ ASSERT_TRUE(descriptor->is_array());
+ const auto& array_info = descriptor->array();
+ const TypeInfo* elem_info = array_info.elem_type_info();
+ ASSERT_EQ(scalar_type, elem_info->type());
+ ASSERT_FALSE(elem_info->is_array());
+ }
+}
+
+TEST_F(TestSchema, SimpleSchemaWithScalarArrays) {
+ ColumnSchema col0("key", STRING);
+ ColumnSchema col1(ColumnSchemaBuilder()
+ .name("int32_array")
+ .type(INT32)
+ .array(true));
+ ColumnSchema col2(ColumnSchemaBuilder()
+ .name("int64_array")
+ .type(INT64)
+ .array(true)
+ .nullable(true));
+ ColumnSchema col3(ColumnSchemaBuilder()
+ .name("string_array")
+ .type(STRING)
+ .array(true)
+ .nullable(true));
+
+ vector<ColumnSchema> cols = { col0, col1, col2, col3 };
+ Schema schema(cols, 1);
+
+ ASSERT_EQ((1 + 3) * sizeof(Slice), schema.byte_size());
+
+ EXPECT_EQ("(\n"
+ " key STRING NOT NULL,\n"
+ " int32_array INT32 1D-ARRAY NOT NULL,\n"
+ " int64_array INT64 1D-ARRAY NULLABLE,\n"
+ " string_array STRING 1D-ARRAY NULLABLE,\n"
+ " PRIMARY KEY (key)\n"
+ ")",
+ schema.ToString());
+
+ EXPECT_EQ("INT32 1D-ARRAY NOT NULL", schema.column(1).TypeToString());
+ EXPECT_EQ("INT64 1D-ARRAY NULLABLE", schema.column(2).TypeToString());
+ EXPECT_EQ("STRING 1D-ARRAY NULLABLE", schema.column(3).TypeToString());
+}
+
+TEST_F(TestSchema, SchemaWithScalarArraysEquals) {
+ ColumnSchema col0(ColumnSchemaBuilder()
+ .name("int_array")
+ .type(INT32)
+ .array(true));
+ ColumnSchema col1(ColumnSchemaBuilder()
+ .name("int_array")
+ .type(INT64)
+ .array(true)
+ .nullable(true));
+ ColumnSchema col2(ColumnSchemaBuilder()
+ .name("int64_vec")
+ .type(INT64)
+ .array(true)
+ .nullable(true));
+ ColumnSchema col3(ColumnSchemaBuilder()
+ .name("string_array")
+ .type(STRING)
+ .array(true)
+ .nullable(true));
+ ColumnSchema col4(ColumnSchemaBuilder()
+ .name("str_array")
+ .type(STRING)
+ .array(true));
+ ColumnSchema col5(ColumnSchemaBuilder()
+ .name("str")
+ .type(STRING));
+ ColumnSchema col6(ColumnSchemaBuilder()
+ .name("int64_scalar")
+ .type(INT64)
+ .nullable(true));
+ ColumnSchema col7(ColumnSchemaBuilder()
+ .name("int_array") // intentionally misleading name
+ .type(INT32));
+ ASSERT_TRUE(col0.Equals(col0));
+ ASSERT_TRUE(col1.Equals(col1));
+ ASSERT_TRUE(col2.Equals(col2));
+ ASSERT_TRUE(col3.Equals(col3));
+ ASSERT_TRUE(col4.Equals(col4));
+ ASSERT_TRUE(col0.Equals(col1, ColumnSchema::COMPARE_NAME));
+ ASSERT_FALSE(col0.Equals(col1, ColumnSchema::COMPARE_TYPE));
+ ASSERT_TRUE(col1.Equals(col2, ColumnSchema::COMPARE_OTHER));
+ ASSERT_TRUE(col1.Equals(col2, ColumnSchema::COMPARE_TYPE));
+ ASSERT_FALSE(col3.Equals(col4, ColumnSchema::COMPARE_TYPE)); // nullability
matters
+ ASSERT_TRUE(col3.Equals(col4, ColumnSchema::COMPARE_OTHER));
+ ASSERT_FALSE(col3.Equals(col5, ColumnSchema::COMPARE_TYPE));
+ ASSERT_FALSE(col4.Equals(col5, ColumnSchema::COMPARE_TYPE));
+ ASSERT_FALSE(col6.Equals(col1, ColumnSchema::COMPARE_TYPE));
+ ASSERT_TRUE(col7.Equals(col0, ColumnSchema::COMPARE_NAME));
+ ASSERT_FALSE(col7.Equals(col0, ColumnSchema::COMPARE_TYPE));
+}
+
TEST_F(TestSchema, TestColumnSchemaEquals) {
Slice default_str("read-write default");
ColumnSchema col1("key", STRING);
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index 360dbb404..be61345fe 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -212,9 +212,9 @@ class ColumnSchema final {
NULLABLE,
};
- // A minimalistic constructor with only name, type, and nullability of
- // the column to specify: the latter parameter is optional and by default
- // is set to Nullability::NOT_NULL.
+ // A minimalistic constructor for ColumnSchema of scalar data types
+ // with only name, type, and nullability of the column to specify. The latter
+ // parameter is optional and by default is set to Nullability::NOT_NULL.
//
// name: column name
// type: column type (e.g. UINT8, INT32, STRING, ...)
@@ -224,8 +224,9 @@ class ColumnSchema final {
// ColumnSchema col_a("a", UINT32);
// ColumnSchema col_b("b", STRING, ColumnSchema::NULLABLE);
//
- // Don't use this constructor if there are more column parameters to specify.
- // Instead, switch to ColumnSchemaBuilder():
+ // Use ColumnSchemaBuilder() if there are more parameters/attributes
+ // to specify, for example:
+ //
// uint32_t default_i32 = -15;
// ColumnSchema col_c(ColumnSchemaBuilder()
// .name("c")
@@ -263,6 +264,13 @@ class ColumnSchema final {
return is_auto_incrementing_;
}
+ // Whether the column is a one-dimensional array of a scalar type set by
+ // calling ColumnSchemaBuiler::type() method on the corresponding
+ // ColumnSchemaBuilder instance.
+ bool is_array() const {
+ return is_array_;
+ }
+
const std::string& name() const {
return name_;
}
@@ -338,10 +346,14 @@ class ColumnSchema final {
}
bool EqualsType(const ColumnSchema& other) const {
- if (this == &other) return true;
+ if (this == &other) {
+ return true;
+ }
return is_nullable_ == other.is_nullable_ &&
type_info()->type() == other.type_info()->type() &&
- type_attributes().EqualsForType(other.type_attributes(),
type_info()->type());
+ type_info()->is_array() == other.type_info()->is_array() &&
+ type_attributes().EqualsForType(other.type_attributes(),
+ type_info()->type());
}
// compare types in Equals function
@@ -462,6 +474,8 @@ class ColumnSchema final {
// Auto-incrementing column cannot be updated but written to by a client
and is auto
// filled in by Kudu by incrementing the previous highest written value
to the tablet.
// There can only be a single auto-incrementing column per table.
+ // is_array: true if the column is one-dimensional array of a scalar type
+ // NOTE: the 'type' parameter in this case corresponds to the element type
// read_default: default value used on read if the column was not present
before alter.
// The value will be copied and released on ColumnSchema destruction.
// write_default: default value added to the row if the column value was
@@ -479,16 +493,18 @@ class ColumnSchema final {
bool is_nullable,
bool is_immutable,
bool is_auto_incrementing,
+ bool is_array,
std::shared_ptr<Variant> read_default,
std::shared_ptr<Variant> write_default,
ColumnStorageAttributes storage_attributes,
ColumnTypeAttributes type_attributes,
std::string comment)
: name_(std::move(name)),
- type_info_(GetTypeInfo(type)),
+ type_info_(is_array ? GetArrayTypeInfo(type) : GetTypeInfo(type)),
is_nullable_(is_nullable),
is_immutable_(is_immutable),
is_auto_incrementing_(is_auto_incrementing),
+ is_array_(is_array),
read_default_(std::move(read_default)),
write_default_(std::move(write_default)),
storage_attributes_(storage_attributes),
@@ -505,6 +521,7 @@ class ColumnSchema final {
bool is_nullable_ = false;
bool is_immutable_ = false;
bool is_auto_incrementing_ = false;
+ bool is_array_ = false;
// use shared_ptr since the ColumnSchema is always copied around.
std::shared_ptr<Variant> read_default_{};
std::shared_ptr<Variant> write_default_{};
@@ -536,6 +553,7 @@ class ColumnSchemaBuilder final {
is_nullable_,
is_immutable_,
is_auto_incrementing_,
+ is_array_,
read_default_,
write_default_,
storage_attributes_,
@@ -553,6 +571,7 @@ class ColumnSchemaBuilder final {
is_nullable_,
is_immutable_,
is_auto_incrementing_,
+ is_array_,
read_default_,
write_default_,
storage_attributes_,
@@ -593,6 +612,13 @@ class ColumnSchemaBuilder final {
return *this;
}
+ // Whether the column is an array of scalar elements; the type of the
elements
+ // is set by calling the ColumnSchemaBuiler::type() method.
+ ColumnSchemaBuilder& array(bool is_array) {
+ is_array_ = is_array;
+ return *this;
+ }
+
// Set default value used on read if the column was not present before alter.
// The value 'read_default' is copied and the copy is passed to the produced
// ColumnSchema upon calling Build() or New().
@@ -649,6 +675,7 @@ class ColumnSchemaBuilder final {
// At least name and type should be set for a column.
DCHECK(!name_.empty());
DCHECK(type_ != DataType::UNKNOWN_DATA);
+ DCHECK(type_ != DataType::NESTED) << "arbitrary type nesting isn't
supported yet";
// Make sure the specified Variant data is consistent with the data type.
DCHECK(!read_default_ || type_ == read_default_->type());
@@ -659,17 +686,23 @@ class ColumnSchemaBuilder final {
DCHECK_EQ(INT64, type_);
DCHECK(!is_nullable_);
DCHECK(!is_immutable_);
+ DCHECK(!is_array_);
DCHECK(!read_default_);
DCHECK(!write_default_);
}
}
#endif // #if DCHECK_IS_ON() ...
+ // NOTE: in case of 1D arrays, 'type_' describes the type of array's elements
+ //
+ // TODO(aserbin): consolidate this one-off case with more generic approach
+ // when introducing support for other nested types
std::string name_{};
DataType type_ = DataType::UNKNOWN_DATA;
bool is_nullable_ = false;
bool is_immutable_ = false;
bool is_auto_incrementing_ = false;
+ bool is_array_ = false;
std::shared_ptr<Variant> read_default_{};
std::shared_ptr<Variant> write_default_{};
ColumnStorageAttributes storage_attributes_{};
@@ -899,6 +932,20 @@ class Schema {
return reinterpret_cast<const typename
DataTypeTraits<Type>::cpp_type*>(val);
}
+ template<class RowType>
+ const Slice*
+ ExtractColumnFromRow(const RowType& row, size_t idx) const {
+ DCHECK_SCHEMA_EQ(*this, *row.schema());
+ DCHECK_LT(idx, cols_.size());
+ const ColumnSchema& col_schema = cols_[idx];
+ DCHECK_EQ(DataType::NESTED, col_schema.type_info()->type());
+ DCHECK(col_schema.type_info()->is_array());
+
+ const void* val = col_schema.is_nullable() ? row.nullable_cell_ptr(idx)
+ : row.cell_ptr(idx);
+ return reinterpret_cast<const Slice*>(val);
+ }
+
// Stringify the given row, which conforms to this schema,
// in a way suitable for debugging. This isn't currently optimized
// so should be avoided in hot paths.
diff --git a/src/kudu/common/wire_protocol-test.cc
b/src/kudu/common/wire_protocol-test.cc
index 44546803a..a904f9c35 100644
--- a/src/kudu/common/wire_protocol-test.cc
+++ b/src/kudu/common/wire_protocol-test.cc
@@ -815,6 +815,131 @@ TEST_F(WireProtocolTest, TestInvalidReadAndWriteDefault) {
}
}
+TEST_F(WireProtocolTest, ScalarArray1DFromPB) {
+ {
+ ColumnSchemaPB pb;
+ pb.set_name("col");
+ pb.set_type(NESTED);
+ pb.set_is_nullable(true);
+ auto* array_info = pb.mutable_nested_type()->mutable_array();
+ array_info->set_type(INT8);
+ ColumnSchemaBuilder bld;
+ ASSERT_OK(ColumnSchemaBuilderFromPB(pb, &bld));
+ const ColumnSchema schema = bld.Build();
+ ASSERT_TRUE(schema.is_nullable());
+ ASSERT_TRUE(schema.is_array());
+ const TypeInfo* ti = schema.type_info();
+ ASSERT_EQ("int8 1d-array", ti->name());
+ ASSERT_FALSE(ti->is_virtual());
+ ASSERT_TRUE(ti->is_array());
+ ASSERT_EQ(BINARY, ti->physical_type());
+ ASSERT_EQ(NESTED, ti->type());
+ }
+ {
+ // IS_DELETED isn't accepted as array element type.
+ ColumnSchemaPB pb;
+ pb.set_name("col");
+ pb.set_type(NESTED);
+ pb.set_is_nullable(true);
+ auto* array_info = pb.mutable_nested_type()->mutable_array();
+ array_info->set_type(IS_DELETED);
+ ColumnSchemaBuilder bld;
+ const auto s = ColumnSchemaBuilderFromPB(pb, &bld);
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(),
+ "nested types: invalid scalar type 'IS_DELETED' for
array elements");
+ }
+}
+
+TEST_F(WireProtocolTest, ScalarArray1DToPB) {
+ {
+ ColumnSchema col(ColumnSchemaBuilder()
+ .name("col")
+ .type(STRING)
+ .array(true));
+ const auto* type_info = col.type_info();
+ ASSERT_EQ(DataType::NESTED, type_info->type());
+ ASSERT_NE(nullptr, type_info->nested_type_info());
+
+ ColumnSchemaPB pb;
+ ColumnSchemaToPB(col, &pb);
+ ColumnSchemaBuilder col_bld;
+ ASSERT_OK(ColumnSchemaBuilderFromPB(pb, &col_bld));
+ ColumnSchema col_fpb(col_bld);
+ ASSERT_TRUE(col_fpb.is_array());
+ ASSERT_FALSE(col_fpb.is_nullable());
+ ASSERT_FALSE(col_fpb.is_immutable());
+ ASSERT_FALSE(col_fpb.is_auto_incrementing());
+ const auto* fpb_type_info = col_fpb.type_info();
+ ASSERT_EQ(DataType::NESTED, fpb_type_info->type());
+
+ // A shortcut to verify the result: based on details of the type registry
+ // implementation, it's possible to compare pointers for type_info()
+ // for both the original and 'from-pb' schemas.
+ ASSERT_EQ(col.type_info(), fpb_type_info);
+
+ // Extra sanity checks.
+ const auto* fpb_nested_type_info = fpb_type_info->nested_type_info();
+ ASSERT_TRUE(fpb_nested_type_info->is_array());
+ const auto& fpb_array_type_info = fpb_nested_type_info->array();
+ ASSERT_EQ(DataType::STRING, fpb_array_type_info.elem_type_info()->type());
+ }
+}
+
+TEST_F(WireProtocolTest, NestedTypeValidityChecks) {
+ {
+ ColumnSchemaPB pb;
+ pb.set_name("col");
+ pb.set_is_nullable(true);
+ auto* array_info = pb.mutable_nested_type()->mutable_array();
+ array_info->set_type(INT64);
+ // The 'type' field should be set even if it's enough information on the
+ // nested type in the 'nested_type' field.
+ ColumnSchemaBuilder bld;
+ const auto s = ColumnSchemaBuilderFromPB(pb, &bld);
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "'type' field is missing");
+ }
+ {
+ ColumnSchemaPB pb;
+ pb.set_name("col");
+ pb.set_type(NESTED);
+ // With the 'type' field set to NESTED and the 'nested_type' field not
+ // present it should result in an error when trying to build ColumnSchema
+ // from ColumnSchemaPB.
+ ColumnSchemaBuilder bld;
+ const auto s = ColumnSchemaBuilderFromPB(pb, &bld);
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "missing 'nested_type' field for NESTED
type");
+ }
+ {
+ ColumnSchemaPB pb;
+ pb.set_name("col");
+ pb.set_type(BOOL);
+ pb.set_is_nullable(true);
+ auto* array_info = pb.mutable_nested_type()->mutable_array();
+ array_info->set_type(BOOL);
+ // When 'nested_type' field is set, the 'type' field must be set 'NESTED'.
+ ColumnSchemaBuilder bld;
+ const auto s = ColumnSchemaBuilderFromPB(pb, &bld);
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "found BOOL instead of NESTED in the
'type' field");
+ }
+ {
+ ColumnSchemaPB pb;
+ pb.set_name("col");
+ pb.set_type(NESTED);
+ pb.set_is_nullable(true);
+ auto* array_info = pb.mutable_nested_type()->mutable_array();
+ array_info->set_type(NESTED);
+ // Nested (multi-dimensional) arrays aren't supported yet.
+ ColumnSchemaBuilder bld;
+ const auto s = ColumnSchemaBuilderFromPB(pb, &bld);
+ ASSERT_TRUE(s.IsNotSupported()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "only 1d-arrays of scalar types are
supported yet");
+ }
+}
+
TEST_F(WireProtocolTest, TestColumnPredicateInList) {
ColumnSchema col1("col1", INT32);
vector<ColumnSchema> cols = { col1 };
diff --git a/src/kudu/common/wire_protocol.cc b/src/kudu/common/wire_protocol.cc
index 1a24da9af..1c8871f49 100644
--- a/src/kudu/common/wire_protocol.cc
+++ b/src/kudu/common/wire_protocol.cc
@@ -232,8 +232,19 @@ void ColumnSchemaToPB(const ColumnSchema& col_schema,
ColumnSchemaPB *pb, int fl
pb->set_name(col_schema.name());
pb->set_is_nullable(col_schema.is_nullable());
pb->set_immutable(col_schema.is_immutable());
- DataType type = col_schema.type_info()->type();
+ const auto* type_info = col_schema.type_info();
+ const DataType type = type_info->type();
pb->set_type(type);
+ // Set information on the nested type, if necessary.
+ if (type == DataType::NESTED) {
+ const auto* nested_type_info = type_info->nested_type_info();
+ DCHECK(nested_type_info);
+ // Only arrays are supported yet.
+ DCHECK(nested_type_info->is_array());
+ const auto& array_info = nested_type_info->array();
+ pb->mutable_nested_type()->mutable_array()->set_type(
+ array_info.elem_type_info()->type());
+ }
// Only serialize precision and scale for decimal types.
if (type == DataType::DECIMAL32 ||
type == DataType::DECIMAL64 ||
@@ -249,21 +260,21 @@ void ColumnSchemaToPB(const ColumnSchema& col_schema,
ColumnSchemaPB *pb, int fl
pb->set_cfile_block_size(col_schema.attributes().cfile_block_size);
}
if (col_schema.has_read_default()) {
- if (col_schema.type_info()->physical_type() == BINARY) {
+ if (type_info->physical_type() == BINARY) {
const Slice *read_slice = static_cast<const Slice
*>(col_schema.read_default_value());
pb->set_read_default_value(read_slice->data(), read_slice->size());
} else {
const void *read_value = col_schema.read_default_value();
- pb->set_read_default_value(read_value, col_schema.type_info()->size());
+ pb->set_read_default_value(read_value, type_info->size());
}
}
if (col_schema.has_write_default() && !(flags &
SCHEMA_PB_WITHOUT_WRITE_DEFAULT)) {
- if (col_schema.type_info()->physical_type() == BINARY) {
+ if (type_info->physical_type() == BINARY) {
const Slice *write_slice = static_cast<const Slice
*>(col_schema.write_default_value());
pb->set_write_default_value(write_slice->data(), write_slice->size());
} else {
const void *write_value = col_schema.write_default_value();
- pb->set_write_default_value(write_value, col_schema.type_info()->size());
+ pb->set_write_default_value(write_value, type_info->size());
}
}
if (!col_schema.comment().empty() && !(flags & SCHEMA_PB_WITHOUT_COMMENT)) {
@@ -277,9 +288,48 @@ void ColumnSchemaToPB(const ColumnSchema& col_schema,
ColumnSchemaPB *pb, int fl
Status ColumnSchemaBuilderFromPB(const ColumnSchemaPB& pb,
ColumnSchemaBuilder* csb) {
ColumnSchemaBuilder builder;
- builder.name(pb.name())
- .type(pb.type())
- .nullable(pb.is_nullable());
+ builder.name(pb.name()).nullable(pb.is_nullable());
+
+ if (PREDICT_FALSE(!pb.has_type())) {
+ return Status::InvalidArgument("'type' field is missing");
+ }
+ if (!pb.has_nested_type()) {
+ if (PREDICT_FALSE(pb.type() == DataType::NESTED)) {
+ return Status::InvalidArgument("missing 'nested_type' field for NESTED
type");
+ }
+ builder.type(pb.type());
+ } else {
+ if (PREDICT_FALSE(pb.type() != DataType::NESTED)) {
+ return Status::InvalidArgument(Substitute(
+ "nested types: found $0 instead of NESTED in the 'type' field",
+ DataType_Name(pb.type())));
+ }
+ builder.type(DataType::NESTED);
+ const auto& nested_type = pb.nested_type();
+ if (PREDICT_FALSE(!nested_type.has_array())) {
+ // As of now, only 1d-arrays of primitive types are supported.
+ return Status::NotSupported(
+ "nested types: only arrays of scalar types are supported yet");
+ }
+ const auto& descriptor = nested_type.array();
+ if (PREDICT_FALSE(!descriptor.has_type())) {
+ return Status::InvalidArgument("nested types: missing array element type
info");
+ }
+ const DataType elem_type = descriptor.type();
+ switch (elem_type) {
+ case DataType::NESTED:
+ return Status::NotSupported(
+ "nested types: only 1d-arrays of scalar types are supported yet");
+ case DataType::IS_DELETED:
+ case DataType::UNKNOWN_DATA:
+ return Status::InvalidArgument(Substitute(
+ "nested types: invalid scalar type '$0' for array elements",
+ DataType_Name(elem_type)));
+ default:
+ break;
+ }
+ builder.array(true).type(elem_type);
+ }
if (pb.has_read_default_value()) {
Slice read_default(pb.read_default_value());
diff --git a/src/kudu/integration-tests/alter_table-test.cc
b/src/kudu/integration-tests/alter_table-test.cc
index be03a77df..9933da9fc 100644
--- a/src/kudu/integration-tests/alter_table-test.cc
+++ b/src/kudu/integration-tests/alter_table-test.cc
@@ -276,6 +276,39 @@ class AlterTableTest : public KuduTest {
return table_alterer->timeout(timeout)->Alter();
}
+ Status AddNewNullableArrayColumn(
+ const string& table_name,
+ const string& column_name,
+ KuduColumnSchema::DataType array_elem_type,
+ const MonoDelta& timeout = MonoDelta::FromSeconds(30)) {
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+ KuduColumnSchema::KuduArrayTypeDescriptor desc(array_elem_type);
+ auto* spec = alterer->
+ AddColumn(column_name)->
+ NestedType(KuduColumnSchema::KuduNestedTypeDescriptor(desc))->
+ Nullable();
+ switch (array_elem_type) {
+ case KuduColumnSchema::DECIMAL:
+ spec->Precision(18)->Scale(2);
+ break;
+ case KuduColumnSchema::VARCHAR:
+ spec->Length(42);
+ break;
+ default:
+ break;
+ }
+ return alterer->timeout(timeout)->Alter();
+ }
+
+ Status DropColumn(
+ const string& table_name,
+ const string& column_name,
+ const MonoDelta& timeout = MonoDelta::FromSeconds(30)) {
+ unique_ptr<KuduTableAlterer> alterer(client_->NewTableAlterer(table_name));
+ alterer->DropColumn(column_name);
+ return alterer->timeout(timeout)->Alter();
+ }
+
Status SetReplicationFactor(const string& table_name,
int32_t replication_factor) {
unique_ptr<KuduTableAlterer>
table_alterer(client_->NewTableAlterer(table_name));
@@ -420,9 +453,38 @@ const char* const AlterTableTest::kTableName =
"fake-table";
// on the TS handling the tablet of the altered table.
// TODO: create and verify multiple tablets when the client will support that.
TEST_F(AlterTableTest, TestTabletReports) {
- ASSERT_EQ(0, tablet_replica_->tablet()->metadata()->schema_version());
+ uint32_t schema_version = 0;
+ ASSERT_EQ(schema_version++,
tablet_replica_->tablet()->metadata()->schema_version());
ASSERT_OK(AddNewI32Column(kTableName, "new-i32", 0));
- ASSERT_EQ(1, tablet_replica_->tablet()->metadata()->schema_version());
+ ASSERT_EQ(schema_version++,
tablet_replica_->tablet()->metadata()->schema_version());
+
+ constexpr const KuduColumnSchema::DataType kArrayElemTypes[] = {
+ KuduColumnSchema::INT8,
+ KuduColumnSchema::INT16,
+ KuduColumnSchema::INT32,
+ KuduColumnSchema::INT64,
+ KuduColumnSchema::STRING,
+ KuduColumnSchema::BOOL,
+ KuduColumnSchema::FLOAT,
+ KuduColumnSchema::DOUBLE,
+ KuduColumnSchema::BINARY,
+ KuduColumnSchema::UNIXTIME_MICROS,
+ KuduColumnSchema::DECIMAL,
+ KuduColumnSchema::VARCHAR,
+ KuduColumnSchema::DATE,
+ };
+ for (auto elem_type : kArrayElemTypes) {
+ const string col_name = "arr_" +
KuduColumnSchema::DataTypeToString(elem_type);
+ SCOPED_TRACE(Substitute("column_name: $0", col_name));
+ ASSERT_OK(AddNewNullableArrayColumn(kTableName, col_name, elem_type));
+ ASSERT_EQ(schema_version++,
tablet_replica_->tablet()->metadata()->schema_version());
+ }
+ for (auto elem_type : kArrayElemTypes) {
+ const string col_name = "arr_" +
KuduColumnSchema::DataTypeToString(elem_type);
+ SCOPED_TRACE(Substitute("column_name: $0", col_name));
+ ASSERT_OK(DropColumn(kTableName, col_name));
+ ASSERT_EQ(schema_version++,
tablet_replica_->tablet()->metadata()->schema_version());
+ }
}
// Verify that adding an existing column will return an "already present" error
diff --git a/src/kudu/master/catalog_manager.cc
b/src/kudu/master/catalog_manager.cc
index 4457aa824..0aa083999 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1886,6 +1886,23 @@ Status ValidateClientSchema(const optional<string>& name,
ti->name(), col.name()));
}
+ // An artificial constraint: disallow creating tables with array columns
+ // of 128-bit integers (INT128 and DECIMAL128) since on-the-wire
+ // ser/des-ing of such arrays isn't yet supported
+ //
+ // TODO(aserbin): remove this artificial restriction once handling 128-bit
+ // integer arrays is implemented
+ if (ti->is_array()) {
+ const auto* nti = ti->nested_type_info();
+ DCHECK(ti);
+ const auto* eti = nti->array().elem_type_info();
+ if (PREDICT_FALSE(eti->type() == INT128 || eti->type() == DECIMAL128)) {
+ return Status::NotSupported(Substitute(
+ "columns of '$0' type are not yet supported (column '$1')",
+ ti->name(), col.name()));
+ }
+ }
+
// Check that the encodings are valid for the specified types.
const TypeEncodingInfo* dummy;
if (const auto s = TypeEncodingInfo::Get(ti, col.attributes().encoding,
&dummy);
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index 6780e305d..ab9893613 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -858,6 +858,103 @@ TEST_F(MasterTest, TestCatalog) {
}
}
+TEST_F(MasterTest, CreateTableWithArrayColumns) {
+ static constexpr const char* const kTableName = "array_columns_0";
+
+ const Schema table_schema({ ColumnSchema("key", INT32),
+ ColumnSchema("str", STRING),
+ ColumnSchemaBuilder()
+ .type(INT32)
+ .array(true)
+ .name("arr0")
+ .nullable(true),
+ ColumnSchemaBuilder()
+ .type(INT64)
+ .array(true)
+ .name("arr1"),
+ }, 1);
+
+ ASSERT_OK(CreateTable(kTableName, table_schema));
+
+ ListTablesResponsePB tables;
+ NO_FATALS(DoListAllTables(&tables));
+ ASSERT_EQ(1, tables.tables_size());
+ ASSERT_EQ(kTableName, tables.tables(0).name());
+
+ // Drop the table.
+ {
+ DeleteTableRequestPB req;
+ DeleteTableResponsePB resp;
+ RpcController controller;
+ req.mutable_table()->set_table_name(kTableName);
+ ASSERT_OK(proxy_->DeleteTable(req, &resp, &controller));
+ SCOPED_TRACE(SecureDebugString(resp));
+ ASSERT_FALSE(resp.has_error());
+ }
+
+ // List tables: there should be no tables.
+ NO_FATALS(DoListAllTables(&tables));
+ ASSERT_EQ(0, tables.tables_size());
+}
+
+// Arrays of 128-bit integers (INT128, DECIMAL128) aren't supported yet due to
+// limitations of the framework used for on-the-wire data ser/des-ing.
+//
+// TODO(aserbin): remove this test once the limitation is addressed
+TEST_F(MasterTest, TableNonSupportedArrayTypeColumns) {
+ static constexpr const char* const kTableName = "array_128bit_int_columns";
+
+ // INT128 array columns aren't supported yet.
+ {
+ const Schema table_schema({ ColumnSchema("key", INT32),
+ ColumnSchemaBuilder()
+ .type(BOOL)
+ .array(true)
+ .name("arr_0")
+ .nullable(true),
+ ColumnSchemaBuilder()
+ .type(INT128)
+ .array(true)
+ .name("arr_1")
+ .nullable(true),
+ ColumnSchemaBuilder()
+ .type(INT128)
+ .array(true)
+ .name("arr_2"),
+ }, 1);
+
+ const auto s = CreateTable(kTableName, table_schema);
+ ASSERT_TRUE(s.IsNotSupported()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "columns of 'int128 1d-array' type are "
+ "not yet supported (column 'arr_1')");
+ }
+
+ // DECIMAL128 array columns aren't supported yet.
+ {
+ const Schema table_schema({ ColumnSchema("key", INT32),
+ ColumnSchemaBuilder()
+ .type(DECIMAL128)
+ .array(true)
+ .name("arr_0"),
+ ColumnSchemaBuilder()
+ .type(DECIMAL128)
+ .array(true)
+ .name("arr_1")
+ .nullable(true),
+ ColumnSchemaBuilder()
+ .type(INT64)
+ .array(true)
+ .name("arr_2")
+ .nullable(true),
+ }, 1);
+
+ const auto s = CreateTable(kTableName, table_schema);
+ ASSERT_TRUE(s.IsNotSupported()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), "columns of 'decimal (128 bit) 1d-array'
type are "
+ "not yet supported (column 'arr_0')");
+ }
+}
+
TEST_F(MasterTest, ListTablesWithTableFilter) {
static const char* const kUserTableName = "user_table";
static const char* const kSystemTableName = "system_table";