This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 03d7cb4ea KUDU-1261 array columns aren't yet supported as key columns
03d7cb4ea is described below
commit 03d7cb4ea285efbaa942194670a72516e041bcf2
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Oct 15 20:06:54 2025 -0700
KUDU-1261 array columns aren't yet supported as key columns
This changelist adds a guardrail to explicitly disallow creating Kudu
tables with array key columns. This artificial restriction is added
to prevent unexpected behavior since a proper encoding for array column
cells isn't implemented yet. A new unit test to verify the expected
behavior is added as well.
As a bit of context, to properly encode an array cell, it's necessary
to make sure that:
* the contents of the memory buffer backing the cell are 'normalized',
i.e. the interpretation of null/non-valid elements of a particular
type is always the same when feeding data to the encoder
* the encoder is run against the sub-range of the memory buffer that
contains only the array elements' data, excluding metadata, etc.
Change-Id: If37a4dd2d1689aa51ab09e6cb71f01664dc2ee1a
Reviewed-on: http://gerrit.cloudera.org:8080/23552
Reviewed-by: Abhishek Chennaka <[email protected]>
Reviewed-by: Ashwani Raina <[email protected]>
Tested-by: Alexey Serbin <[email protected]>
---
src/kudu/common/key_encoder.cc | 13 +++++++--
src/kudu/master/catalog_manager.cc | 2 +-
src/kudu/master/master-test.cc | 56 +++++++++++++++++++++++++++++++++++++-
3 files changed, 67 insertions(+), 4 deletions(-)
diff --git a/src/kudu/common/key_encoder.cc b/src/kudu/common/key_encoder.cc
index a2053fb64..72aeb7f53 100644
--- a/src/kudu/common/key_encoder.cc
+++ b/src/kudu/common/key_encoder.cc
@@ -31,14 +31,14 @@ namespace kudu {
// A resolver for Encoders
template <typename Buffer>
-class EncoderResolver {
+class EncoderResolver final {
public:
const KeyEncoder<Buffer>& GetKeyEncoder(DataType t) {
DCHECK(HasKeyEncoderForType(t));
return *encoders_[t];
}
- bool HasKeyEncoderForType(DataType t) {
+ bool HasKeyEncoderForType(DataType t) const {
return t < encoders_.size() && encoders_[t];
}
@@ -79,6 +79,15 @@ const KeyEncoder<Buffer>& GetKeyEncoder(const TypeInfo* typeinfo) {
// Returns true if the type is allowed in keys.
bool IsTypeAllowableInKey(const TypeInfo* typeinfo) {
+ if (typeinfo->nested_type_info()) {
+ // An artificial constraint: NESTED types (e.g., array) aren't yet supported
+ // as primary key columns.
+ //
+ // TODO(aserbin): implement array-specific key encoding instead of just
+ // encoding raw data in the buffer that backs run-time
+ // in-memory representation of array cells
+ return false;
+ }
return Singleton<EncoderResolver<faststring>>::get()->HasKeyEncoderForType(
typeinfo->physical_type());
}
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 0aa083999..85f1cb761 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1871,7 +1871,7 @@ Status ValidateClientSchema(const optional<string>& name,
for (int i = 0; i < schema.num_key_columns(); i++) {
if (!IsTypeAllowableInKey(schema.column(i).type_info())) {
return Status::InvalidArgument(
- "key column may not have type of BOOL, FLOAT, or DOUBLE");
+ "key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED (e.g., array)");
}
}
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index ab9893613..431a69e94 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -2055,7 +2055,61 @@ TEST_F(MasterTest, TestCreateTableInvalidKeyType) {
Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>());
ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
ASSERT_STR_CONTAINS(s.ToString(),
- "key column may not have type of BOOL, FLOAT, or DOUBLE");
+ "key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED");
+ }
+}
+
+// NESTED type columns (e.g., arrays) aren't yet supported in primary keys.
+TEST_F(MasterTest, CreateTableWithArrayKeyType) {
+ constexpr const char* const kTableName = "array_primary_key_column";
+ constexpr const char* const kErrMsg =
+ "key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED";
+
+ const Schema table_schemas[] = {
+ {
+ {
+ ColumnSchemaBuilder().type(INT32).array(true).name("key"),
+ }, 1
+ },
+ {
+ {
+ ColumnSchemaBuilder().type(STRING).array(true).name("key"),
+ ColumnSchemaBuilder().type(INT64).name("c1").nullable(true),
+ }, 1
+ },
+ {
+ {
+ ColumnSchemaBuilder().type(INT16).array(true).name("key"),
+ ColumnSchemaBuilder().type(INT8).name("c1").nullable(true),
+ }, 1
+ },
+ {
+ {
+ ColumnSchema("key", INT32),
+ ColumnSchemaBuilder().type(INT64).array(true).name("arr0"),
+ }, 2
+ },
+ {
+ {
+ ColumnSchemaBuilder().type(STRING).array(true).name("arr0"),
+ ColumnSchema("key", INT32),
+ ColumnSchemaBuilder().type(INT64).array(true).name("arr1"),
+ }, 3
+ },
+ {
+ {
+ ColumnSchemaBuilder().type(BOOL).array(true).name("arr0"),
+ ColumnSchema("key", BINARY),
+ }, 2
+ },
+ };
+
+ const vector<KuduPartialRow> split_rows{};
+ for (const auto& schema : table_schemas) {
+ SCOPED_TRACE(schema.ToString(Schema::BASE_INFO));
+ const auto s = CreateTable(kTableName, schema, split_rows);
+ ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+ ASSERT_STR_CONTAINS(s.ToString(), kErrMsg);
}
}