This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 03d7cb4ea KUDU-1261 array columns aren't yet supported as key columns
03d7cb4ea is described below

commit 03d7cb4ea285efbaa942194670a72516e041bcf2
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Oct 15 20:06:54 2025 -0700

    KUDU-1261 array columns aren't yet supported as key columns
    
    This changelist adds a guardrail to explicitly disallow creating Kudu
    tables with array key columns.  This artificial restriction is added
    to prevent unexpected behavior since a proper encoding for array column
    cells isn't implemented yet.  A new unit test to verify the expected
    behavior is added as well.
    
    As a bit of context, to properly encode an array cell, it's necessary
    to:
      * make sure that the contents of the memory buffer backing the cell
        are 'normalized', i.e. the interpretation of null/non-valid elements
        of a particular type is always the same when feeding data to the
        encoder
      * run the encoder against the sub-range of the memory buffer that
        contains array elements' data only, and not include metadata, etc.
    
    Change-Id: If37a4dd2d1689aa51ab09e6cb71f01664dc2ee1a
    Reviewed-on: http://gerrit.cloudera.org:8080/23552
    Reviewed-by: Abhishek Chennaka <[email protected]>
    Reviewed-by: Ashwani Raina <[email protected]>
    Tested-by: Alexey Serbin <[email protected]>
---
 src/kudu/common/key_encoder.cc     | 13 +++++++--
 src/kudu/master/catalog_manager.cc |  2 +-
 src/kudu/master/master-test.cc     | 56 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/src/kudu/common/key_encoder.cc b/src/kudu/common/key_encoder.cc
index a2053fb64..72aeb7f53 100644
--- a/src/kudu/common/key_encoder.cc
+++ b/src/kudu/common/key_encoder.cc
@@ -31,14 +31,14 @@ namespace kudu {
 
 // A resolver for Encoders
 template <typename Buffer>
-class EncoderResolver {
+class EncoderResolver final {
  public:
   const KeyEncoder<Buffer>& GetKeyEncoder(DataType t) {
     DCHECK(HasKeyEncoderForType(t));
     return *encoders_[t];
   }
 
-  bool HasKeyEncoderForType(DataType t) {
+  bool HasKeyEncoderForType(DataType t) const {
     return t < encoders_.size() && encoders_[t];
   }
 
@@ -79,6 +79,15 @@ const KeyEncoder<Buffer>& GetKeyEncoder(const TypeInfo* typeinfo) {
 
 // Returns true if the type is allowed in keys.
 bool IsTypeAllowableInKey(const TypeInfo* typeinfo) {
+  if (typeinfo->nested_type_info()) {
+    // An artificial constraint: NESTED types (e.g., array) aren't yet supported
+    // as primary key columns.
+    //
+    // TODO(aserbin): implement array-specific key encoding instead of just
+    //                encoding raw data in the buffer that backs run-time
+    //                in-memory representation of array cells
+    return false;
+  }
   return Singleton<EncoderResolver<faststring>>::get()->HasKeyEncoderForType(
       typeinfo->physical_type());
 }
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 0aa083999..85f1cb761 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1871,7 +1871,7 @@ Status ValidateClientSchema(const optional<string>& name,
   for (int i = 0; i < schema.num_key_columns(); i++) {
     if (!IsTypeAllowableInKey(schema.column(i).type_info())) {
       return Status::InvalidArgument(
-          "key column may not have type of BOOL, FLOAT, or DOUBLE");
+          "key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED (e.g., array)");
     }
   }
 
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index ab9893613..431a69e94 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -2055,7 +2055,61 @@ TEST_F(MasterTest, TestCreateTableInvalidKeyType) {
     Status s = CreateTable(kTableName, kTableSchema, vector<KuduPartialRow>());
     ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
     ASSERT_STR_CONTAINS(s.ToString(),
-        "key column may not have type of BOOL, FLOAT, or DOUBLE");
+        "key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED");
+  }
+}
+
+// NESTED type columns (e.g., arrays) aren't yet supported in primary keys.
+TEST_F(MasterTest, CreateTableWithArrayKeyType) {
+  constexpr const char* const kTableName = "array_primary_key_column";
+  constexpr const char* const kErrMsg =
+      "key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED";
+
+  const Schema table_schemas[] = {
+    {
+      {
+        ColumnSchemaBuilder().type(INT32).array(true).name("key"),
+      }, 1
+    },
+    {
+      {
+        ColumnSchemaBuilder().type(STRING).array(true).name("key"),
+        ColumnSchemaBuilder().type(INT64).name("c1").nullable(true),
+      }, 1
+    },
+    {
+      {
+        ColumnSchemaBuilder().type(INT16).array(true).name("key"),
+        ColumnSchemaBuilder().type(INT8).name("c1").nullable(true),
+      }, 1
+    },
+    {
+      {
+        ColumnSchema("key", INT32),
+        ColumnSchemaBuilder().type(INT64).array(true).name("arr0"),
+      }, 2
+    },
+    {
+      {
+        ColumnSchemaBuilder().type(STRING).array(true).name("arr0"),
+        ColumnSchema("key", INT32),
+        ColumnSchemaBuilder().type(INT64).array(true).name("arr1"),
+      }, 3
+    },
+    {
+      {
+        ColumnSchemaBuilder().type(BOOL).array(true).name("arr0"),
+        ColumnSchema("key", BINARY),
+      }, 2
+    },
+  };
+
+  const vector<KuduPartialRow> split_rows{};
+  for (const auto& schema : table_schemas) {
+    SCOPED_TRACE(schema.ToString(Schema::BASE_INFO));
+    const auto s = CreateTable(kTableName, schema, split_rows);
+    ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+    ASSERT_STR_CONTAINS(s.ToString(), kErrMsg);
   }
 }
 

Reply via email to