This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new f8ab470c7 KUDU-1261 Add HMS integration for array datatype
f8ab470c7 is described below

commit f8ab470c7f6e4217b5a148149c59bfb99b9ea542
Author: Marton Greber <[email protected]>
AuthorDate: Thu Oct 9 15:24:51 2025 +0200

    KUDU-1261 Add HMS integration for array datatype
    
    This patch adds the necessary array type information to the
    Kudu HMS integration.
    
    Change-Id: Iab1d4c0920043c213757d35ea7bf721a06e8cf44
    Reviewed-on: http://gerrit.cloudera.org:8080/23521
    Reviewed-by: Alexey Serbin <[email protected]>
    Tested-by: Alexey Serbin <[email protected]>
---
 src/kudu/hms/hms_catalog-test.cc | 82 ++++++++++++++++++++++++++++++++++++++++
 src/kudu/hms/hms_catalog.cc      | 33 ++++++++++++----
 2 files changed, 107 insertions(+), 8 deletions(-)

diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc
index 3b28052f9..b29bf79d2 100644
--- a/src/kudu/hms/hms_catalog-test.cc
+++ b/src/kudu/hms/hms_catalog-test.cc
@@ -530,5 +530,87 @@ TEST_F(HmsCatalogTest, TestMetastoreUuid) {
   });
 }
 
+TEST_F(HmsCatalogTest, TestArrayTypes) {
+  const string kTableId = "test-array-table-id";
+  const string kClusterId = "test-cluster-id";
+  const string kComment = "array types table";
+
+  SchemaBuilder b;
+  ASSERT_OK(b.AddKeyColumn("key", DataType::INT32));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("int32_array")
+                            .type(INT32)
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("int64_array")
+                            .type(INT64)
+                            .array(true)
+                            .nullable(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("string_array")
+                            .type(STRING)
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("bool_array")
+                            .type(BOOL)
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("float_array")
+                            .type(FLOAT)
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("double_array")
+                            .type(DOUBLE)
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("decimal_array")
+                            .type(DECIMAL64)
+                            .type_attributes(ColumnTypeAttributes(18, 10))
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("varchar_array")
+                            .type(VARCHAR)
+                            .type_attributes(ColumnTypeAttributes(50))
+                            .array(true)));
+  ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+                            .name("date_array")
+                            .type(DATE)
+                            .array(true)));
+  Schema schema = b.Build();
+
+  const string table_name = "default.array_table";
+  ASSERT_OK(hms_catalog_->CreateTable(kTableId, table_name, kClusterId,
+                                      nullopt, schema, kComment));
+
+  hive::Table table;
+  ASSERT_OK(hms_client_->GetTable("default", "array_table", &table));
+  ASSERT_EQ(table.parameters[HmsClient::kKuduTableIdKey], kTableId);
+  ASSERT_EQ(table.parameters[HmsClient::kKuduClusterIdKey], kClusterId);
+  ASSERT_EQ(table.parameters[HmsClient::kStorageHandlerKey], HmsClient::kKuduStorageHandler);
+  ASSERT_EQ(table.parameters[HmsClient::kTableCommentKey], kComment);
+
+  ASSERT_EQ(schema.num_columns(), table.sd.cols.size());
+  EXPECT_EQ("key", table.sd.cols[0].name);
+  EXPECT_EQ("int", table.sd.cols[0].type);
+  EXPECT_EQ("int32_array", table.sd.cols[1].name);
+  EXPECT_EQ("array<int>", table.sd.cols[1].type);
+  EXPECT_EQ("int64_array", table.sd.cols[2].name);
+  EXPECT_EQ("array<bigint>", table.sd.cols[2].type);
+  EXPECT_EQ("string_array", table.sd.cols[3].name);
+  EXPECT_EQ("array<string>", table.sd.cols[3].type);
+  EXPECT_EQ("bool_array", table.sd.cols[4].name);
+  EXPECT_EQ("array<boolean>", table.sd.cols[4].type);
+  EXPECT_EQ("float_array", table.sd.cols[5].name);
+  EXPECT_EQ("array<float>", table.sd.cols[5].type);
+  EXPECT_EQ("double_array", table.sd.cols[6].name);
+  EXPECT_EQ("array<double>", table.sd.cols[6].type);
+  EXPECT_EQ("decimal_array", table.sd.cols[7].name);
+  EXPECT_EQ("array<decimal(18,10)>", table.sd.cols[7].type);
+  EXPECT_EQ("varchar_array", table.sd.cols[8].name);
+  EXPECT_EQ("array<varchar(50)>", table.sd.cols[8].type);
+  EXPECT_EQ("date_array", table.sd.cols[9].name);
+  EXPECT_EQ("array<date>", table.sd.cols[9].type);
+}
+
 } // namespace hms
 } // namespace kudu
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index 38821675a..058c01270 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -326,9 +326,8 @@ Status HmsCatalog::GetUuid(string* uuid) {
 
 namespace {
 
-string column_to_field_type(const ColumnSchema& column) {
-  // See org.apache.hadoop.hive.serde.serdeConstants.
-  switch (column.type_info()->type()) {
+string element_type_to_field_type(DataType type, const ColumnTypeAttributes& type_attrs) {
+  switch (type) {
     case BOOL: return "boolean";
     case INT8: return "tinyint";
     case INT16: return "smallint";
@@ -337,21 +336,39 @@ string column_to_field_type(const ColumnSchema& column) {
     case DECIMAL32:
     case DECIMAL64:
     case DECIMAL128: return Substitute("decimal($0,$1)",
-                                       column.type_attributes().precision,
-                                       column.type_attributes().scale);
+                                       type_attrs.precision,
+                                       type_attrs.scale);
     case FLOAT: return "float";
     case DOUBLE: return "double";
     case STRING: return "string";
     case BINARY: return "binary";
-    case VARCHAR: return Substitute("varchar($0)",
-                                    column.type_attributes().length);
+    case VARCHAR: return Substitute("varchar($0)", type_attrs.length);
     case UNIXTIME_MICROS: return "timestamp";
     case DATE: return "date";
-    default: LOG(FATAL) << "unhandled column type: " << column.TypeToString();
+    default: LOG(FATAL) << "unhandled element type: " << DataType_Name(type);
   }
   __builtin_unreachable();
 }
 
+string column_to_field_type(const ColumnSchema& column) {
+  // See org.apache.hadoop.hive.serde.serdeConstants.
+  const auto* type_info = column.type_info();
+  const DataType type = type_info->type();
+
+  if (type == NESTED) {
+    const TypeInfo* elem_type_info = GetArrayElementTypeInfo(*type_info);
+    DCHECK(elem_type_info);
+    const DataType elem_type = elem_type_info->type();
+    // Kudu only supports 1D arrays of scalar types currently.
+    DCHECK_NE(elem_type, NESTED) << "nested arrays are not supported";
+    const string elem_type_str = element_type_to_field_type(
+        elem_type, column.type_attributes());
+    return Substitute("array<$0>", elem_type_str);
+  }
+
+  return element_type_to_field_type(type, column.type_attributes());
+}
+
 hive::FieldSchema column_to_field(const ColumnSchema& column) {
   hive::FieldSchema field;
   field.name = column.name();

Reply via email to