This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new f8ab470c7 KUDU-1261 Add HMS integration for array datatype
f8ab470c7 is described below
commit f8ab470c7f6e4217b5a148149c59bfb99b9ea542
Author: Marton Greber <[email protected]>
AuthorDate: Thu Oct 9 15:24:51 2025 +0200
KUDU-1261 Add HMS integration for array datatype
This patch adds the necessary array type information to the
Kudu HMS integration.
Change-Id: Iab1d4c0920043c213757d35ea7bf721a06e8cf44
Reviewed-on: http://gerrit.cloudera.org:8080/23521
Reviewed-by: Alexey Serbin <[email protected]>
Tested-by: Alexey Serbin <[email protected]>
---
src/kudu/hms/hms_catalog-test.cc | 82 ++++++++++++++++++++++++++++++++++++++++
src/kudu/hms/hms_catalog.cc | 33 ++++++++++++----
2 files changed, 107 insertions(+), 8 deletions(-)
diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc
index 3b28052f9..b29bf79d2 100644
--- a/src/kudu/hms/hms_catalog-test.cc
+++ b/src/kudu/hms/hms_catalog-test.cc
@@ -530,5 +530,87 @@ TEST_F(HmsCatalogTest, TestMetastoreUuid) {
});
}
+TEST_F(HmsCatalogTest, TestArrayTypes) {
+ const string kTableId = "test-array-table-id";
+ const string kClusterId = "test-cluster-id";
+ const string kComment = "array types table";
+
+ SchemaBuilder b;
+ ASSERT_OK(b.AddKeyColumn("key", DataType::INT32));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("int32_array")
+ .type(INT32)
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("int64_array")
+ .type(INT64)
+ .array(true)
+ .nullable(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("string_array")
+ .type(STRING)
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("bool_array")
+ .type(BOOL)
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("float_array")
+ .type(FLOAT)
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("double_array")
+ .type(DOUBLE)
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("decimal_array")
+ .type(DECIMAL64)
+ .type_attributes(ColumnTypeAttributes(18, 10))
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("varchar_array")
+ .type(VARCHAR)
+ .type_attributes(ColumnTypeAttributes(50))
+ .array(true)));
+ ASSERT_OK(b.AddColumn(ColumnSchemaBuilder()
+ .name("date_array")
+ .type(DATE)
+ .array(true)));
+ Schema schema = b.Build();
+
+ const string table_name = "default.array_table";
+ ASSERT_OK(hms_catalog_->CreateTable(kTableId, table_name, kClusterId,
+ nullopt, schema, kComment));
+
+ hive::Table table;
+ ASSERT_OK(hms_client_->GetTable("default", "array_table", &table));
+ ASSERT_EQ(table.parameters[HmsClient::kKuduTableIdKey], kTableId);
+ ASSERT_EQ(table.parameters[HmsClient::kKuduClusterIdKey], kClusterId);
+  ASSERT_EQ(table.parameters[HmsClient::kStorageHandlerKey], HmsClient::kKuduStorageHandler);
+ ASSERT_EQ(table.parameters[HmsClient::kTableCommentKey], kComment);
+
+ ASSERT_EQ(schema.num_columns(), table.sd.cols.size());
+ EXPECT_EQ("key", table.sd.cols[0].name);
+ EXPECT_EQ("int", table.sd.cols[0].type);
+ EXPECT_EQ("int32_array", table.sd.cols[1].name);
+ EXPECT_EQ("array<int>", table.sd.cols[1].type);
+ EXPECT_EQ("int64_array", table.sd.cols[2].name);
+ EXPECT_EQ("array<bigint>", table.sd.cols[2].type);
+ EXPECT_EQ("string_array", table.sd.cols[3].name);
+ EXPECT_EQ("array<string>", table.sd.cols[3].type);
+ EXPECT_EQ("bool_array", table.sd.cols[4].name);
+ EXPECT_EQ("array<boolean>", table.sd.cols[4].type);
+ EXPECT_EQ("float_array", table.sd.cols[5].name);
+ EXPECT_EQ("array<float>", table.sd.cols[5].type);
+ EXPECT_EQ("double_array", table.sd.cols[6].name);
+ EXPECT_EQ("array<double>", table.sd.cols[6].type);
+ EXPECT_EQ("decimal_array", table.sd.cols[7].name);
+ EXPECT_EQ("array<decimal(18,10)>", table.sd.cols[7].type);
+ EXPECT_EQ("varchar_array", table.sd.cols[8].name);
+ EXPECT_EQ("array<varchar(50)>", table.sd.cols[8].type);
+ EXPECT_EQ("date_array", table.sd.cols[9].name);
+ EXPECT_EQ("array<date>", table.sd.cols[9].type);
+}
+
} // namespace hms
} // namespace kudu
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index 38821675a..058c01270 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -326,9 +326,8 @@ Status HmsCatalog::GetUuid(string* uuid) {
namespace {
-string column_to_field_type(const ColumnSchema& column) {
- // See org.apache.hadoop.hive.serde.serdeConstants.
- switch (column.type_info()->type()) {
+string element_type_to_field_type(DataType type, const ColumnTypeAttributes& type_attrs) {
+ switch (type) {
case BOOL: return "boolean";
case INT8: return "tinyint";
case INT16: return "smallint";
@@ -337,21 +336,39 @@ string column_to_field_type(const ColumnSchema& column) {
case DECIMAL32:
case DECIMAL64:
case DECIMAL128: return Substitute("decimal($0,$1)",
- column.type_attributes().precision,
- column.type_attributes().scale);
+ type_attrs.precision,
+ type_attrs.scale);
case FLOAT: return "float";
case DOUBLE: return "double";
case STRING: return "string";
case BINARY: return "binary";
- case VARCHAR: return Substitute("varchar($0)",
- column.type_attributes().length);
+ case VARCHAR: return Substitute("varchar($0)", type_attrs.length);
case UNIXTIME_MICROS: return "timestamp";
case DATE: return "date";
- default: LOG(FATAL) << "unhandled column type: " << column.TypeToString();
+ default: LOG(FATAL) << "unhandled element type: " << DataType_Name(type);
}
__builtin_unreachable();
}
+string column_to_field_type(const ColumnSchema& column) {
+ // See org.apache.hadoop.hive.serde.serdeConstants.
+ const auto* type_info = column.type_info();
+ const DataType type = type_info->type();
+
+ if (type == NESTED) {
+ const TypeInfo* elem_type_info = GetArrayElementTypeInfo(*type_info);
+ DCHECK(elem_type_info);
+ const DataType elem_type = elem_type_info->type();
+ // Kudu only supports 1D arrays of scalar types currently.
+ DCHECK_NE(elem_type, NESTED) << "nested arrays are not supported";
+ const string elem_type_str = element_type_to_field_type(
+ elem_type, column.type_attributes());
+ return Substitute("array<$0>", elem_type_str);
+ }
+
+ return element_type_to_field_type(type, column.type_attributes());
+}
+
hive::FieldSchema column_to_field(const ColumnSchema& column) {
hive::FieldSchema field;
field.name = column.name();