This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 7b5b2dfbae [#4874] improvement(hive): add validation for Hive struct
data type (#8212)
7b5b2dfbae is described below
commit 7b5b2dfbae6a8fe097c7240b8f2ed84b2bc0c512
Author: mchades <[email protected]>
AuthorDate: Tue Aug 26 11:34:18 2025 +0800
[#4874] improvement(hive): add validation for Hive struct data type (#8212)
### What changes were proposed in this pull request?
Add validation for the Hive struct data type: struct fields that carry comments are rejected, because Hive cannot preserve them.
### Why are the changes needed?
Fix: #4874
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
tests added
---
.../catalog/hive/HiveCatalogOperations.java | 2 ++
.../gravitino/catalog/hive/TestHiveTable.java | 26 ++++++++++++++++++++++
.../hive/converter/HiveDataTypeConverter.java | 13 ++++++++++-
docs/apache-hive-catalog.md | 3 ++-
4 files changed, 42 insertions(+), 2 deletions(-)
diff --git
a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveCatalogOperations.java
b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveCatalogOperations.java
index 43842677c8..482dbc67da 100644
---
a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveCatalogOperations.java
+++
b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveCatalogOperations.java
@@ -817,6 +817,8 @@ public class HiveCatalogOperations implements
CatalogOperations, SupportsSchemas
} catch (TException | InterruptedException e) {
throw new RuntimeException(
"Failed to create Hive table " + tableIdent.name() + " in Hive
Metastore", e);
+ } catch (RuntimeException e) {
+ throw e;
} catch (Exception e) {
throw new RuntimeException(e);
}
diff --git
a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/TestHiveTable.java
b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/TestHiveTable.java
index cd143b1e8e..5a2f574fd7 100644
---
a/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/TestHiveTable.java
+++
b/catalogs/catalog-hive/src/test/java/org/apache/gravitino/catalog/hive/TestHiveTable.java
@@ -218,6 +218,32 @@ public class TestHiveTable extends
MiniHiveMetastoreService {
distribution,
sortOrders));
Assertions.assertTrue(exception.getMessage().contains("Table already
exists"));
+
+ // Test struct field with comment
+ HiveColumn structCol =
+ HiveColumn.builder()
+ .withName("struct_col")
+ .withType(
+ Types.StructType.of(
+ Types.StructType.Field.of(
+ "field1", Types.StringType.get(), true, "field
comment")))
+ .build();
+ Column[] illegalColumns = new Column[] {structCol};
+ exception =
+ Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () ->
+ tableCatalog.createTable(
+ NameIdentifier.of(
+ META_LAKE_NAME, hiveCatalog.name(), hiveSchema.name(),
genRandomName()),
+ illegalColumns,
+ HIVE_COMMENT,
+ properties,
+ new Transform[0],
+ distribution,
+ sortOrders));
+ Assertions.assertEquals(
+ "Hive does not support comments in struct fields: field1",
exception.getMessage());
}
@Test
diff --git
a/catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/converter/HiveDataTypeConverter.java
b/catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/converter/HiveDataTypeConverter.java
index bde5b6e134..6295618549 100644
---
a/catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/converter/HiveDataTypeConverter.java
+++
b/catalogs/hive-metastore-common/src/main/java/org/apache/gravitino/hive/converter/HiveDataTypeConverter.java
@@ -41,6 +41,7 @@ import static
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getUnionTyp
import static
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getVarcharTypeInfo;
import static
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString;
+import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -116,7 +117,17 @@ public class HiveDataTypeConverter implements
DataTypeConverter<TypeInfo, String
Types.StructType structType = (Types.StructType) type;
List<TypeInfo> typeInfos =
Arrays.stream(structType.fields())
- .map(t -> fromGravitino(t.type()))
+ .map(
+ t -> {
+ // Hive does not support comments in struct fields. There
is no way to preserve
+ // comments in struct fields when converting from
Gravitino to Hive.
+ // See: https://issues.apache.org/jira/browse/HIVE-26593
+ Preconditions.checkArgument(
+ t.comment() == null,
+ "Hive does not support comments in struct fields:
%s",
+ t.name());
+ return fromGravitino(t.type());
+ })
.collect(Collectors.toList());
List<String> names =
Arrays.stream(structType.fields())
diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md
index d9839e1e8a..3ea3f10e56 100644
--- a/docs/apache-hive-catalog.md
+++ b/docs/apache-hive-catalog.md
@@ -132,7 +132,8 @@ The following table lists the data types mapped from the
Hive catalog to Graviti
| `uniontype` | `union` | 0.2.0 |
:::info
-Since 0.6.0-incubating, the data types other than listed above are mapped to
Gravitino **[External
Type](./manage-relational-metadata-using-gravitino.md#external-type)** that
represents an unresolvable data type from the Hive catalog.
+1. Since 0.6.0-incubating, data types other than those listed above are mapped
to the Gravitino **[External
Type](./manage-relational-metadata-using-gravitino.md#external-type)**, which
represents an unresolvable data type from the Hive catalog.
+2. Since version 1.0.0, using the `struct` data type with field comments will
throw an error, because Hive does not support comments on struct fields (see
[HIVE-26593](https://issues.apache.org/jira/browse/HIVE-26593)).
:::
### Table properties