This is an automated email from the ASF dual-hosted git repository.
ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new eb83b389dd HIVE-26507 (addendum): Do not allow hive to iceberg
migration if source table contains CHAR or VARCHAR columns (#3593)
eb83b389dd is described below
commit eb83b389dddacf2543caf205400dca1f11070def
Author: László Pintér <[email protected]>
AuthorDate: Tue Sep 13 20:07:51 2022 +0200
HIVE-26507 (addendum): Do not allow hive to iceberg migration if source
table contains CHAR or VARCHAR columns (#3593)
---
.../apache/iceberg/hive/HiveSchemaConverter.java | 8 ++-
.../iceberg/mr/hive/HiveIcebergMetaHook.java | 44 +++++++++++++++++
.../iceberg/mr/hive/TestHiveIcebergCTAS.java | 22 +++++----
.../iceberg/mr/hive/TestHiveIcebergMigration.java | 57 +++++++++++++++++++++-
.../hive/TestHiveIcebergStorageHandlerNoScan.java | 17 ++-----
5 files changed, 121 insertions(+), 27 deletions(-)
diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
index 49a4a401fe..e011abb7be 100644
--- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
+++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
@@ -99,8 +99,12 @@ class HiveSchemaConverter {
return Types.BinaryType.get();
case CHAR:
case VARCHAR:
-        throw new IllegalArgumentException("Unsupported Hive type (" +
-            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() + ") for Iceberg tables.");
+        Preconditions.checkArgument(autoConvert, "Unsupported Hive type %s, use string " +
+            "instead or enable automatic type conversion, set 'iceberg.mr.schema.auto.conversion' to true",
+            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+
+        LOG.debug("Using auto conversion from CHAR/VARCHAR to STRING");
+        return Types.StringType.get();
case STRING:
return Types.StringType.get();
case TIMESTAMP:
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 29e0565239..c22ac6db11 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -30,6 +30,7 @@ import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -52,6 +53,12 @@ import org.apache.hadoop.hive.ql.parse.PartitionTransform;
import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionStateUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.iceberg.BaseMetastoreTableOperations;
@@ -412,8 +419,45 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
if (!hasCorrectFileFormat) {
       throw new MetaException("Cannot convert hive table to iceberg with input format: " + sd.getInputFormat());
}
+
+    List<TypeInfo> typeInfos =
+        hmsTable.getSd().getCols().stream().map(f -> TypeInfoUtils.getTypeInfoFromTypeString(f.getType()))
+            .collect(Collectors.toList());
+ for (TypeInfo typeInfo : typeInfos) {
+ validateColumnType(typeInfo);
+ }
+ }
+
+ private void validateColumnType(TypeInfo typeInfo) throws MetaException {
+ switch (typeInfo.getCategory()) {
+ case PRIMITIVE:
+ PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
+ ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
+        if (primitiveCategory.equals(PrimitiveObjectInspector.PrimitiveCategory.CHAR) || primitiveCategory.equals(
+            PrimitiveObjectInspector.PrimitiveCategory.VARCHAR)) {
+          throw new MetaException(String.format(
+              "Cannot convert hive table to iceberg that contains column type %s. " + "Use string type columns instead",
+              primitiveCategory));
+        }
+ break;
+ case STRUCT:
+        List<TypeInfo> structTypeInfos = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
+ for (TypeInfo structTypeInfo : structTypeInfos) {
+ validateColumnType(structTypeInfo);
+ }
+ break;
+ case LIST:
+ validateColumnType(((ListTypeInfo) typeInfo).getListElementTypeInfo());
+ break;
+ case MAP:
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+ validateColumnType(mapTypeInfo.getMapKeyTypeInfo());
+ validateColumnType(mapTypeInfo.getMapValueTypeInfo());
+ break;
+ }
}
+
@Override
   public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
       throws MetaException {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
index 7c338e9b61..8ecb9e31bd 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
@@ -257,7 +257,9 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
     Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
Map<String, Type> notSupportedTypes = ImmutableMap.of(
"TINYINT", Types.IntegerType.get(),
- "SMALLINT", Types.IntegerType.get());
+ "SMALLINT", Types.IntegerType.get(),
+ "VARCHAR(1)", Types.StringType.get(),
+ "CHAR(1)", Types.StringType.get());
     shell.setHiveSessionValue(InputFormatConfig.SCHEMA_AUTO_CONVERSION, "true");
@@ -290,19 +292,19 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
"boolean_col_5 BOOLEAN, " +
"float_col_6 FLOAT, " +
"bigint_col_7 BIGINT, " +
- "string0098_col_8 STRING, " +
+ "varchar0098_col_8 VARCHAR(98), " +
"timestamp_col_9 TIMESTAMP, " +
"bigint_col_10 BIGINT, " +
"decimal0903_col_11 DECIMAL(9, 3), " +
"timestamp_col_12 TIMESTAMP, " +
"timestamp_col_13 TIMESTAMP, " +
"float_col_14 FLOAT, " +
- "string0254_col_15 STRING, " +
+ "char0254_col_15 CHAR(254), " +
"double_col_16 DOUBLE, " +
"timestamp_col_17 TIMESTAMP, " +
"boolean_col_18 BOOLEAN, " +
"decimal2608_col_19 DECIMAL(26, 8), " +
- "string0216_col_20 STRING, " +
+ "varchar0216_col_20 VARCHAR(216), " +
"string_col_21 STRING, " +
"bigint_col_22 BIGINT, " +
"boolean_col_23 BOOLEAN, " +
@@ -315,7 +317,7 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
"decimal2020_col_30 DECIMAL(20, 20), " +
"boolean_col_31 BOOLEAN, " +
"double_col_32 DOUBLE, " +
- "string0148_col_33 STRING, " +
+ "varchar0148_col_33 VARCHAR(148), " +
"decimal2121_col_34 DECIMAL(21, 21), " +
"tinyint_col_35 TINYINT, " +
"boolean_col_36 BOOLEAN, " +
@@ -326,7 +328,7 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
"decimal1408_col_41 DECIMAL(14, 8), " +
"string_col_42 STRING, " +
"decimal0902_col_43 DECIMAL(9, 2), " +
- "string0204_col_44 STRING, " +
+ "varchar0204_col_44 VARCHAR(204), " +
"boolean_col_45 BOOLEAN, " +
"timestamp_col_46 TIMESTAMP, " +
"boolean_col_47 BOOLEAN, " +
@@ -339,12 +341,12 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
"timestamp_col_54 TIMESTAMP, " +
"int_col_55 INT, " +
"decimal0505_col_56 DECIMAL(5, 5), " +
- "string0155_col_57 STRING, " +
+ "char0155_col_57 CHAR(155), " +
"boolean_col_58 BOOLEAN, " +
"bigint_col_59 BIGINT, " +
"boolean_col_60 BOOLEAN, " +
"boolean_col_61 BOOLEAN, " +
- "string0249_col_62 STRING, " +
+ "char0249_col_62 CHAR(249), " +
"boolean_col_63 BOOLEAN, " +
"timestamp_col_64 TIMESTAMP, " +
"decimal1309_col_65 DECIMAL(13, 9), " +
@@ -356,12 +358,12 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
"timestamp_col_71 TIMESTAMP, " +
"double_col_72 DOUBLE, " +
"boolean_col_73 BOOLEAN, " +
- "string0222_col_74 STRING, " +
+ "char0222_col_74 CHAR(222), " +
"float_col_75 FLOAT, " +
"string_col_76 STRING, " +
"decimal2612_col_77 DECIMAL(26, 12), " +
"timestamp_col_78 TIMESTAMP, " +
- "string0128_col_79 STRING, " +
+ "char0128_col_79 CHAR(128), " +
"timestamp_col_80 TIMESTAMP, " +
"double_col_81 DOUBLE, " +
"timestamp_col_82 TIMESTAMP, " +
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
index 5a68400475..d8380892a7 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
@@ -33,6 +33,7 @@ import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.thrift.TException;
@@ -50,6 +51,57 @@ import org.mockito.Mockito;
*/
 public class TestHiveIcebergMigration extends HiveIcebergStorageHandlerWithEngineBase {
+ @Test
+  public void testMigrateHiveTableWithPrimitiveTypeColumnsToIceberg() throws TException, InterruptedException {
+    shell.setHiveSessionValue(InputFormatConfig.SCHEMA_AUTO_CONVERSION, "true");
+ TableIdentifier identifier = TableIdentifier.of("default", "tbl_alltypes");
+ shell.executeStatement(String.format("CREATE EXTERNAL TABLE %s (" +
+ "a INT, " +
+ "decimal_col DECIMAL(30, 3), " +
+ "tinyint_col TINYINT, " +
+ "boolean_col BOOLEAN, " +
+ "float_col FLOAT, " +
+ "bigint_col BIGINT, " +
+ "double_col DOUBLE, " +
+ "string_col STRING, " +
+ "int_col INT, " +
+ "smallint_col SMALLINT) " +
+ "STORED AS %s %s %s",
+        identifier.name(), fileFormat.name(), testTables.locationForCreateTableSQL(identifier),
+        testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
+
+ shell.executeStatement(String.format("INSERT INTO TABLE %s VALUES(" +
+ "1, " +
+ "13.234, " +
+ "8, " +
+ "false, " +
+ "0.7896, " +
+ "543643275, " +
+ "5462435243, " +
+ "'wfewjifwejfoewfnvewokfow', " +
+ "43221, " +
+ "129 " +
+ ")", identifier.name()));
+
+ validateMigration(identifier.name());
+ }
+
+ @Test
+  public void testMigrateHiveTableWithUnsupportedPrimitiveTypeColumnToIceberg() {
+    // enough to test once
+    Assume.assumeTrue(fileFormat == FileFormat.ORC && isVectorized &&
+        testTableType == TestTables.TestTableType.HIVE_CATALOG);
+    TableIdentifier identifier = TableIdentifier.of("default", "tbl_unsupportedtypes");
+    shell.executeStatement(String.format("CREATE EXTERNAL TABLE %s (" +
+        "char_col CHAR(10)) STORED AS %s %s %s", identifier.name(), fileFormat.name(),
+        testTables.locationForCreateTableSQL(identifier), testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
+    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+        "Cannot convert hive table to iceberg that", () -> {
+          shell.executeStatement(String.format("ALTER TABLE %s SET TBLPROPERTIES " +
+              "('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')", identifier.name()));
+        });
+ }
+
@Test
   public void testMigrateHiveTableWithComplexTypeColumnsToIceberg() throws TException, InterruptedException {
TableIdentifier identifier = TableIdentifier.of("default", "tbl_complex");
@@ -221,7 +273,10 @@ public class TestHiveIcebergMigration extends HiveIcebergStorageHandlerWithEngineBase
     List<Object[]> alterResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
Assert.assertEquals(originalResult.size(), alterResult.size());
for (int i = 0; i < originalResult.size(); i++) {
-      Assert.assertTrue(Arrays.equals(originalResult.get(i), alterResult.get(i)));
+      Assert.assertEquals(originalResult.get(i).length, alterResult.get(i).length);
+      for (int j = 0; j < originalResult.get(i).length; j++) {
+        Assert.assertEquals(String.valueOf(originalResult.get(i)[j]), String.valueOf(alterResult.get(i)[j]));
+      }
}
Table hmsTable = shell.metastore().getTable("default", tableName);
validateSd(hmsTable, "iceberg");
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 2a617e88fe..dd76545aac 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -781,7 +781,9 @@ public class TestHiveIcebergStorageHandlerNoScan {
     // Can not create INTERVAL types from normal create table, so leave them out from this test
Map<String, Type> notSupportedTypes = ImmutableMap.of(
"TINYINT", Types.IntegerType.get(),
- "SMALLINT", Types.IntegerType.get());
+ "SMALLINT", Types.IntegerType.get(),
+ "VARCHAR(1)", Types.StringType.get(),
+ "CHAR(1)", Types.StringType.get());
     shell.setHiveSessionValue(InputFormatConfig.SCHEMA_AUTO_CONVERSION, "true");
@@ -796,19 +798,6 @@ public class TestHiveIcebergStorageHandlerNoScan {
       Assert.assertEquals(notSupportedTypes.get(notSupportedType), icebergTable.schema().columns().get(0).type());
shell.executeStatement("DROP TABLE not_supported_types");
}
-
-    List<String> notCompatibleTypes = ImmutableList.of("VARCHAR(1)", "CHAR(1)");
-
-    for (String notCompatibleType : notCompatibleTypes) {
-      AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
-          "Unsupported Hive type", () -> {
-            shell.executeStatement("CREATE EXTERNAL TABLE not_compatible_types (not_supported " + notCompatibleType +
-                ") STORED BY ICEBERG " +
-                testTables.locationForCreateTableSQL(identifier) +
-                testTables.propertiesForCreateTableSQL(
-                    ImmutableMap.of(InputFormatConfig.EXTERNAL_TABLE_PURGE, "TRUE")));
-          });
-    }
}
@Test