This is an automated email from the ASF dual-hosted git repository.

ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new eb83b389dd HIVE-26507 (addendum): Do not allow hive to iceberg 
migration if source table contains CHAR or VARCHAR columns (#3593)
eb83b389dd is described below

commit eb83b389dddacf2543caf205400dca1f11070def
Author: László Pintér <[email protected]>
AuthorDate: Tue Sep 13 20:07:51 2022 +0200

    HIVE-26507 (addendum): Do not allow hive to iceberg migration if source 
table contains CHAR or VARCHAR columns (#3593)
---
 .../apache/iceberg/hive/HiveSchemaConverter.java   |  8 ++-
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 44 +++++++++++++++++
 .../iceberg/mr/hive/TestHiveIcebergCTAS.java       | 22 +++++----
 .../iceberg/mr/hive/TestHiveIcebergMigration.java  | 57 +++++++++++++++++++++-
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 17 ++-----
 5 files changed, 121 insertions(+), 27 deletions(-)

diff --git 
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
 
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
index 49a4a401fe..e011abb7be 100644
--- 
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
+++ 
b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
@@ -99,8 +99,12 @@ class HiveSchemaConverter {
             return Types.BinaryType.get();
           case CHAR:
           case VARCHAR:
-            throw new IllegalArgumentException("Unsupported Hive type (" +
-                ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() + ") for 
Iceberg tables.");
+            Preconditions.checkArgument(autoConvert, "Unsupported Hive type 
%s, use string " +
+                    "instead or enable automatic type conversion, set 
'iceberg.mr.schema.auto.conversion' to true",
+                ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+
+            LOG.debug("Using auto conversion from CHAR/VARCHAR to STRING");
+            return Types.StringType.get();
           case STRING:
             return Types.StringType.get();
           case TIMESTAMP:
diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 29e0565239..c22ac6db11 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -30,6 +30,7 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.Set;
+import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -52,6 +53,12 @@ import org.apache.hadoop.hive.ql.parse.PartitionTransform;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionStateUtil;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.iceberg.BaseMetastoreTableOperations;
@@ -412,8 +419,45 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     if (!hasCorrectFileFormat) {
       throw new MetaException("Cannot convert hive table to iceberg with input 
format: " + sd.getInputFormat());
     }
+
+    List<TypeInfo> typeInfos =
+        hmsTable.getSd().getCols().stream().map(f -> 
TypeInfoUtils.getTypeInfoFromTypeString(f.getType()))
+            .collect(Collectors.toList());
+    for (TypeInfo typeInfo : typeInfos) {
+      validateColumnType(typeInfo);
+    }
+  }
+
+  private void validateColumnType(TypeInfo typeInfo) throws MetaException {
+    switch (typeInfo.getCategory()) {
+      case PRIMITIVE:
+        PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
+            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
+        if 
(primitiveCategory.equals(PrimitiveObjectInspector.PrimitiveCategory.CHAR) || 
primitiveCategory.equals(
+            PrimitiveObjectInspector.PrimitiveCategory.VARCHAR)) {
+          throw new MetaException(String.format(
+              "Cannot convert hive table to iceberg that contains column type 
%s. " + "Use string type columns instead",
+             primitiveCategory));
+        }
+        break;
+      case STRUCT:
+        List<TypeInfo> structTypeInfos = ((StructTypeInfo) 
typeInfo).getAllStructFieldTypeInfos();
+        for (TypeInfo structTypeInfo : structTypeInfos) {
+          validateColumnType(structTypeInfo);
+        }
+        break;
+      case LIST:
+        validateColumnType(((ListTypeInfo) typeInfo).getListElementTypeInfo());
+        break;
+      case MAP:
+        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+        validateColumnType(mapTypeInfo.getMapKeyTypeInfo());
+        validateColumnType(mapTypeInfo.getMapValueTypeInfo());
+        break;
+    }
   }
 
+
   @Override
   public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table 
hmsTable, EnvironmentContext context)
       throws MetaException {
diff --git 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
index 7c338e9b61..8ecb9e31bd 100644
--- 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
+++ 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
@@ -257,7 +257,9 @@ public class TestHiveIcebergCTAS extends 
HiveIcebergStorageHandlerWithEngineBase
     Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == 
TestTables.TestTableType.HIVE_CATALOG);
     Map<String, Type> notSupportedTypes = ImmutableMap.of(
         "TINYINT", Types.IntegerType.get(),
-        "SMALLINT", Types.IntegerType.get());
+        "SMALLINT", Types.IntegerType.get(),
+        "VARCHAR(1)", Types.StringType.get(),
+        "CHAR(1)", Types.StringType.get());
 
     shell.setHiveSessionValue(InputFormatConfig.SCHEMA_AUTO_CONVERSION, 
"true");
 
@@ -290,19 +292,19 @@ public class TestHiveIcebergCTAS extends 
HiveIcebergStorageHandlerWithEngineBase
         "boolean_col_5 BOOLEAN, " +
         "float_col_6 FLOAT, " +
         "bigint_col_7 BIGINT, " +
-        "string0098_col_8 STRING, " +
+        "varchar0098_col_8 VARCHAR(98), " +
         "timestamp_col_9 TIMESTAMP, " +
         "bigint_col_10 BIGINT, " +
         "decimal0903_col_11 DECIMAL(9, 3), " +
         "timestamp_col_12 TIMESTAMP, " +
         "timestamp_col_13 TIMESTAMP, " +
         "float_col_14 FLOAT, " +
-        "string0254_col_15 STRING, " +
+        "char0254_col_15 CHAR(254), " +
         "double_col_16 DOUBLE, " +
         "timestamp_col_17 TIMESTAMP, " +
         "boolean_col_18 BOOLEAN, " +
         "decimal2608_col_19 DECIMAL(26, 8), " +
-        "string0216_col_20 STRING, " +
+        "varchar0216_col_20 VARCHAR(216), " +
         "string_col_21 STRING, " +
         "bigint_col_22 BIGINT, " +
         "boolean_col_23 BOOLEAN, " +
@@ -315,7 +317,7 @@ public class TestHiveIcebergCTAS extends 
HiveIcebergStorageHandlerWithEngineBase
         "decimal2020_col_30 DECIMAL(20, 20), " +
         "boolean_col_31 BOOLEAN, " +
         "double_col_32 DOUBLE, " +
-        "string0148_col_33 STRING, " +
+        "varchar0148_col_33 VARCHAR(148), " +
         "decimal2121_col_34 DECIMAL(21, 21), " +
         "tinyint_col_35 TINYINT, " +
         "boolean_col_36 BOOLEAN, " +
@@ -326,7 +328,7 @@ public class TestHiveIcebergCTAS extends 
HiveIcebergStorageHandlerWithEngineBase
         "decimal1408_col_41 DECIMAL(14, 8), " +
         "string_col_42 STRING, " +
         "decimal0902_col_43 DECIMAL(9, 2), " +
-        "string0204_col_44 STRING, " +
+        "varchar0204_col_44 VARCHAR(204), " +
         "boolean_col_45 BOOLEAN, " +
         "timestamp_col_46 TIMESTAMP, " +
         "boolean_col_47 BOOLEAN, " +
@@ -339,12 +341,12 @@ public class TestHiveIcebergCTAS extends 
HiveIcebergStorageHandlerWithEngineBase
         "timestamp_col_54 TIMESTAMP, " +
         "int_col_55 INT, " +
         "decimal0505_col_56 DECIMAL(5, 5), " +
-        "string0155_col_57 STRING, " +
+        "char0155_col_57 CHAR(155), " +
         "boolean_col_58 BOOLEAN, " +
         "bigint_col_59 BIGINT, " +
         "boolean_col_60 BOOLEAN, " +
         "boolean_col_61 BOOLEAN, " +
-        "string0249_col_62 STRING, " +
+        "char0249_col_62 CHAR(249), " +
         "boolean_col_63 BOOLEAN, " +
         "timestamp_col_64 TIMESTAMP, " +
         "decimal1309_col_65 DECIMAL(13, 9), " +
@@ -356,12 +358,12 @@ public class TestHiveIcebergCTAS extends 
HiveIcebergStorageHandlerWithEngineBase
         "timestamp_col_71 TIMESTAMP, " +
         "double_col_72 DOUBLE, " +
         "boolean_col_73 BOOLEAN, " +
-        "string0222_col_74 STRING, " +
+        "char0222_col_74 CHAR(222), " +
         "float_col_75 FLOAT, " +
         "string_col_76 STRING, " +
         "decimal2612_col_77 DECIMAL(26, 12), " +
         "timestamp_col_78 TIMESTAMP, " +
-        "string0128_col_79 STRING, " +
+        "char0128_col_79 CHAR(128), " +
         "timestamp_col_80 TIMESTAMP, " +
         "double_col_81 DOUBLE, " +
         "timestamp_col_82 TIMESTAMP, " +
diff --git 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
index 5a68400475..d8380892a7 100644
--- 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
+++ 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
@@ -33,6 +33,7 @@ import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.mr.InputFormatConfig;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.thrift.TException;
@@ -50,6 +51,57 @@ import org.mockito.Mockito;
  */
 public class TestHiveIcebergMigration extends 
HiveIcebergStorageHandlerWithEngineBase {
 
+  @Test
+  public void testMigrateHiveTableWithPrimitiveTypeColumnsToIceberg() throws 
TException, InterruptedException {
+    shell.setHiveSessionValue(InputFormatConfig.SCHEMA_AUTO_CONVERSION, 
"true");
+    TableIdentifier identifier = TableIdentifier.of("default", "tbl_alltypes");
+    shell.executeStatement(String.format("CREATE EXTERNAL TABLE %s (" +
+        "a INT, " +
+        "decimal_col DECIMAL(30, 3), " +
+        "tinyint_col TINYINT, " +
+        "boolean_col BOOLEAN, " +
+        "float_col FLOAT, " +
+        "bigint_col BIGINT, " +
+        "double_col DOUBLE, " +
+        "string_col STRING, " +
+        "int_col INT, " +
+        "smallint_col SMALLINT) " +
+        "STORED AS %s %s %s",
+        identifier.name(), fileFormat.name(), 
testTables.locationForCreateTableSQL(identifier),
+        testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
+
+    shell.executeStatement(String.format("INSERT INTO TABLE %s VALUES(" +
+        "1, " +
+        "13.234, " +
+        "8, " +
+        "false, " +
+        "0.7896, " +
+        "543643275, " +
+        "5462435243, " +
+        "'wfewjifwejfoewfnvewokfow', " +
+        "43221, " +
+        "129 " +
+        ")", identifier.name()));
+
+    validateMigration(identifier.name());
+  }
+
+  @Test
+  public void 
testMigrateHiveTableWithUnsupportedPrimitiveTypeColumnToIceberg() {
+    // enough to test once
+    Assume.assumeTrue(fileFormat == FileFormat.ORC && isVectorized &&
+        testTableType == TestTables.TestTableType.HIVE_CATALOG);
+    TableIdentifier identifier = TableIdentifier.of("default", 
"tbl_unsupportedtypes");
+    shell.executeStatement(String.format("CREATE EXTERNAL TABLE %s (" +
+        "char_col CHAR(10)) STORED AS %s %s %s", identifier.name(), 
fileFormat.name(),
+        testTables.locationForCreateTableSQL(identifier), 
testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
+    AssertHelpers.assertThrows("should throw exception", 
IllegalArgumentException.class,
+        "Cannot convert hive table to iceberg that", () -> {
+          shell.executeStatement(String.format("ALTER TABLE %s SET 
TBLPROPERTIES " +
+              
"('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')", 
identifier.name()));
+        });
+  }
+
   @Test
   public void testMigrateHiveTableWithComplexTypeColumnsToIceberg() throws 
TException, InterruptedException {
     TableIdentifier identifier = TableIdentifier.of("default", "tbl_complex");
@@ -221,7 +273,10 @@ public class TestHiveIcebergMigration extends 
HiveIcebergStorageHandlerWithEngin
     List<Object[]> alterResult = shell.executeStatement("SELECT * FROM " + 
tableName + " ORDER BY a");
     Assert.assertEquals(originalResult.size(), alterResult.size());
     for (int i = 0; i < originalResult.size(); i++) {
-      Assert.assertTrue(Arrays.equals(originalResult.get(i), 
alterResult.get(i)));
+      Assert.assertEquals(originalResult.get(i).length, 
alterResult.get(i).length);
+      for (int j = 0; j < originalResult.get(i).length; j++) {
+        Assert.assertEquals(String.valueOf(originalResult.get(i)[j]), 
String.valueOf(alterResult.get(i)[j]));
+      }
     }
     Table hmsTable = shell.metastore().getTable("default", tableName);
     validateSd(hmsTable, "iceberg");
diff --git 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 2a617e88fe..dd76545aac 100644
--- 
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ 
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -781,7 +781,9 @@ public class TestHiveIcebergStorageHandlerNoScan {
     // Can not create INTERVAL types from normal create table, so leave them 
out from this test
     Map<String, Type> notSupportedTypes = ImmutableMap.of(
         "TINYINT", Types.IntegerType.get(),
-        "SMALLINT", Types.IntegerType.get());
+        "SMALLINT", Types.IntegerType.get(),
+        "VARCHAR(1)", Types.StringType.get(),
+        "CHAR(1)", Types.StringType.get());
 
     shell.setHiveSessionValue(InputFormatConfig.SCHEMA_AUTO_CONVERSION, 
"true");
 
@@ -796,19 +798,6 @@ public class TestHiveIcebergStorageHandlerNoScan {
       Assert.assertEquals(notSupportedTypes.get(notSupportedType), 
icebergTable.schema().columns().get(0).type());
       shell.executeStatement("DROP TABLE not_supported_types");
     }
-
-    List<String> notCompatibleTypes = ImmutableList.of("VARCHAR(1)", 
"CHAR(1)");
-
-    for (String notCompatibleType : notCompatibleTypes) {
-      AssertHelpers.assertThrows("should throw exception", 
IllegalArgumentException.class,
-          "Unsupported Hive type", () -> {
-            shell.executeStatement("CREATE EXTERNAL TABLE not_compatible_types 
(not_supported " + notCompatibleType +
-                ") STORED BY ICEBERG " +
-                testTables.locationForCreateTableSQL(identifier) +
-                testTables.propertiesForCreateTableSQL(
-                    ImmutableMap.of(InputFormatConfig.EXTERNAL_TABLE_PURGE, 
"TRUE")));
-          });
-    }
   }
 
   @Test

Reply via email to