nsivabalan commented on a change in pull request #2160:
URL: https://github.com/apache/hudi/pull/2160#discussion_r577552355



##########
File path: 
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
##########
@@ -155,113 +150,75 @@ private static boolean isFieldExistsInSchema(Map<String, 
String> newTableSchema,
    * @param parquetType : Single paruet field

Review comment:
       minor. fix java docs. param talks about parquet type. 

##########
File path: 
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
##########
@@ -155,113 +150,75 @@ private static boolean isFieldExistsInSchema(Map<String, 
String> newTableSchema,
    * @param parquetType : Single paruet field
    * @return : Equivalent sHive schema
    */
-  private static String convertField(final Type parquetType) {
+  private static String convertFieldFromAvro(final Schema schema) {
     StringBuilder field = new StringBuilder();
-    if (parquetType.isPrimitive()) {
-      final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
-          parquetType.asPrimitiveType().getPrimitiveTypeName();
-      final OriginalType originalType = parquetType.getOriginalType();
-      if (originalType == OriginalType.DECIMAL) {
-        final DecimalMetadata decimalMetadata = 
parquetType.asPrimitiveType().getDecimalMetadata();
-        return 
field.append("DECIMAL(").append(decimalMetadata.getPrecision()).append(" , ")
-            .append(decimalMetadata.getScale()).append(")").toString();
-      } else if (originalType == OriginalType.DATE) {
+    Schema.Type type = schema.getType();
+    LogicalType logicalType = schema.getLogicalType();
+    if (logicalType != null) {
+      if (logicalType instanceof LogicalTypes.Decimal) {
+        return field.append("DECIMAL(").append(((LogicalTypes.Decimal) 
logicalType).getPrecision()).append(" , ")
+            .append(((LogicalTypes.Decimal) 
logicalType).getScale()).append(")").toString();
+      } else if (logicalType instanceof LogicalTypes.Date) {
         return field.append("DATE").toString();
+      } else {
+        Log.info("not handle the type transform");

Review comment:
       do we need INFO logging here? won't this be printed for every primitive 
type. can you help me understand. 

##########
File path: 
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
##########
@@ -84,31 +87,32 @@ public void testSchemaConvertArray() throws IOException {
     MessageType schema = 
Types.buildMessage().optionalGroup().as(OriginalType.LIST).repeatedGroup()

Review comment:
       minor. do we need to fix line 79(old) / 82 (new) for java doc. it links 
to parquet list. guess we are moving to Avro with this diff. 

##########
File path: 
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
##########
@@ -84,31 +87,32 @@ public void testSchemaConvertArray() throws IOException {
     MessageType schema = 
Types.buildMessage().optionalGroup().as(OriginalType.LIST).repeatedGroup()
         
.optional(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("list").named("int_list")
         .named("ArrayOfInts");
-
-    String schemaString = HiveSchemaUtil.generateSchemaString(schema);
+    AvroSchemaConverter avroSchemaConverter = new AvroSchemaConverter(new 
Configuration());

Review comment:
       guess we don't have good coverage of tests for convertFieldFromAvro. All 
I see are tests for testSchemaConvertArray and 
testSchemaConvertTimestampMicros. Would you mind adding tests for 
convertFieldFromAvro to cover all field types (primitives, list, enum, map, 
nested records)

##########
File path: 
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
##########
@@ -155,113 +150,75 @@ private static boolean isFieldExistsInSchema(Map<String, 
String> newTableSchema,
    * @param parquetType : Single paruet field
    * @return : Equivalent sHive schema
    */
-  private static String convertField(final Type parquetType) {
+  private static String convertFieldFromAvro(final Schema schema) {

Review comment:
       Did you take inspiration from somewhere to convert Avro schema to hive 
field? Can you attach any references. 

##########
File path: 
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
##########
@@ -155,113 +150,75 @@ private static boolean isFieldExistsInSchema(Map<String, 
String> newTableSchema,
    * @param parquetType : Single paruet field
    * @return : Equivalent sHive schema
    */
-  private static String convertField(final Type parquetType) {
+  private static String convertFieldFromAvro(final Schema schema) {
     StringBuilder field = new StringBuilder();
-    if (parquetType.isPrimitive()) {
-      final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
-          parquetType.asPrimitiveType().getPrimitiveTypeName();
-      final OriginalType originalType = parquetType.getOriginalType();
-      if (originalType == OriginalType.DECIMAL) {
-        final DecimalMetadata decimalMetadata = 
parquetType.asPrimitiveType().getDecimalMetadata();
-        return 
field.append("DECIMAL(").append(decimalMetadata.getPrecision()).append(" , ")
-            .append(decimalMetadata.getScale()).append(")").toString();
-      } else if (originalType == OriginalType.DATE) {
+    Schema.Type type = schema.getType();
+    LogicalType logicalType = schema.getLogicalType();
+    if (logicalType != null) {
+      if (logicalType instanceof LogicalTypes.Decimal) {
+        return field.append("DECIMAL(").append(((LogicalTypes.Decimal) 
logicalType).getPrecision()).append(" , ")
+            .append(((LogicalTypes.Decimal) 
logicalType).getScale()).append(")").toString();
+      } else if (logicalType instanceof LogicalTypes.Date) {
         return field.append("DATE").toString();
+      } else {
+        Log.info("not handle the type transform");
       }
-      // TODO - fix the method naming here
-      return parquetPrimitiveTypeName.convert(new 
PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
-        @Override
-        public String convertBOOLEAN(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          return "boolean";
-        }
-
-        @Override
-        public String convertINT32(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          return "int";
-        }
-
-        @Override
-        public String convertINT64(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          return "bigint";
-        }
-
-        @Override
-        public String convertINT96(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          return "timestamp-millis";
-        }
-
-        @Override
-        public String convertFLOAT(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          return "float";
-        }
-
-        @Override
-        public String convertDOUBLE(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          return "double";
-        }
-
-        @Override
-        public String 
convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
-          return "binary";
-        }
-
-        @Override
-        public String convertBINARY(PrimitiveType.PrimitiveTypeName 
primitiveTypeName) {
-          if (originalType == OriginalType.UTF8 || originalType == 
OriginalType.ENUM) {
-            return "string";
-          } else {
-            return "binary";
-          }
+    }
+    if (type.equals(Schema.Type.BOOLEAN)) {

Review comment:
       +1

##########
File path: 
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
##########
@@ -327,20 +284,14 @@ private static String createHiveMap(String keyType, 
String valueType) {
   /**
    * Create an Array Hive schema from equivalent parquet list type.
    */
-  private static String createHiveArray(Type elementType, String elementName) {
+  private static String createHiveArrayFromAvro(String schemaName, Schema 
elementType) {
     StringBuilder array = new StringBuilder();
     array.append("ARRAY< ");
-    if (elementType.isPrimitive()) {
-      array.append(convertField(elementType));
+    if (elementType.getType().equals(Schema.Type.RECORD) && 
(elementType.getFields().size() == 1
+        && !elementType.getName().equals("array") && 
!elementType.getName().endsWith("_tuple"))) {

Review comment:
       I am not very conversant with these conversions. But if we can ensure we 
have full coverage of tests for all scenarios, should be easy to validate if 
these are required. 

##########
File path: 
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
##########
@@ -510,7 +514,7 @@ public void testReadSchemaForMOR(boolean useJdbc) throws 
Exception {
     assertEquals(hiveClientRT.getTableSchema(snapshotTableName).size(),
         SchemaTestUtil.getSimpleSchema().getFields().size() + 
HiveTestUtil.hiveSyncConfig.partitionFields.size()
             + HoodieRecord.HOODIE_META_COLUMNS.size(),
-        "Hive Schema should match the table schema + partition field");
+        "Hive Schema should matcHoodieDLAClient.javah the table schema + 
partition field");

Review comment:
       minor typo. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to