This is an automated email from the ASF dual-hosted git repository.

suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new bf8f7b1  [GOBBLIN-999] Separate Hive-Avro type related constants out of Avro2ORC specific module to make it re-usable
bf8f7b1 is described below

commit bf8f7b1ec70bd83e4256343ed249577924f0a330
Author: autumnust <[email protected]>
AuthorDate: Fri Dec 6 15:46:04 2019 -0800

    [GOBBLIN-999] Separate Hive-Avro type related constants out of Avro2ORC specific module to make it re-usable
    
    Closes #2844 from autumnust/avro-hive-TypeInfo-reuse
---
 .../hive/query/HiveAvroORCQueryGenerator.java      | 58 +++---------------
 .../apache/gobblin/util/HiveAvroTypeConstants.java | 69 ++++++++++++++++++++++
 2 files changed, 77 insertions(+), 50 deletions(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/query/HiveAvroORCQueryGenerator.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/query/HiveAvroORCQueryGenerator.java
index 7f8dacc..8d593ed 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/query/HiveAvroORCQueryGenerator.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/conversion/hive/query/HiveAvroORCQueryGenerator.java
@@ -22,7 +22,6 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
-import java.util.Set;
 
 import java.util.stream.Collectors;
 import lombok.ToString;
@@ -31,6 +30,7 @@ import lombok.extern.slf4j.Slf4j;
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.gobblin.util.HiveAvroTypeConstants;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -47,12 +47,9 @@ import com.google.common.base.Function;
 import com.google.common.base.Joiner;
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import com.google.gson.Gson;
 import com.google.gson.GsonBuilder;
 
@@ -86,45 +83,6 @@ public class HiveAvroORCQueryGenerator {
         DEFAULT_TBL_PROPERTIES.setProperty(ORC_ROW_INDEX_STRIDE_KEY, 
DEFAULT_ORC_ROW_INDEX_STRIDE);
       }
 
-  // Avro to Hive schema mapping
-  private static final Map<Schema.Type, String> 
AVRO_TO_HIVE_COLUMN_MAPPING_V_12 = ImmutableMap
-      .<Schema.Type, String>builder()
-      .put(Schema.Type.NULL,    "void")
-      .put(Schema.Type.BOOLEAN, "boolean")
-      .put(Schema.Type.INT,     "int")
-      .put(Schema.Type.LONG,    "bigint")
-      .put(Schema.Type.FLOAT,   "float")
-      .put(Schema.Type.DOUBLE,  "double")
-      .put(Schema.Type.BYTES,   "binary")
-      .put(Schema.Type.STRING,  "string")
-      .put(Schema.Type.RECORD,  "struct")
-      .put(Schema.Type.MAP,     "map")
-      .put(Schema.Type.ARRAY,   "array")
-      .put(Schema.Type.UNION,   "uniontype")
-      .put(Schema.Type.ENUM,    "string")
-      .put(Schema.Type.FIXED,   "binary")
-      .build();
-
-  // Hive evolution types supported
-  private static final Map<String, Set<String>> HIVE_COMPATIBLE_TYPES = 
ImmutableMap
-      .<String, Set<String>>builder()
-      .put("tinyint", ImmutableSet.<String>builder()
-          .add("smallint", "int", "bigint", "float", "double", "decimal", 
"string", "varchar").build())
-      .put("smallint",  ImmutableSet.<String>builder().add("int", "bigint", 
"float", "double", "decimal", "string",
-          "varchar").build())
-      .put("int",       ImmutableSet.<String>builder().add("bigint", "float", 
"double", "decimal", "string", "varchar")
-          .build())
-      .put("bigint",    ImmutableSet.<String>builder().add("float", "double", 
"decimal", "string", "varchar").build())
-      .put("float",     ImmutableSet.<String>builder().add("double", 
"decimal", "string", "varchar").build())
-      .put("double",    ImmutableSet.<String>builder().add("decimal", 
"string", "varchar").build())
-      .put("decimal",   ImmutableSet.<String>builder().add("string", 
"varchar").build())
-      .put("string",    ImmutableSet.<String>builder().add("double", 
"decimal", "varchar").build())
-      .put("varchar",   ImmutableSet.<String>builder().add("double", "string", 
"varchar").build())
-      .put("timestamp", ImmutableSet.<String>builder().add("string", 
"varchar").build())
-      .put("date",      ImmutableSet.<String>builder().add("string", 
"varchar").build())
-      .put("binary",    Sets.<String>newHashSet())
-      .put("boolean",    Sets.<String>newHashSet()).build();
-
   @ToString
   public static enum COLUMN_SORT_ORDER {
     ASC ("ASC"),
@@ -439,7 +397,7 @@ public class HiveAvroORCQueryGenerator {
             columns.append(String.format("  `%s` %s COMMENT 'from 
flatten_source %s'", field.name(), type,flattenSource));
           }
         } else {
-          
columns.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
+          
columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
           for (Schema.Field field : schema.getFields()) {
             if (isFirst) {
               isFirst = false;
@@ -458,7 +416,7 @@ public class HiveAvroORCQueryGenerator {
           Schema optionalTypeSchema = optionalType.get();
           columns.append(generateAvroToHiveColumnMapping(optionalTypeSchema, 
hiveColumns, false, datasetName));
         } else {
-          
columns.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
+          
columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
           isFirst = true;
           for (Schema unionMember : schema.getTypes()) {
             if (Schema.Type.NULL.equals(unionMember.getType())) {
@@ -475,13 +433,13 @@ public class HiveAvroORCQueryGenerator {
         }
         break;
       case MAP:
-        
columns.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
+        
columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
         columns.append("string,")
             .append(generateAvroToHiveColumnMapping(schema.getValueType(), 
hiveColumns, false, datasetName));
         columns.append(">");
         break;
       case ARRAY:
-        
columns.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
+        
columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType())).append("<");
         
columns.append(generateAvroToHiveColumnMapping(schema.getElementType(), 
hiveColumns, false, datasetName));
         columns.append(">");
         break;
@@ -496,7 +454,7 @@ public class HiveAvroORCQueryGenerator {
       case LONG:
       case STRING:
       case BOOLEAN:
-        columns.append(AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType()));
+        
columns.append(HiveAvroTypeConstants.AVRO_TO_HIVE_COLUMN_MAPPING_V_12.get(schema.getType()));
         break;
       default:
         String exceptionMessage =
@@ -1094,8 +1052,8 @@ public class HiveAvroORCQueryGenerator {
       return false;
     }
     // Look for compatibility in evolved type
-    if (HIVE_COMPATIBLE_TYPES.containsKey(destinationType)) {
-      if (HIVE_COMPATIBLE_TYPES.get(destinationType).contains(evolvedType)) {
+    if 
(HiveAvroTypeConstants.HIVE_COMPATIBLE_TYPES.containsKey(destinationType)) {
+      if 
(HiveAvroTypeConstants.HIVE_COMPATIBLE_TYPES.get(destinationType).contains(evolvedType))
 {
         return true;
       } else {
         throw new RuntimeException(String.format("Incompatible type evolution 
from: %s to: %s",
diff --git 
a/gobblin-utility/src/main/java/org/apache/gobblin/util/HiveAvroTypeConstants.java
 
b/gobblin-utility/src/main/java/org/apache/gobblin/util/HiveAvroTypeConstants.java
new file mode 100644
index 0000000..8a86840
--- /dev/null
+++ 
b/gobblin-utility/src/main/java/org/apache/gobblin/util/HiveAvroTypeConstants.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.util;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.avro.Schema;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+
+
+public class HiveAvroTypeConstants {
+
+  // Avro to Hive schema mapping
+  public static final Map<Schema.Type, String> 
AVRO_TO_HIVE_COLUMN_MAPPING_V_12 = ImmutableMap
+      .<Schema.Type, String>builder()
+      .put(Schema.Type.NULL,    "void")
+      .put(Schema.Type.BOOLEAN, "boolean")
+      .put(Schema.Type.INT,     "int")
+      .put(Schema.Type.LONG,    "bigint")
+      .put(Schema.Type.FLOAT,   "float")
+      .put(Schema.Type.DOUBLE,  "double")
+      .put(Schema.Type.BYTES,   "binary")
+      .put(Schema.Type.STRING,  "string")
+      .put(Schema.Type.RECORD,  "struct")
+      .put(Schema.Type.MAP,     "map")
+      .put(Schema.Type.ARRAY,   "array")
+      .put(Schema.Type.UNION,   "uniontype")
+      .put(Schema.Type.ENUM,    "string")
+      .put(Schema.Type.FIXED,   "binary")
+      .build();
+  // Hive evolution types supported
+  public static final Map<String, Set<String>> HIVE_COMPATIBLE_TYPES = 
ImmutableMap
+      .<String, Set<String>>builder()
+      .put("tinyint", ImmutableSet.<String>builder()
+          .add("smallint", "int", "bigint", "float", "double", "decimal", 
"string", "varchar").build())
+      .put("smallint",  ImmutableSet.<String>builder().add("int", "bigint", 
"float", "double", "decimal", "string",
+          "varchar").build())
+      .put("int",       ImmutableSet.<String>builder().add("bigint", "float", 
"double", "decimal", "string", "varchar")
+          .build())
+      .put("bigint",    ImmutableSet.<String>builder().add("float", "double", 
"decimal", "string", "varchar").build())
+      .put("float",     ImmutableSet.<String>builder().add("double", 
"decimal", "string", "varchar").build())
+      .put("double",    ImmutableSet.<String>builder().add("decimal", 
"string", "varchar").build())
+      .put("decimal",   ImmutableSet.<String>builder().add("string", 
"varchar").build())
+      .put("string",    ImmutableSet.<String>builder().add("double", 
"decimal", "varchar").build())
+      .put("varchar",   ImmutableSet.<String>builder().add("double", "string", 
"varchar").build())
+      .put("timestamp", ImmutableSet.<String>builder().add("string", 
"varchar").build())
+      .put("date",      ImmutableSet.<String>builder().add("string", 
"varchar").build())
+      .put("binary",    Sets.<String>newHashSet())
+      .put("boolean",    Sets.<String>newHashSet()).build();
+}

Reply via email to