This is an automated email from the ASF dual-hosted git repository.

suvasude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new f60409e  
[GOBBLIN-924][GOBBLIN-922][GOBBLIN-909][GOBBLIN-923][GOBBLIN-906][GOBBLIN-921]
 Get rid of orc.schema.literal in ORC-ingestion and registration
f60409e is described below

commit f60409ef0b6768bf46ddd137333d8d56981798fc
Author: Zihan Li <[email protected]>
AuthorDate: Thu Oct 31 13:30:34 2019 -0700

    
[GOBBLIN-924][GOBBLIN-922][GOBBLIN-909][GOBBLIN-923][GOBBLIN-906][GOBBLIN-921]
 Get rid of orc.schema.literal in ORC-ingestion and registration
    
    Closes #2780 from ZihanLi58/GOBBLIN-924
---
 .../gobblin/hive/orc/HiveOrcSerDeManager.java      | 22 ++++++++++------------
 .../gobblin/hive/orc/HiveOrcSerDeManagerTest.java  | 17 +++++++++++------
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git 
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
 
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
index 7fdceba..f20f962 100644
--- 
a/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
+++ 
b/gobblin-hive-registration/src/main/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManager.java
@@ -61,8 +61,6 @@ import org.apache.gobblin.util.HadoopUtils;
  */
 @Slf4j
 public class HiveOrcSerDeManager extends HiveSerDeManager {
-  // Schema is in the format of TypeDescriptor
-  public static final String SCHEMA_LITERAL = "orc.schema.literal";
 
   // Extensions of files containing ORC data
   public static final String FILE_EXTENSIONS_KEY = 
"hiveOrcSerdeManager.fileExtensions";
@@ -114,10 +112,13 @@ public class HiveOrcSerDeManager extends HiveSerDeManager 
{
   }
 
   @Override
+  //Using LIST_COLUMNS and LIST_COLUMN_TYPES to compare schema
   public boolean haveSameSchema(HiveRegistrationUnit unit1, 
HiveRegistrationUnit unit2)
       throws IOException {
-    if (unit1.getSerDeProps().contains(SCHEMA_LITERAL) && 
unit2.getSerDeProps().contains(SCHEMA_LITERAL)) {
-      return 
unit1.getSerDeProps().getProp(SCHEMA_LITERAL).equals(unit2.getSerDeProps().getProp(SCHEMA_LITERAL));
+    if (unit1.getSerDeProps().contains(serdeConstants.LIST_COLUMNS) && 
unit2.getSerDeProps().contains(serdeConstants.LIST_COLUMNS)
+    && unit1.getSerDeProps().contains(serdeConstants.LIST_COLUMN_TYPES) && 
unit2.getSerDeProps().contains(serdeConstants.LIST_COLUMN_TYPES)) {
+      return 
unit1.getSerDeProps().getProp(serdeConstants.LIST_COLUMNS).equals(unit2.getSerDeProps().getProp(serdeConstants.LIST_COLUMNS))
+          && 
unit1.getSerDeProps().getProp(serdeConstants.LIST_COLUMN_TYPES).equals(unit2.getSerDeProps().getProp(serdeConstants.LIST_COLUMN_TYPES));
     } else {
       return false;
     }
@@ -152,18 +153,18 @@ public class HiveOrcSerDeManager extends HiveSerDeManager 
{
     if (source.getOutputFormat().isPresent()) {
       target.setOutputFormat(source.getOutputFormat().get());
     }
-    if (source.getSerDeProps().contains(SCHEMA_LITERAL)) {
-      target.setSerDeProp(SCHEMA_LITERAL, 
source.getSerDeProps().getProp(SCHEMA_LITERAL));
-    }
   }
 
   @Override
   public void updateSchema(HiveRegistrationUnit existingUnit, 
HiveRegistrationUnit newUnit)
       throws IOException {
     Preconditions.checkArgument(
-        newUnit.getSerDeProps().contains(SCHEMA_LITERAL));
+        newUnit.getSerDeProps().contains(serdeConstants.LIST_COLUMNS));
+    Preconditions.checkArgument(
+        newUnit.getSerDeProps().contains(serdeConstants.LIST_COLUMN_TYPES));
 
-    existingUnit.setSerDeProp(SCHEMA_LITERAL, 
newUnit.getSerDeProps().getProp(SCHEMA_LITERAL));
+    existingUnit.setSerDeProp(serdeConstants.LIST_COLUMNS, 
newUnit.getSerDeProps().getProp(serdeConstants.LIST_COLUMNS));
+    existingUnit.setSerDeProp(serdeConstants.LIST_COLUMN_TYPES, 
newUnit.getSerDeProps().getProp(serdeConstants.LIST_COLUMN_TYPES));
   }
 
   /**
@@ -261,14 +262,11 @@ public class HiveOrcSerDeManager extends HiveSerDeManager 
{
    * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMNS and
    * org.apache.hadoop.hive.serde.serdeConstants#LIST_COLUMN_TYPES
    *
-   * Keeping {@link #SCHEMA_LITERAL} will be a nice-to-have thing but not 
actually necessary in terms of functionality.
    */
   protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit 
hiveUnit) throws IOException {
     TypeInfo schema = getSchemaFromLatestFile(path, this.fs);
     if (schema instanceof StructTypeInfo) {
       StructTypeInfo structTypeInfo = (StructTypeInfo) schema;
-
-      hiveUnit.setSerDeProp(SCHEMA_LITERAL, schema);
       hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMNS,
           Joiner.on(",").join(structTypeInfo.getAllStructFieldNames()));
       hiveUnit.setSerDeProp(serdeConstants.LIST_COLUMN_TYPES,
diff --git 
a/gobblin-hive-registration/src/test/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManagerTest.java
 
b/gobblin-hive-registration/src/test/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManagerTest.java
index 782469e..a5b2ee5 100644
--- 
a/gobblin-hive-registration/src/test/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManagerTest.java
+++ 
b/gobblin-hive-registration/src/test/java/org/apache/gobblin/hive/orc/HiveOrcSerDeManagerTest.java
@@ -81,8 +81,6 @@ public class HiveOrcSerDeManagerTest {
 
     manager.addSerDeProperties(this.testRegisterPath, registrationUnit);
 
-    
Assert.assertTrue(registrationUnit.getSerDeProps().getProp(HiveOrcSerDeManager.SCHEMA_LITERAL).contains(
-        "name:string,timestamp:bigint"));
 
     List<String> columns = 
Arrays.asList(registrationUnit.getSerDeProps().getProp(serdeConstants.LIST_COLUMNS).split(","));
     Assert.assertTrue(columns.get(0).equals("name"));
@@ -105,8 +103,7 @@ public class HiveOrcSerDeManagerTest {
 
     manager.addSerDeProperties(this.testRegisterPath, registrationUnit);
 
-    
Assert.assertTrue(registrationUnit.getSerDeProps().getProp(HiveOrcSerDeManager.SCHEMA_LITERAL).contains(
-        "name:string,timestamp:bigint"));
+    examineSchema(registrationUnit);
   }
 
   /**
@@ -124,8 +121,7 @@ public class HiveOrcSerDeManagerTest {
 
     manager.addSerDeProperties(this.testRegisterPath, registrationUnit);
 
-    
Assert.assertTrue(registrationUnit.getSerDeProps().getProp(HiveOrcSerDeManager.SCHEMA_LITERAL).contains(
-        "name:string,timestamp:bigint"));
+    examineSchema(registrationUnit);
     Assert.assertEquals(registrationUnit.getSerDeType().get(), 
OrcSerde.class.getName());
     Assert.assertEquals(registrationUnit.getInputFormat().get(), 
"customInputFormat");
     Assert.assertEquals(registrationUnit.getOutputFormat().get(), 
"customOutputFormat");
@@ -158,6 +154,15 @@ public class HiveOrcSerDeManagerTest {
     manager.addSerDeProperties(this.testRegisterPath, registrationUnit);
   }
 
+  public void examineSchema(HiveRegistrationUnit registrationUnit) {
+    List<String> columns = 
Arrays.asList(registrationUnit.getSerDeProps().getProp(serdeConstants.LIST_COLUMNS).split(","));
+    Assert.assertTrue(columns.get(0).equals("name"));
+    Assert.assertTrue(columns.get(1).equals("timestamp"));
+    List<String> columnTypes = 
Arrays.asList(registrationUnit.getSerDeProps().getProp(serdeConstants.LIST_COLUMN_TYPES).split(","));
+    Assert.assertTrue(columnTypes.get(0).equals("string"));
+    Assert.assertTrue(columnTypes.get(1).equals("bigint"));
+  }
+
   @AfterClass
   public void tearDown() throws IOException {
     FileSystem fs = FileSystem.getLocal(new Configuration());

Reply via email to