Repository: incubator-gobblin
Updated Branches:
  refs/heads/master 241a1b3b7 -> f7ea77eb9


[GOBBLIN-514] AvroUtils#parseSchemaFromFile should use modified UTF-8 encoding to read file

Closes #2385 from aditya1105/schemaParsing


Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/f7ea77eb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/f7ea77eb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/f7ea77eb

Branch: refs/heads/master
Commit: f7ea77eb9b5829271e5a750b5d68b071a25a9f4c
Parents: 241a1b3
Author: aditya1105 <[email protected]>
Authored: Thu Jun 14 15:59:26 2018 -0700
Committer: Abhishek Tiwari <[email protected]>
Committed: Thu Jun 14 15:59:26 2018 -0700

----------------------------------------------------------------------
 .../hive/avro/AvroSchemaManagerTest.java        | 29 ++++++++++++++++----
 .../java/org/apache/gobblin/util/AvroUtils.java |  6 ++--
 2 files changed, 27 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/f7ea77eb/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java
----------------------------------------------------------------------
diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java
index c28a3d3..eeba93e 100644
--- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java
+++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java
@@ -20,6 +20,11 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaParseException;
+import org.apache.gobblin.configuration.ConfigurationKeys;
+import org.apache.gobblin.configuration.State;
+import org.apache.gobblin.util.AvroUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -36,9 +41,6 @@ import org.testng.annotations.Test;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 
-import org.apache.gobblin.configuration.ConfigurationKeys;
-import org.apache.gobblin.configuration.State;
-
 
 public class AvroSchemaManagerTest {
   @Test
@@ -53,10 +55,27 @@ public class AvroSchemaManagerTest {
     Partition partition = getTestPartition(new Table("testDb", "testTable"));
     Path schemaPath = asm.getSchemaUrl(partition);
 
-    String actualSchema = fs.open(schemaPath).readUTF();
+    Schema actualSchema = AvroUtils.parseSchemaFromFile(schemaPath, fs);
     String expectedSchema = new String(Files.readAllBytes(
         Paths.get(getClass().getClassLoader().getResource("avroSchemaManagerTest/expectedSchema.avsc").getFile())));
-    Assert.assertEquals(actualSchema, expectedSchema);
+    Assert.assertEquals(actualSchema.toString(), expectedSchema);
+  }
+
+  @Test(expectedExceptions = SchemaParseException.class)
+  public void testExceptionWhenReadingSchemaUsingParser()
+      throws IOException, HiveException {
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+
+    String jobId = "123";
+    State state = new State();
+    state.setProp(ConfigurationKeys.JOB_ID_KEY, jobId);
+
+    AvroSchemaManager asm = new AvroSchemaManager(fs, state);
+    Partition partition = getTestPartition(new Table("testDb", "testTable"));
+    Path schemaPath = asm.getSchemaUrl(partition);
+    // parse operation tries to read using UTF-8 encoding and fails
+    // because schema is written using modified UTF-8 encoding
+    new Schema.Parser().parse(fs.open(schemaPath));
   }
 
   private Partition getTestPartition(Table table) throws HiveException {

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/f7ea77eb/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java
----------------------------------------------------------------------
diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java
index c1d65d8..7d7e497 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java
@@ -20,7 +20,6 @@ package org.apache.gobblin.util;
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -51,6 +50,7 @@ import org.apache.avro.mapred.FsInput;
 import org.apache.avro.util.Utf8;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -380,8 +380,8 @@ public class AvroUtils {
   public static Schema parseSchemaFromFile(Path filePath, FileSystem fs) throws IOException {
     Preconditions.checkArgument(fs.exists(filePath), filePath + " does not exist");
 
-    try (InputStream in = fs.open(filePath)) {
-      return new Schema.Parser().parse(in);
+    try (FSDataInputStream in = fs.open(filePath)) {
+      return new Schema.Parser().parse(in.readUTF());
     }
   }
 

Reply via email to