Repository: incubator-gobblin Updated Branches: refs/heads/master 241a1b3b7 -> f7ea77eb9
[GOBBLIN-514] AvroUtils#parseSchemaFromFile should use modified UTF-8 encoding to read file Closes #2385 from aditya1105/schemaParsing Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/f7ea77eb Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/f7ea77eb Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/f7ea77eb Branch: refs/heads/master Commit: f7ea77eb9b5829271e5a750b5d68b071a25a9f4c Parents: 241a1b3 Author: aditya1105 <[email protected]> Authored: Thu Jun 14 15:59:26 2018 -0700 Committer: Abhishek Tiwari <[email protected]> Committed: Thu Jun 14 15:59:26 2018 -0700 ---------------------------------------------------------------------- .../hive/avro/AvroSchemaManagerTest.java | 29 ++++++++++++++++---- .../java/org/apache/gobblin/util/AvroUtils.java | 6 ++-- 2 files changed, 27 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/f7ea77eb/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java ---------------------------------------------------------------------- diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java index c28a3d3..eeba93e 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/conversion/hive/avro/AvroSchemaManagerTest.java @@ -20,6 +20,11 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParseException; +import org.apache.gobblin.configuration.ConfigurationKeys; +import org.apache.gobblin.configuration.State; +import org.apache.gobblin.util.AvroUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -36,9 +41,6 @@ import org.testng.annotations.Test; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import org.apache.gobblin.configuration.ConfigurationKeys; -import org.apache.gobblin.configuration.State; - public class AvroSchemaManagerTest { @Test @@ -53,10 +55,27 @@ public class AvroSchemaManagerTest { Partition partition = getTestPartition(new Table("testDb", "testTable")); Path schemaPath = asm.getSchemaUrl(partition); - String actualSchema = fs.open(schemaPath).readUTF(); + Schema actualSchema = AvroUtils.parseSchemaFromFile(schemaPath, fs); String expectedSchema = new String(Files.readAllBytes( Paths.get(getClass().getClassLoader().getResource("avroSchemaManagerTest/expectedSchema.avsc").getFile()))); - Assert.assertEquals(actualSchema, expectedSchema); + Assert.assertEquals(actualSchema.toString(), expectedSchema); + } + + @Test(expectedExceptions = SchemaParseException.class) + public void testExceptionWhenReadingSchemaUsingParser() + throws IOException, HiveException { + FileSystem fs = FileSystem.getLocal(new Configuration()); + + String jobId = "123"; + State state = new State(); + state.setProp(ConfigurationKeys.JOB_ID_KEY, jobId); + + AvroSchemaManager asm = new AvroSchemaManager(fs, state); + Partition partition = getTestPartition(new Table("testDb", "testTable")); + Path schemaPath = asm.getSchemaUrl(partition); + // parse operation tries to read using UTF-8 encoding and fails + // because schema is written using modified UTF-8 encoding + new Schema.Parser().parse(fs.open(schemaPath)); } private Partition getTestPartition(Table table) throws HiveException { http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/f7ea77eb/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java ---------------------------------------------------------------------- diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java index c1d65d8..7d7e497 100644 --- a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java +++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroUtils.java @@ -20,7 +20,6 @@ package org.apache.gobblin.util; import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -51,6 +50,7 @@ import org.apache.avro.mapred.FsInput; import org.apache.avro.util.Utf8; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -380,8 +380,8 @@ public class AvroUtils { public static Schema parseSchemaFromFile(Path filePath, FileSystem fs) throws IOException { Preconditions.checkArgument(fs.exists(filePath), filePath + " does not exist"); - try (InputStream in = fs.open(filePath)) { - return new Schema.Parser().parse(in); + try (FSDataInputStream in = fs.open(filePath)) { + return new Schema.Parser().parse(in.readUTF()); } }
