Repository: parquet-mr
Updated Branches:
  refs/heads/master b80b1844e -> 878ebcd0b
PARQUET-1191: Type.hashCode() takes originalType into account but Type.equals() does not

Author: Nandor Kollar <[email protected]>

Closes #450 from nandorKollar/PARQUET-1191 and squashes the following commits:

c7131df [Nandor Kollar] PARQUET-1191: Type.hashCode() takes originalType into account but Type.equals() does not

Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/878ebcd0
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/878ebcd0
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/878ebcd0

Branch: refs/heads/master
Commit: 878ebcd0bc2592fa9d5dda01117c07bc3c40bb33
Parents: b80b184
Author: Nandor Kollar <[email protected]>
Authored: Fri Jan 19 16:53:42 2018 +0100
Committer: Zoltan Ivanfi <[email protected]>
Committed: Fri Jan 19 16:53:42 2018 +0100

----------------------------------------------------------------------
 .../src/main/java/org/apache/parquet/schema/Type.java  |  4 ++--
 .../org/apache/parquet/pig/TestPigSchemaConverter.java | 13 +++++--------
 .../apache/parquet/scrooge/ScroogeStructConverter.java |  8 +++++++-
 .../apache/parquet/thrift/ThriftSchemaConverter.java   |  2 ++
 .../org/apache/parquet/hadoop/thrift/TestBinary.java   |  5 +----
 5 files changed, 17 insertions(+), 15 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
index 176b9a6..dd2c38d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
@@ -20,7 +20,6 @@ package org.apache.parquet.schema; import static 
org.apache.parquet.Preconditions.checkNotNull; -import java.io.Serializable; import java.util.List; import org.apache.parquet.io.InvalidRecordException; @@ -262,7 +261,8 @@ abstract public class Type { name.equals(other.name) && repetition == other.repetition && eqOrBothNull(repetition, other.repetition) - && eqOrBothNull(id, other.id); + && eqOrBothNull(id, other.id) + && eqOrBothNull(originalType, other.originalType); }; @Override http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java ---------------------------------------------------------------------- diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java index 646e117..64d5961 100644 --- a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java +++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java @@ -18,7 +18,7 @@ */ package org.apache.parquet.pig; -import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; import static org.junit.Assert.assertEquals; import static org.apache.parquet.pig.PigSchemaConverter.pigSchemaToString; @@ -31,12 +31,9 @@ import java.util.Map; import java.util.Set; import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.OriginalType; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Types; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.Utils; @@ -215,7 +212,7 @@ public class TestPigSchemaConverter { 
"}\n", "a:{" + PigSchemaConverter.ARRAY_VALUE_NAME + ":(b: chararray)}"); } - + private void testFixedConversion(String schemaString, String pigSchemaString) throws Exception { Schema expectedPigSchema = Utils.getSchemaFromString(pigSchemaString); @@ -224,7 +221,7 @@ public class TestPigSchemaConverter { assertEquals("converting " + schemaString + " to " + pigSchemaString, expectedPigSchema, pigSchema); } - + @Test public void testMapWithFixed() throws Exception { testFixedConversion( @@ -272,11 +269,11 @@ public class TestPigSchemaConverter { } @Test - public void testAnnonymousField() throws Exception { + public void testAnonymousField() throws Exception { testConversion( "a:chararray, int", "message pig_schema {\n" + - " optional binary a;\n" + + " optional binary a (UTF8);\n" + " optional int32 val_0;\n" + "}\n"); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java ---------------------------------------------------------------------- diff --git a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java index c7448e3..310bb4c 100644 --- a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java +++ b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java @@ -189,7 +189,13 @@ public class ScroogeStructConverter { thriftType = new ThriftType.I64Type(); break; case STRING: - thriftType = new ThriftType.StringType(); + ThriftType.StringType stringType = new ThriftType.StringType(); + // There is no real binary type (see THRIFT-1920) in Thrift, + // binary data is represented by String type with an additional binary flag. 
+ if (!String.class.equals(scroogeField.manifest().runtimeClass())) { + stringType.setBinary(true); + } + thriftType = stringType; break; case STRUCT: thriftType = convertStructTypeField(scroogeField); http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java index c3a166a..7717e04 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java @@ -165,6 +165,8 @@ public class ThriftSchemaConverter { case STRING: StringType stringType = new StringType(); FieldMetaData fieldMetaData = field.getFieldMetaData(); + // There is no real binary type (see THRIFT-1920) in Thrift, + // binary data is represented by String type with an additional binary flag. 
if (fieldMetaData != null && fieldMetaData.valueMetaData.isBinary()) { stringType.setBinary(true); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java index a152bba..ac5a08b 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java @@ -42,7 +42,6 @@ import org.apache.parquet.thrift.ThriftParquetWriter; import org.apache.parquet.thrift.test.binary.StringAndBinary; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; public class TestBinary { @Rule @@ -81,9 +80,7 @@ public class TestBinary { private void assertSchema(ParquetMetadata parquetMetadata) { List<Type> fields = parquetMetadata.getFileMetaData().getSchema().getFields(); assertEquals(2, fields.size()); - assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).id(1).named("s"), fields.get(0)); - assertEquals(OriginalType.UTF8, fields.get(0).getOriginalType()); + assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).id(1).named("s"), fields.get(0)); assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).id(2).named("b"), fields.get(1)); - assertNull(fields.get(1).getOriginalType()); } }
