Repository: parquet-mr Updated Branches: refs/heads/master f2615d9a6 -> dcd1c33f0
PARQUET-352: Add object model property to file footers. WriteSupport now has a getName getter method; if it returns a non-null string, that name is added to the file footer metadata under the key writer.model.name. This is intended to help identify files that were written incorrectly by a particular object model. Author: Ryan Blue <[email protected]> Closes #289 from rdblue/PARQUET-352-add-object-model-property and squashes the following commits: 23f8f67 [Ryan Blue] PARQUET-352: Add object model property to file footers. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/dcd1c33f Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/dcd1c33f Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/dcd1c33f Branch: refs/heads/master Commit: dcd1c33f0dba247b43418b922c1c3a2fc432dc11 Parents: f2615d9 Author: Ryan Blue <[email protected]> Authored: Tue Dec 8 10:15:30 2015 -0800 Committer: Ryan Blue <[email protected]> Committed: Tue Dec 8 10:15:30 2015 -0800 ---------------------------------------------------------------------- .../java/org/apache/parquet/avro/AvroWriteSupport.java | 5 +++++ .../org/apache/parquet/cascading/TupleWriteSupport.java | 5 +++++ .../parquet/hadoop/InternalParquetRecordWriter.java | 4 ++++ .../java/org/apache/parquet/hadoop/ParquetWriter.java | 2 ++ .../parquet/hadoop/api/DelegatingWriteSupport.java | 5 +++++ .../org/apache/parquet/hadoop/api/WriteSupport.java | 12 ++++++++++++ .../parquet/hadoop/example/GroupWriteSupport.java | 5 +++++ .../org/apache/parquet/hadoop/TestParquetWriter.java | 3 +++ .../java/org/apache/parquet/pig/TupleWriteSupport.java | 5 +++++ .../org/apache/parquet/proto/ProtoWriteSupport.java | 5 +++++ .../org/apache/parquet/scrooge/ScroogeWriteSupport.java | 5 +++++ .../apache/parquet/hadoop/thrift/TBaseWriteSupport.java | 5 +++++ .../parquet/hadoop/thrift/ThriftBytesWriteSupport.java | 5 +++++ .../parquet/hadoop/thrift/ThriftWriteSupport.java | 5 +++++ 
.../parquet/thrift/pig/TupleToThriftWriteSupport.java | 5 +++++ 15 files changed, 76 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java index 48fc01e..c75bb03 100644 --- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java +++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java @@ -92,6 +92,11 @@ public class AvroWriteSupport<T> extends WriteSupport<T> { this.model = model; } + @Override + public String getName() { + return "avro"; + } + /** * @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema) */ http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java b/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java index 2489b2e..032f534 100644 --- a/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java +++ b/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java @@ -42,6 +42,11 @@ public class TupleWriteSupport extends WriteSupport<TupleEntry> { public static final String PARQUET_CASCADING_SCHEMA = "parquet.cascading.schema"; @Override + public String getName() { + return "cascading"; + } + + @Override public WriteContext init(Configuration configuration) { String schema = configuration.get(PARQUET_CASCADING_SCHEMA); 
rootSchema = MessageTypeParser.parseMessageType(schema); http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java index 87b23a2..2b1d48b 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java @@ -119,6 +119,10 @@ class InternalParquetRecordWriter<T> { flushRowGroupToStore(); FinalizedWriteContext finalWriteContext = writeSupport.finalizeWrite(); Map<String, String> finalMetadata = new HashMap<String, String>(extraMetaData); + String modelName = writeSupport.getName(); + if (modelName != null) { + finalMetadata.put(ParquetWriter.OBJECT_MODEL_NAME_PROP, modelName); + } finalMetadata.putAll(finalWriteContext.getExtraMetaData()); parquetFileWriter.end(finalMetadata); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java index e2521fb..be8c0cd 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java @@ -45,6 +45,8 @@ public class ParquetWriter<T> implements Closeable { public static final WriterVersion DEFAULT_WRITER_VERSION = WriterVersion.PARQUET_1_0; + public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name"; + // max size (bytes) to write as 
padding and the min size of a row group public static final int MAX_PADDING_SIZE_DEFAULT = 0; http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java index 207bb1a..66a4b01 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java @@ -55,6 +55,11 @@ public class DelegatingWriteSupport<T> extends WriteSupport<T> { } @Override + public String getName() { + return delegate.getName(); + } + + @Override public WriteSupport.FinalizedWriteContext finalizeWrite() { return delegate.finalizeWrite(); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java index 91c37c3..1a61faa 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java @@ -121,6 +121,18 @@ abstract public class WriteSupport<T> { public abstract void write(T record); /** + * Called to get a name to identify the WriteSupport object model. + * If not null, this is added to the file footer metadata. + * <p> + * Defining this method will be required in a future API version. + * + * @return a String name for file metadata. 
+ */ + public String getName() { + return null; + } + + /** * called once in the end after the last record was written * @return information to be added in the file */ http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java index ee59a6e..c038f25 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java @@ -64,6 +64,11 @@ public class GroupWriteSupport extends WriteSupport<Group> { } @Override + public String getName() { + return "example"; + } + + @Override public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) { // if present, prefer the schema passed to the constructor if (schema == null) { http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java index e327643..6fc3c72 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java @@ -135,6 +135,9 @@ public class TestParquetWriter { } } } + assertEquals("Object model property should be example", + "example", footer.getFileMetaData().getKeyValueMetaData() + .get(ParquetWriter.OBJECT_MODEL_NAME_PROP)); } } } 
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java b/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java index 829fe70..2cf676c 100644 --- a/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java @@ -67,6 +67,11 @@ public class TupleWriteSupport extends WriteSupport<Tuple> { this.rootPigSchema = pigSchema; } + @Override + public String getName() { + return "pig"; + } + public Schema getPigSchema() { return rootPigSchema; } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index 40e36d5..d7f7a53 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -62,6 +62,11 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport< this.protoMessage = protobufClass; } + @Override + public String getName() { + return "protobuf"; + } + public static void setSchema(Configuration configuration, Class<? 
extends Message> protoClass) { configuration.setClass(PB_CLASS_WRITE, protoClass, Message.class); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java index a478bf7..7b72b73 100644 --- a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java +++ b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java @@ -49,6 +49,11 @@ public class ScroogeWriteSupport<T extends ThriftStruct> extends AbstractThriftW } @Override + public String getName() { + return "scrooge"; + } + + @Override protected StructType getThriftStruct() { ScroogeStructConverter schemaConverter = new ScroogeStructConverter(); return schemaConverter.convert(thriftClass); http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java index b457278..56bf299 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java @@ -46,6 +46,11 @@ public class TBaseWriteSupport<T extends TBase<?, ?>> extends AbstractThriftWrit } @Override + public String getName() { + return "thrift"; + } + + @Override protected StructType getThriftStruct() { return ThriftSchemaConverter.toStructType(thriftClass); } 
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java index 6db769e..f6f511b 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java @@ -93,6 +93,11 @@ public class ThriftBytesWriteSupport extends WriteSupport<BytesWritable> { } @Override + public String getName() { + return "thrift"; + } + + @Override public WriteContext init(Configuration configuration) { if (this.protocolFactory == null) { try { http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java ---------------------------------------------------------------------- diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java index e81caa8..a9864ff 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java @@ -59,6 +59,11 @@ public class ThriftWriteSupport<T extends TBase<?,?>> extends WriteSupport<T> { } @Override + public String getName() { + return writeSupport.getName(); + } + + @Override public WriteContext init(Configuration configuration) { return this.writeSupport.init(configuration); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java 
---------------------------------------------------------------------- diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java index 53fc16d..b8add82 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java @@ -49,6 +49,11 @@ public class TupleToThriftWriteSupport extends WriteSupport<Tuple> { this.className = className; } + @Override + public String getName() { + return "thrift"; + } + @SuppressWarnings({"rawtypes", "unchecked"}) @Override public WriteContext init(Configuration configuration) {
