Repository: parquet-mr Updated Branches: refs/heads/master 6a4bbe94a -> 445cb9dc2
PARQUET-1215: Add getFooter to ParquetWriter. This adds getFooter to ParquetWriter, which returns the footer that was written to the file; the footer is only available after the file has been closed. Author: Ryan Blue <[email protected]> Closes #457 from rdblue/PARQUET-1215-add-footer-accessor-to-writers and squashes the following commits: 79c5965a1 [Ryan Blue] PARQUET-1215: Add getFooter to ParquetWriter. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/445cb9dc Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/445cb9dc Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/445cb9dc Branch: refs/heads/master Commit: 445cb9dc2f07553f8e1e5f7c1150f00fbb05c63f Parents: 6a4bbe9 Author: Ryan Blue <[email protected]> Authored: Thu Feb 15 09:07:29 2018 -0800 Committer: Ryan Blue <[email protected]> Committed: Thu Feb 15 09:07:29 2018 -0800 ---------------------------------------------------------------------- .../parquet/hadoop/InternalParquetRecordWriter.java | 5 +++++ .../java/org/apache/parquet/hadoop/ParquetFileWriter.java | 10 +++++++++- .../java/org/apache/parquet/hadoop/ParquetWriter.java | 8 ++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java index 2a221ac..d9e9b5e 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java @@ -32,6 +32,7 @@ import org.apache.parquet.column.ParquetProperties; 
import org.apache.parquet.hadoop.CodecFactory.BytesCompressor; import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.parquet.hadoop.api.WriteSupport.FinalizedWriteContext; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.ColumnIOFactory; import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.api.RecordConsumer; @@ -96,6 +97,10 @@ class InternalParquetRecordWriter<T> { initStore(); } + public ParquetMetadata getFooter() { + return parquetFileWriter.getFooter(); + } + private void initStore() { pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator()); columnStore = props.newColumnWriteStore(schema, pageStore); http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java index 285c2db..f94fd9c 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java @@ -121,6 +121,9 @@ public class ParquetFileWriter { private long currentChunkFirstDataPage; // set in startColumn (out.pos()) private long currentChunkDictionaryPageOffset; // set in writeDictionaryPage + // set when end is called + private ParquetMetadata footer = null; + /** * Captures the order in which methods should be called * @@ -670,7 +673,7 @@ public class ParquetFileWriter { public void end(Map<String, String> extraMetaData) throws IOException { state = state.end(); LOG.debug("{}: end", out.getPos()); - ParquetMetadata footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks); + this.footer = new ParquetMetadata(new FileMetaData(schema, 
extraMetaData, Version.FULL_VERSION), blocks); serializeFooter(footer, out); out.close(); } @@ -684,6 +687,11 @@ public class ParquetFileWriter { out.write(MAGIC); } + public ParquetMetadata getFooter() { + Preconditions.checkState(state == STATE.ENDED, "Cannot return unfinished footer."); + return footer; + } + /** * Given a list of metadata files, merge them into a single ParquetMetadata * Requires that the schemas be compatible, and the extraMetadata be exactly equal. http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java index bdde70e..1908206 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java @@ -28,6 +28,7 @@ import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.column.ParquetProperties.WriterVersion; import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.hadoop.util.HadoopOutputFile; import org.apache.parquet.io.OutputFile; import org.apache.parquet.schema.MessageType; @@ -311,6 +312,13 @@ public class ParquetWriter<T> implements Closeable { } /** + * @return the ParquetMetadata written to the (closed) file. + */ + public ParquetMetadata getFooter() { + return writer.getFooter(); + } + + /** * @return the total size of data written to the file and buffered in memory */ public long getDataSize() {
