Repository: orc Updated Branches: refs/heads/master 221c85e07 -> 896dffc3e
ORC-305 - Add column statistics for the size on disk Fixes #255 Signed-off-by: Owen O'Malley <omal...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/orc/repo Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/896dffc3 Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/896dffc3 Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/896dffc3 Branch: refs/heads/master Commit: 896dffc3e5d9434e7f7428ad1d41d045eeda5459 Parents: 221c85e Author: Sandeep More <m...@apache.org> Authored: Wed Mar 21 09:46:19 2018 -0400 Committer: Owen O'Malley <omal...@apache.org> Committed: Wed Apr 25 09:42:15 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/orc/ColumnStatistics.java | 6 +++ .../src/java/org/apache/orc/PhysicalWriter.java | 7 +++ .../apache/orc/impl/ColumnStatisticsImpl.java | 28 ++++++++++- .../org/apache/orc/impl/PhysicalFsWriter.java | 26 ++++++++++ .../java/org/apache/orc/impl/WriterImpl.java | 16 +++++- .../orc/impl/writer/BinaryTreeWriter.java | 11 ++++- .../orc/impl/writer/BooleanTreeWriter.java | 7 ++- .../apache/orc/impl/writer/ByteTreeWriter.java | 7 ++- .../apache/orc/impl/writer/DateTreeWriter.java | 8 ++- .../orc/impl/writer/DecimalTreeWriter.java | 9 +++- .../orc/impl/writer/DoubleTreeWriter.java | 6 +++ .../apache/orc/impl/writer/FloatTreeWriter.java | 7 ++- .../orc/impl/writer/IntegerTreeWriter.java | 7 ++- .../apache/orc/impl/writer/ListTreeWriter.java | 8 ++- .../apache/orc/impl/writer/MapTreeWriter.java | 9 +++- .../orc/impl/writer/StringBaseTreeWriter.java | 51 ++++++++++++-------- .../orc/impl/writer/StructTreeWriter.java | 9 ++++ .../orc/impl/writer/TimestampTreeWriter.java | 9 +++- .../org/apache/orc/impl/writer/TreeWriter.java | 6 +++ .../apache/orc/impl/writer/TreeWriterBase.java | 38 ++++++++++----- .../apache/orc/impl/writer/UnionTreeWriter.java | 10 +++- .../apache/orc/impl/writer/WriterContext.java | 9 ++++ .../org/apache/orc/TestOrcNullOptimization.java | 12 ++--- 
.../test/org/apache/orc/TestVectorOrcFile.java | 12 ++--- .../java/org/apache/orc/tools/JsonFileDump.java | 3 ++ .../test/org/apache/orc/tools/TestFileDump.java | 5 +- .../resources/orc-file-dump-bloomfilter.out | 38 +++++++-------- .../resources/orc-file-dump-bloomfilter2.out | 38 +++++++-------- .../orc-file-dump-dictionary-threshold.out | 38 +++++++-------- .../tools/src/test/resources/orc-file-dump.json | 20 +++++++- java/tools/src/test/resources/orc-file-dump.out | 38 +++++++-------- .../src/test/resources/orc-file-has-null.out | 22 ++++----- proto/orc_proto.proto | 1 + 33 files changed, 368 insertions(+), 153 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/ColumnStatistics.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/ColumnStatistics.java b/java/core/src/java/org/apache/orc/ColumnStatistics.java index 72d8fbf..0f97061 100644 --- a/java/core/src/java/org/apache/orc/ColumnStatistics.java +++ b/java/core/src/java/org/apache/orc/ColumnStatistics.java @@ -33,4 +33,10 @@ public interface ColumnStatistics { * @return true if null present else false */ boolean hasNull(); + + /** + * Get the number of bytes for this column. + * @return the number of bytes + */ + long getBytesOnDisk(); } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/PhysicalWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/PhysicalWriter.java b/java/core/src/java/org/apache/orc/PhysicalWriter.java index 7589aa5..051688b 100644 --- a/java/core/src/java/org/apache/orc/PhysicalWriter.java +++ b/java/core/src/java/org/apache/orc/PhysicalWriter.java @@ -132,4 +132,11 @@ public interface PhysicalWriter { /** Gets a compression codec used by this writer. 
*/ CompressionCodec getCompressionCodec(); + + /** + * Get the number of bytes for a file in a given column. + * @param column column from which to get file size + * @return number of bytes for the given column + */ + long getFileBytes(int column); } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index ec874d6..0cd69f4 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -58,6 +58,9 @@ public class ColumnStatisticsImpl implements ColumnStatistics { if (hasNull != that.hasNull) { return false; } + if (bytesOnDisk != that.bytesOnDisk) { + return false; + } return true; } @@ -1257,12 +1260,15 @@ public class ColumnStatisticsImpl implements ColumnStatistics { private long count = 0; private boolean hasNull = false; + private long bytesOnDisk = 0; ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) { if (stats.hasNumberOfValues()) { count = stats.getNumberOfValues(); } + bytesOnDisk = stats.hasBytesOnDisk() ? 
stats.getBytesOnDisk() : 0; + if (stats.hasHasNull()) { hasNull = stats.getHasNull(); } else { @@ -1281,6 +1287,10 @@ public class ColumnStatisticsImpl implements ColumnStatistics { this.count += count; } + public void updateByteCount(long size) { + this.bytesOnDisk += size; + } + public void setNull() { hasNull = true; } @@ -1342,10 +1352,12 @@ public class ColumnStatisticsImpl implements ColumnStatistics { public void merge(ColumnStatisticsImpl stats) { count += stats.count; hasNull |= stats.hasNull; + bytesOnDisk += stats.bytesOnDisk; } public void reset() { count = 0; + bytesOnDisk = 0; hasNull = false; } @@ -1359,9 +1371,20 @@ public class ColumnStatisticsImpl implements ColumnStatistics { return hasNull; } + /** + * Get the number of bytes for this column. + * + * @return the number of bytes + */ + @Override + public long getBytesOnDisk() { + return bytesOnDisk; + } + @Override public String toString() { - return "count: " + count + " hasNull: " + hasNull; + return "count: " + count + " hasNull: " + hasNull + + (bytesOnDisk != 0 ? 
" bytesOnDisk: " + bytesOnDisk : ""); } public OrcProto.ColumnStatistics.Builder serialize() { @@ -1369,6 +1392,9 @@ public class ColumnStatisticsImpl implements ColumnStatistics { OrcProto.ColumnStatistics.newBuilder(); builder.setNumberOfValues(count); builder.setHasNull(hasNull); + if (bytesOnDisk != 0) { + builder.setBytesOnDisk(bytesOnDisk); + } return builder; } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java index 38ca40e..2521e6d 100644 --- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java +++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java @@ -104,6 +104,32 @@ public class PhysicalFsWriter implements PhysicalWriter { return codec; } + /** + * Get the number of bytes for a file in a given column + * by finding all the streams (not suppressed) + * for a given column and returning the sum of their sizes. 
+ * excludes index + * + * @param column column from which to get file size + * @return number of bytes for the given column + */ + @Override + public long getFileBytes(final int column) { + long size = 0; + for (final Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) { + final BufferedStream receiver = pair.getValue(); + if(!receiver.isSuppressed) { + + final StreamName name = pair.getKey(); + if(name.getColumn() == column && name.getArea() != StreamName.Area.INDEX ) { + size += receiver.getOutputSize(); + } + } + + } + return size; + } + private void padStripe(long indexSize, long dataSize, int footerSize) throws IOException { this.stripeStart = rawWriter.getPos(); final long currentStripeSize = indexSize + dataSize + footerSize; http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/WriterImpl.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java index 90b410c..0ddd00a 100644 --- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java +++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java @@ -384,6 +384,16 @@ public class WriterImpl implements Writer, MemoryManager.Callback { return version; } + /** + * Get the PhysicalWriter. + * + * @return the file's physical writer. 
+ */ + @Override + public PhysicalWriter getPhysicalWriter() { + return physicalWriter; + } + public OrcFile.BloomFilterVersion getBloomFilterVersion() { return bloomFilterVersion; } @@ -430,12 +440,16 @@ public class WriterImpl implements Writer, MemoryManager.Callback { } OrcProto.StripeStatistics.Builder stats = OrcProto.StripeStatistics.newBuilder(); + + treeWriter.flushStreams(); treeWriter.writeStripe(builder, stats, requiredIndexEntries); - fileMetadata.addStripeStats(stats.build()); + OrcProto.StripeInformation.Builder dirEntry = OrcProto.StripeInformation.newBuilder() .setNumberOfRows(rowsInStripe); physicalWriter.finalizeStripe(builder, dirEntry); + + fileMetadata.addStripeStats(stats.build()); stripes.add(dirEntry.build()); rowCount += rowsInStripe; rowsInStripe = 0; http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java index 5835b5a..14669c9 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java @@ -108,8 +108,6 @@ public class BinaryTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - stream.flush(); - length.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -134,4 +132,13 @@ public class BinaryTreeWriter extends TreeWriterBase { BinaryColumnStatistics bcs = (BinaryColumnStatistics) fileStatistics; return bcs.getSum(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + stream.flush(); + length.flush(); + } + + } 
http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java index 5f572bd..744aaef 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java @@ -74,7 +74,6 @@ public class BooleanTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - writer.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -96,4 +95,10 @@ public class BooleanTreeWriter extends TreeWriterBase { long num = fileStatistics.getNumberOfValues(); return num * JavaDataModel.get().primitive1(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + writer.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java index edd6411..a8dc059 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java @@ -84,7 +84,6 @@ public class ByteTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - writer.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -106,4 +105,10 @@ public class ByteTreeWriter extends 
TreeWriterBase { long num = fileStatistics.getNumberOfValues(); return num * JavaDataModel.get().primitive1(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + writer.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java index d15fb13..209dd0e 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java @@ -88,7 +88,6 @@ public class DateTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - writer.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -121,4 +120,11 @@ public class DateTreeWriter extends TreeWriterBase { return fileStatistics.getNumberOfValues() * JavaDataModel.get().lengthOfDate(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + writer.flush(); + } + } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java index 5d88372..9b2f2f0 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java @@ -164,8 +164,6 @@ public class DecimalTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws 
IOException { super.writeStripe(builder, stats, requiredIndexEntries); - valueStream.flush(); - scaleStream.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -189,4 +187,11 @@ public class DecimalTreeWriter extends TreeWriterBase { return fileStatistics.getNumberOfValues() * JavaDataModel.get().lengthOfDecimal(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + valueStream.flush(); + scaleStream.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java index d2c0db2..84218ca 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java @@ -109,4 +109,10 @@ public class DoubleTreeWriter extends TreeWriterBase { long num = fileStatistics.getNumberOfValues(); return num * JavaDataModel.get().primitive2(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + stream.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java index c825bf1..e4198a2 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java @@ -88,7 +88,6 @@ public class FloatTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { 
super.writeStripe(builder, stats, requiredIndexEntries); - stream.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -110,4 +109,10 @@ public class FloatTreeWriter extends TreeWriterBase { long num = fileStatistics.getNumberOfValues(); return num * JavaDataModel.get().primitive1(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + stream.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java index 6036ef5..dc0eaad 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java @@ -101,7 +101,6 @@ public class IntegerTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - writer.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -124,4 +123,10 @@ public class IntegerTreeWriter extends TreeWriterBase { long num = fileStatistics.getNumberOfValues(); return num * (isLong ? 
jdm.primitive2() : jdm.primitive1()); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + writer.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java index 2b937fd..c6068cd 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java @@ -123,7 +123,6 @@ public class ListTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - lengths.flush(); childWriter.writeStripe(builder, stats, requiredIndexEntries); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); @@ -158,4 +157,11 @@ public class ListTreeWriter extends TreeWriterBase { super.writeFileStatistics(footer); childWriter.writeFileStatistics(footer); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + lengths.flush(); + childWriter.flushStreams(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java index 26ace05..91e5657 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java @@ -132,7 +132,6 @@ public class MapTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { 
super.writeStripe(builder, stats, requiredIndexEntries); - lengths.flush(); keyWriter.writeStripe(builder, stats, requiredIndexEntries); valueWriter.writeStripe(builder, stats, requiredIndexEntries); if (rowIndexPosition != null) { @@ -170,4 +169,12 @@ public class MapTreeWriter extends TreeWriterBase { keyWriter.writeFileStatistics(footer); valueWriter.writeFileStatistics(footer); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + lengths.flush(); + keyWriter.flushStreams(); + valueWriter.flushStreams(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java index f49cb7f..be4e6dc 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java @@ -98,33 +98,16 @@ public abstract class StringBaseTreeWriter extends TreeWriterBase { public void writeStripe(OrcProto.StripeFooter.Builder builder, OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { - // if rows in stripe is less than dictionaryCheckAfterRows, dictionary - // checking would not have happened. So do it again here. - checkDictionaryEncoding(); - if (useDictionaryEncoding) { - flushDictionary(); - } else { - // flushout any left over entries from dictionary - if (rows.size() > 0) { - flushDictionary(); - } - - // suppress the stream for every stripe if dictionary is disabled + checkDictionaryEncoding(); + if (!useDictionaryEncoding) { stringOutput.suppress(); } // we need to build the rowindex before calling super, since it // writes it out. 
super.writeStripe(builder, stats, requiredIndexEntries); - if (useDictionaryEncoding) { - stringOutput.flush(); - lengthOutput.flush(); - rowOutput.flush(); - } else { - directStreamOutput.flush(); - lengthOutput.flush(); - } + // reset all of the fields to be ready for the next stripe. dictionary.clear(); savedRowIndex.clear(); @@ -285,4 +268,32 @@ public abstract class StringBaseTreeWriter extends TreeWriterBase { return numVals * JavaDataModel.get().lengthForStringOfLength(avgSize); } } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + // if rows in stripe is less than dictionaryCheckAfterRows, dictionary + // checking would not have happened. So do it again here. + checkDictionaryEncoding(); + + if (useDictionaryEncoding) { + flushDictionary(); + stringOutput.flush(); + lengthOutput.flush(); + rowOutput.flush(); + } else { + // flushout any left over entries from dictionary + if (rows.size() > 0) { + flushDictionary(); + } + + // suppress the stream for every stripe if dictionary is disabled + stringOutput.suppress(); + + directStreamOutput.flush(); + lengthOutput.flush(); + } + + } + } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java index 9a1384d..ee0b0c0 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java @@ -153,4 +153,13 @@ public class StructTreeWriter extends TreeWriterBase { child.writeFileStatistics(footer); } } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + for (TreeWriter child : childrenWriters) { + child.flushStreams(); + } + + } } 
http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java index 1694ca1..a7bfc90 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java @@ -130,8 +130,6 @@ public class TimestampTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - seconds.flush(); - nanos.flush(); if (rowIndexPosition != null) { recordPosition(rowIndexPosition); } @@ -171,4 +169,11 @@ public class TimestampTreeWriter extends TreeWriterBase { return fileStatistics.getNumberOfValues() * JavaDataModel.get().lengthOfTimestamp(); } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + seconds.flush(); + nanos.flush(); + } } http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java index ea4e0e6..b1a6bec 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java @@ -70,6 +70,12 @@ public interface TreeWriter { void createRowIndexEntry() throws IOException; /** + * Flush the TreeWriter stream + * @throws IOException + */ + void flushStreams() throws IOException; + + /** * Write the stripe out to the file. 
* @param stripeFooter the stripe footer that contains the information about the * layout of the stripe. The TreeWriterBase is required to update http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java index bde4eb9..74ef3cc 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java +++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java @@ -229,23 +229,34 @@ public abstract class TreeWriterBase implements TreeWriter { } } - public void writeStripe(OrcProto.StripeFooter.Builder builder, - OrcProto.StripeStatistics.Builder stats, - int requiredIndexEntries) throws IOException { + @Override + public void flushStreams() throws IOException { + if (isPresent != null) { isPresent.flush(); + } - // if no nulls are found in a stream, then suppress the stream - if(!foundNulls) { - isPresentOutStream.suppress(); - // since isPresent bitstream is suppressed, update the index to - // remove the positions of the isPresent stream - if (rowIndex != null) { - removeIsPresentPositions(); - } + } + + @Override + public void writeStripe(OrcProto.StripeFooter.Builder builder, + OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { + + // if no nulls are found in a stream, then suppress the stream + if (isPresent != null && !foundNulls) { + isPresentOutStream.suppress(); + // since isPresent bitstream is suppressed, update the index to + // remove the positions of the isPresent stream + if (rowIndex != null) { + removeIsPresentPositions(); } + } + /* Update byte count */ + final long byteCount = streamFactory.getPhysicalWriter().getFileBytes(id); + stripeColStatistics.updateByteCount(byteCount); + // merge stripe-level column statistics 
to file statistics and write it to // stripe statistics fileStatistics.merge(stripeColStatistics); @@ -259,8 +270,8 @@ public abstract class TreeWriterBase implements TreeWriter { if (rowIndex != null) { if (rowIndex.getEntryCount() != requiredIndexEntries) { throw new IllegalArgumentException("Column has wrong number of " + - "index entries found: " + rowIndex.getEntryCount() + " expected: " + - requiredIndexEntries); + "index entries found: " + rowIndex.getEntryCount() + " expected: " + + requiredIndexEntries); } streamFactory.writeIndex(new StreamName(id, OrcProto.Stream.Kind.ROW_INDEX), rowIndex); rowIndex.clear(); @@ -279,6 +290,7 @@ public abstract class TreeWriterBase implements TreeWriter { OrcProto.Stream.Kind.BLOOM_FILTER_UTF8), bloomFilterIndexUtf8); bloomFilterIndexUtf8.clear(); } + } /** http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java index 6be2669..54a9a3a 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java @@ -124,7 +124,6 @@ public class UnionTreeWriter extends TreeWriterBase { OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException { super.writeStripe(builder, stats, requiredIndexEntries); - tags.flush(); for (TreeWriter child : childrenWriters) { child.writeStripe(builder, stats, requiredIndexEntries); } @@ -172,4 +171,13 @@ public class UnionTreeWriter extends TreeWriterBase { child.writeFileStatistics(footer); } } + + @Override + public void flushStreams() throws IOException { + super.flushStreams(); + tags.flush(); + for (TreeWriter child : childrenWriters) { + child.flushStreams(); + } + } } 
http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java index f11d519..e32c683 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java +++ b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java @@ -21,6 +21,7 @@ package org.apache.orc.impl.writer; import org.apache.hadoop.conf.Configuration; import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; +import org.apache.orc.PhysicalWriter; import org.apache.orc.impl.OutStream; import org.apache.orc.impl.StreamName; @@ -84,6 +85,14 @@ public interface WriterContext { */ OrcFile.Version getVersion(); + /** + * Get the PhysicalWriter. + * + * @return the file's physical writer. + */ + PhysicalWriter getPhysicalWriter(); + + OrcFile.BloomFilterVersion getBloomFilterVersion(); void writeIndex(StreamName name, http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java index 45b69b2..de22301 100644 --- a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java +++ b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java @@ -150,13 +150,13 @@ public class TestOrcNullOptimization { assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum()); assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined()); assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum()); - assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0", + assertEquals("count: 19998 hasNull: true bytesOnDisk: 184 min: 0 max: 0 sum: 0", 
stats[1].toString()); assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum()); assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum()); assertEquals(19998, stats[2].getNumberOfValues()); - assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998", + assertEquals("count: 19998 hasNull: true bytesOnDisk: 200 min: a max: a sum: 19998", stats[2].toString()); // check the inspectors @@ -265,13 +265,13 @@ public class TestOrcNullOptimization { assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum()); assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined()); assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum()); - assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0", + assertEquals("count: 20000 hasNull: false bytesOnDisk: 160 min: 0 max: 0 sum: 0", stats[1].toString()); assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum()); assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum()); assertEquals(20000, stats[2].getNumberOfValues()); - assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000", + assertEquals("count: 20000 hasNull: false bytesOnDisk: 180 min: a max: b sum: 20000", stats[2].toString()); // check the inspectors @@ -359,13 +359,13 @@ public class TestOrcNullOptimization { assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum()); assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined()); assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum()); - assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17", + assertEquals("count: 7 hasNull: true bytesOnDisk: 12 min: 2 max: 3 sum: 17", stats[1].toString()); assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum()); assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum()); assertEquals(7, stats[2].getNumberOfValues()); - assertEquals("count: 7 hasNull: true min: a max: h sum: 7", + assertEquals("count: 7 hasNull: true bytesOnDisk: 20 min: a max: h sum: 
7", stats[2].toString()); // check the inspectors http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/test/org/apache/orc/TestVectorOrcFile.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index f8ed256..fdf20a4 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -450,13 +450,13 @@ public class TestVectorOrcFile { assertEquals(3, stats[1].getNumberOfValues()); assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum()); - assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString()); + assertEquals("count: 3 hasNull: true bytesOnDisk: 28 sum: 15", stats[1].toString()); assertEquals(3, stats[2].getNumberOfValues()); assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum()); assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum()); assertEquals(8, ((StringColumnStatistics) stats[2]).getSum()); - assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8", + assertEquals("count: 3 hasNull: true bytesOnDisk: 22 min: bar max: hi sum: 8", stats[2].toString()); // check the inspectors @@ -1034,13 +1034,13 @@ public class TestVectorOrcFile { assertEquals(2, stats[1].getNumberOfValues()); assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount()); assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount()); - assertEquals("count: 2 hasNull: false true: 1", stats[1].toString()); + assertEquals("count: 2 hasNull: false bytesOnDisk: 5 true: 1", stats[1].toString()); assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum()); assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum()); assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined()); assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum()); - assertEquals("count: 2 
hasNull: false min: 1024 max: 2048 sum: 3072", + assertEquals("count: 2 hasNull: false bytesOnDisk: 9 min: 1024 max: 2048 sum: 3072", stats[3].toString()); StripeStatistics ss = reader.getStripeStatistics().get(0); @@ -1052,10 +1052,10 @@ public class TestVectorOrcFile { assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.0001); assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.0001); assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001); - assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0", + assertEquals("count: 2 hasNull: false bytesOnDisk: 15 min: -15.0 max: -5.0 sum: -20.0", stats[7].toString()); - assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString()); + assertEquals("count: 2 hasNull: false bytesOnDisk: 14 min: bye max: hi sum: 5", stats[9].toString()); // check the schema TypeDescription readerSchema = reader.getSchema(); http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java ---------------------------------------------------------------------- diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java index 4ea9463..e5f3b94 100644 --- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java +++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java @@ -295,6 +295,9 @@ public class JsonFileDump { if (cs != null) { writer.key("count").value(cs.getNumberOfValues()); writer.key("hasNull").value(cs.hasNull()); + if (cs.getBytesOnDisk() != 0) { + writer.key("bytesOnDisk").value(cs.getBytesOnDisk()); + } if (cs instanceof BinaryColumnStatistics) { writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum()); writer.key("type").value(OrcProto.Type.Kind.BINARY); http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/org/apache/orc/tools/TestFileDump.java 
---------------------------------------------------------------------- diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java index 9e21fad..bfb073c 100644 --- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java +++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java @@ -19,7 +19,6 @@ package org.apache.orc.tools; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; import static org.junit.Assume.assumeTrue; import java.io.BufferedReader; @@ -29,7 +28,6 @@ import java.io.FileOutputStream; import java.io.FileReader; import java.io.PrintStream; import java.nio.charset.StandardCharsets; -import java.sql.Date; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -37,12 +35,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.TimeZone; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -294,6 +290,7 @@ public class TestFileDump { m, Arrays.asList(100, 200), 10, "foo"); + m.clear(); m.put("k3", "v3"); appendAllTypes( http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump-bloomfilter.out ---------------------------------------------------------------------- diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out index 5775500..2a20a71 100644 --- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out +++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out @@ -8,35 +8,35 @@ Type: 
struct<i:int,l:bigint,s:string> Stripe Statistics: Stripe 1: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 - Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3701 min: Darkness, max: worst sum: 19280 Stripe 2: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 - Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3690 min: Darkness, max: worst sum: 19504 Stripe 3: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 - Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3702 min: Darkness, max: worst sum: 19641 Stripe 4: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 
sum: 8533549236 - Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3732 min: Darkness, max: worst sum: 19470 Stripe 5: Column 0: count: 1000 hasNull: false - Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 - Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 - Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false bytesOnDisk: 926 min: Darkness, max: worst sum: 3866 File Statistics: Column 0: count: 21000 hasNull: false - Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403 - Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 - Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2146993718 max: 2147378179 sum: 193017464403 + Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: worst sum: 81761 Stripes: Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 749 @@ -172,7 +172,7 @@ Stripes: Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 
loadFactor: 0.022 expectedFpp: 2.343647E-7 -File length: 272452 bytes +File length: 272503 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out ---------------------------------------------------------------------- diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out index 8afddae..c4fa8bf 100644 --- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out +++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out @@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string> Stripe Statistics: Stripe 1: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 - Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3701 min: Darkness, max: worst sum: 19280 Stripe 2: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 - Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false bytesOnDisk: 
3690 min: Darkness, max: worst sum: 19504 Stripe 3: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 - Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3702 min: Darkness, max: worst sum: 19641 Stripe 4: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 - Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3732 min: Darkness, max: worst sum: 19470 Stripe 5: Column 0: count: 1000 hasNull: false - Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 - Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 - Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false bytesOnDisk: 926 min: Darkness, max: worst sum: 3866 File Statistics: Column 0: count: 21000 hasNull: false - Column 1: count: 21000 hasNull: false min: -2146993718 max: 
2147378179 sum: 193017464403 - Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 - Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2146993718 max: 2147378179 sum: 193017464403 + Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: worst sum: 81761 Stripes: Stripe: offset: 3 data: 63786 rows: 5000 tail: 108 index: 14949 @@ -187,7 +187,7 @@ Stripes: Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 -File length: 332513 bytes +File length: 332564 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out ---------------------------------------------------------------------- diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out index 5989250..9b9dbef 100644 --- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out +++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out @@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string> Stripe Statistics: Stripe 1: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2147115959 max: 2145911404 sum: 159677169195 - Column 2: count: 5000 hasNull: false min: -9216505819108477308 max: 9217851628057711416 - Column 3: count: 5000 hasNull: false min: Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 381254 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2147115959 max: 2145911404 sum: 159677169195 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9216505819108477308 max: 9217851628057711416 + Column 3: count: 5000 hasNull: false bytesOnDisk: 103500 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744 sum: 381254 Stripe 2: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2147390285 max: 2147224606 sum: -14961457759 - Column 2: count: 5000 hasNull: false min: -9222178666167296739 max: 9221301751385928177 - Column 3: count: 5000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 1117994 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2147390285 max: 2147224606 sum: -14961457759 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9222178666167296739 max: 9221301751385928177 + Column 3: count: 5000 hasNull: false bytesOnDisk: 308247 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938 sum: 1117994 Stripe 3: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2145842720 max: 2146718321 sum: 141092475520 - Column 2: count: 5000 hasNull: false min: -9221963099397084326 max: 9222722740629726770 - Column 3: count: 5000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 1925226 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2145842720 max: 2146718321 sum: 141092475520 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9221963099397084326 max: 9222722740629726770 + Column 3: count: 5000 hasNull: false bytesOnDisk: 545986 min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974 sum: 1925226 Stripe 4: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2145378214 max: 2147453086 sum: -153680004530 - Column 2: count: 5000 hasNull: false min: -9222731174895935707 max: 9222919052987871506 - Column 3: count: 5000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836- 11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 2815002 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2145378214 max: 2147453086 sum: -153680004530 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9222731174895935707 max: 9222919052987871506 + Column 3: count: 5000 hasNull: false bytesOnDisk: 803877 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-1115 
8-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904 sum: 2815002 Stripe 5: Column 0: count: 1000 hasNull: false - Column 1: count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: -22999664100 - Column 2: count: 1000 hasNull: false min: -9212379634781416464 max: 9197412874152820822 - Column 3: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7 
798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 + Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2143595397 max: 2136858458 sum: -22999664100 + Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: -9212379634781416464 max: 9197412874152820822 + Column 3: count: 1000 hasNull: false bytesOnDisk: 195265 min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7 
318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 File Statistics: Column 0: count: 21000 hasNull: false - Column 1: count: 21000 hasNull: false min: -2147390285 max: 2147453086 sum: 109128518326 - Column 2: count: 21000 hasNull: false min: -9222731174895935707 max: 9222919052987871506 - Column 3: count: 21000 hasNull: false min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 6910238 + Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2147390285 max: 2147453086 sum: 109128518326 + Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: -9222731174895935707 max: 9222919052987871506 + Column 3: count: 21000 hasNull: false bytesOnDisk: 1956875 min: Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 6910238 Stripes: Stripe: offset: 3 data: 163585 rows: 5000 tail: 68 index: 720 @@ -183,7 +183,7 @@ Stripes: Row group indices for column 3: Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780- 
7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0 -File length: 2217614 bytes +File length: 2217710 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump.json ---------------------------------------------------------------------- diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json index 81c96df..72476dd 100644 --- a/java/tools/src/test/resources/orc-file-dump.json +++ b/java/tools/src/test/resources/orc-file-dump.json @@ -47,6 +47,7 @@ "columnId": 1, "count": 5000, "hasNull": false, + "bytesOnDisk": 20035, "min": -2147115959, "max": 2145210552, "sum": 50111854553, @@ -56,6 +57,7 @@ "columnId": 2, "count": 5000, "hasNull": false, + "bytesOnDisk": 40050, "min": -9223180583305557329, "max": 9221614132680747961, "type": "LONG" @@ -64,6 +66,7 @@ "columnId": 3, "count": 4950, "hasNull": true, + "bytesOnDisk": 3685, "min": "Darkness,", "max": "worst", "totalLength": 19283, @@ -83,6 +86,7 @@ "columnId": 1, "count": 5000, "hasNull": false, + "bytesOnDisk": 20035, "min": -2147390285, "max": 2147224606, "sum": -22290798217, @@ -92,6 +96,7 @@ "columnId": 2, "count": 5000, "hasNull": false, + "bytesOnDisk": 40050, "min": -9219295160509160427, "max": 9217571024994660020, "type": "LONG" @@ -100,6 +105,7 @@ "columnId": 3, "count": 4950, 
"hasNull": true, + "bytesOnDisk": 3678, "min": "Darkness,", "max": "worst", "totalLength": 19397, @@ -119,6 +125,7 @@ "columnId": 1, "count": 5000, "hasNull": false, + "bytesOnDisk": 20035, "min": -2146954065, "max": 2146722468, "sum": 20639652136, @@ -128,6 +135,7 @@ "columnId": 2, "count": 5000, "hasNull": false, + "bytesOnDisk": 40050, "min": -9214076359988107846, "max": 9222919052987871506, "type": "LONG" @@ -136,6 +144,7 @@ "columnId": 3, "count": 4950, "hasNull": true, + "bytesOnDisk": 3685, "min": "Darkness,", "max": "worst", "totalLength": 19031, @@ -155,6 +164,7 @@ "columnId": 1, "count": 5000, "hasNull": false, + "bytesOnDisk": 20035, "min": -2146969085, "max": 2146025044, "sum": -5156814387, @@ -164,6 +174,7 @@ "columnId": 2, "count": 5000, "hasNull": false, + "bytesOnDisk": 40050, "min": -9222731174895935707, "max": 9220625004936875965, "type": "LONG" @@ -172,6 +183,7 @@ "columnId": 3, "count": 4950, "hasNull": true, + "bytesOnDisk": 3671, "min": "Darkness,", "max": "worst", "totalLength": 19459, @@ -191,6 +203,7 @@ "columnId": 1, "count": 1000, "hasNull": false, + "bytesOnDisk": 4007, "min": -2144303438, "max": 2127599049, "sum": 62841564778, @@ -200,6 +213,7 @@ "columnId": 2, "count": 1000, "hasNull": false, + "bytesOnDisk": 8010, "min": -9195133638801798919, "max": 9218626063131504414, "type": "LONG" @@ -208,6 +222,7 @@ "columnId": 3, "count": 990, "hasNull": true, + "bytesOnDisk": 926, "min": "Darkness,", "max": "worst", "totalLength": 3963, @@ -226,6 +241,7 @@ "columnId": 1, "count": 21000, "hasNull": false, + "bytesOnDisk": 84147, "min": -2147390285, "max": 2147224606, "sum": 106145458863, @@ -235,6 +251,7 @@ "columnId": 2, "count": 21000, "hasNull": false, + "bytesOnDisk": 168210, "min": -9223180583305557329, "max": 9222919052987871506, "type": "LONG" @@ -243,6 +260,7 @@ "columnId": 3, "count": 20790, "hasNull": true, + "bytesOnDisk": 15645, "min": "Darkness,", "max": "worst", "totalLength": 81133, @@ -1348,7 +1366,7 @@ }] } ], - "fileLength": 
272436, + "fileLength": 272486, "paddingLength": 0, "paddingRatio": 0, "status": "OK" http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump.out ---------------------------------------------------------------------- diff --git a/java/tools/src/test/resources/orc-file-dump.out b/java/tools/src/test/resources/orc-file-dump.out index f03a6f2..2ae99ce 100644 --- a/java/tools/src/test/resources/orc-file-dump.out +++ b/java/tools/src/test/resources/orc-file-dump.out @@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string> Stripe Statistics: Stripe 1: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 - Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3701 min: Darkness, max: worst sum: 19280 Stripe 2: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 - Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3690 min: Darkness, max: worst sum: 19504 Stripe 3: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 - Column 2: count: 5000 hasNull: false min: 
-9218342074710552826 max: 9222303228623055266 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3702 min: Darkness, max: worst sum: 19641 Stripe 4: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 - Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 - Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false bytesOnDisk: 3732 min: Darkness, max: worst sum: 19470 Stripe 5: Column 0: count: 1000 hasNull: false - Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 - Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 - Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false bytesOnDisk: 926 min: Darkness, max: worst sum: 3866 File Statistics: Column 0: count: 21000 hasNull: false - Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403 - Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 - Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + Column 1: count: 21000 
hasNull: false bytesOnDisk: 84147 min: -2146993718 max: 2147378179 sum: 193017464403 + Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: worst sum: 81761 Stripes: Stripe: offset: 3 data: 63786 rows: 5000 tail: 79 index: 439 @@ -188,7 +188,7 @@ Stripes: Row group indices for column 3: Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 -File length: 270996 bytes +File length: 271047 bytes Padding length: 0 bytes Padding ratio: 0% http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-has-null.out ---------------------------------------------------------------------- diff --git a/java/tools/src/test/resources/orc-file-has-null.out b/java/tools/src/test/resources/orc-file-has-null.out index d7e78f7..ed963dd 100644 --- a/java/tools/src/test/resources/orc-file-has-null.out +++ b/java/tools/src/test/resources/orc-file-has-null.out @@ -8,25 +8,25 @@ Type: struct<bytes1:binary,string1:string> Stripe Statistics: Stripe 1: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 2000 hasNull: true min: RG1 max: RG3 sum: 6000 + Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000 + Column 2: count: 2000 hasNull: true bytesOnDisk: 46 min: RG1 max: RG3 sum: 6000 Stripe 2: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 0 hasNull: true + Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000 + Column 2: count: 0 hasNull: true bytesOnDisk: 11 Stripe 3: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 5000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 40000 + Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000 + Column 2: count: 5000 hasNull: false bytesOnDisk: 32 min: 
STRIPE-3 max: STRIPE-3 sum: 40000 Stripe 4: Column 0: count: 5000 hasNull: false - Column 1: count: 5000 hasNull: false sum: 15000 - Column 2: count: 0 hasNull: true + Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000 + Column 2: count: 0 hasNull: true bytesOnDisk: 11 File Statistics: Column 0: count: 20000 hasNull: false - Column 1: count: 20000 hasNull: false sum: 60000 - Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000 + Column 1: count: 20000 hasNull: false bytesOnDisk: 696 sum: 60000 + Column 2: count: 7000 hasNull: true bytesOnDisk: 100 min: RG1 max: STRIPE-3 sum: 46000 Stripes: Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154 @@ -105,7 +105,7 @@ Stripes: Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0 Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0 -File length: 1825 bytes +File length: 1842 bytes Padding length: 0 bytes Padding ratio: 0% ________________________________________________________________________________________________________________________ http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/proto/orc_proto.proto ---------------------------------------------------------------------- diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto index e6e797f..f92e531 100644 --- a/proto/orc_proto.proto +++ b/proto/orc_proto.proto @@ -81,6 +81,7 @@ message ColumnStatistics { optional BinaryStatistics binaryStatistics = 8; optional TimestampStatistics timestampStatistics = 9; optional bool hasNull = 10; + optional uint64 bytesOnDisk = 11; } message RowIndexEntry {