Repository: orc
Updated Branches:
  refs/heads/master 221c85e07 -> 896dffc3e


ORC-305 - Add column statistics for the size on disk

Fixes #255

Signed-off-by: Owen O'Malley <omal...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/896dffc3
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/896dffc3
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/896dffc3

Branch: refs/heads/master
Commit: 896dffc3e5d9434e7f7428ad1d41d045eeda5459
Parents: 221c85e
Author: Sandeep More <m...@apache.org>
Authored: Wed Mar 21 09:46:19 2018 -0400
Committer: Owen O'Malley <omal...@apache.org>
Committed: Wed Apr 25 09:42:15 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/orc/ColumnStatistics.java   |  6 +++
 .../src/java/org/apache/orc/PhysicalWriter.java |  7 +++
 .../apache/orc/impl/ColumnStatisticsImpl.java   | 28 ++++++++++-
 .../org/apache/orc/impl/PhysicalFsWriter.java   | 26 ++++++++++
 .../java/org/apache/orc/impl/WriterImpl.java    | 16 +++++-
 .../orc/impl/writer/BinaryTreeWriter.java       | 11 ++++-
 .../orc/impl/writer/BooleanTreeWriter.java      |  7 ++-
 .../apache/orc/impl/writer/ByteTreeWriter.java  |  7 ++-
 .../apache/orc/impl/writer/DateTreeWriter.java  |  8 ++-
 .../orc/impl/writer/DecimalTreeWriter.java      |  9 +++-
 .../orc/impl/writer/DoubleTreeWriter.java       |  6 +++
 .../apache/orc/impl/writer/FloatTreeWriter.java |  7 ++-
 .../orc/impl/writer/IntegerTreeWriter.java      |  7 ++-
 .../apache/orc/impl/writer/ListTreeWriter.java  |  8 ++-
 .../apache/orc/impl/writer/MapTreeWriter.java   |  9 +++-
 .../orc/impl/writer/StringBaseTreeWriter.java   | 51 ++++++++++++--------
 .../orc/impl/writer/StructTreeWriter.java       |  9 ++++
 .../orc/impl/writer/TimestampTreeWriter.java    |  9 +++-
 .../org/apache/orc/impl/writer/TreeWriter.java  |  6 +++
 .../apache/orc/impl/writer/TreeWriterBase.java  | 38 ++++++++++-----
 .../apache/orc/impl/writer/UnionTreeWriter.java | 10 +++-
 .../apache/orc/impl/writer/WriterContext.java   |  9 ++++
 .../org/apache/orc/TestOrcNullOptimization.java | 12 ++---
 .../test/org/apache/orc/TestVectorOrcFile.java  | 12 ++---
 .../java/org/apache/orc/tools/JsonFileDump.java |  3 ++
 .../test/org/apache/orc/tools/TestFileDump.java |  5 +-
 .../resources/orc-file-dump-bloomfilter.out     | 38 +++++++--------
 .../resources/orc-file-dump-bloomfilter2.out    | 38 +++++++--------
 .../orc-file-dump-dictionary-threshold.out      | 38 +++++++--------
 .../tools/src/test/resources/orc-file-dump.json | 20 +++++++-
 java/tools/src/test/resources/orc-file-dump.out | 38 +++++++--------
 .../src/test/resources/orc-file-has-null.out    | 22 ++++-----
 proto/orc_proto.proto                           |  1 +
 33 files changed, 368 insertions(+), 153 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/ColumnStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/ColumnStatistics.java 
b/java/core/src/java/org/apache/orc/ColumnStatistics.java
index 72d8fbf..0f97061 100644
--- a/java/core/src/java/org/apache/orc/ColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/ColumnStatistics.java
@@ -33,4 +33,10 @@ public interface ColumnStatistics {
    * @return true if null present else false
    */
   boolean hasNull();
+
+  /**
+   * Get the number of bytes for this column.
+   * @return the number of bytes
+   */
+  long getBytesOnDisk();
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/PhysicalWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/PhysicalWriter.java 
b/java/core/src/java/org/apache/orc/PhysicalWriter.java
index 7589aa5..051688b 100644
--- a/java/core/src/java/org/apache/orc/PhysicalWriter.java
+++ b/java/core/src/java/org/apache/orc/PhysicalWriter.java
@@ -132,4 +132,11 @@ public interface PhysicalWriter {
 
   /** Gets a compression codec used by this writer. */
   CompressionCodec getCompressionCodec();
+
+  /**
+   * Get the number of bytes for a file in a given column.
+   * @param column column from which to get file size
+   * @return number of bytes for the given column
+   */
+  long getFileBytes(int column);
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java 
b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index ec874d6..0cd69f4 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -58,6 +58,9 @@ public class ColumnStatisticsImpl implements ColumnStatistics 
{
     if (hasNull != that.hasNull) {
       return false;
     }
+    if (bytesOnDisk != that.bytesOnDisk) {
+      return false;
+    }
 
     return true;
   }
@@ -1257,12 +1260,15 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
 
   private long count = 0;
   private boolean hasNull = false;
+  private long bytesOnDisk = 0;
 
   ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
     if (stats.hasNumberOfValues()) {
       count = stats.getNumberOfValues();
     }
 
+    bytesOnDisk = stats.hasBytesOnDisk() ? stats.getBytesOnDisk() : 0;
+
     if (stats.hasHasNull()) {
       hasNull = stats.getHasNull();
     } else {
@@ -1281,6 +1287,10 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
     this.count += count;
   }
 
+  public void updateByteCount(long size) {
+    this.bytesOnDisk += size;
+  }
+
   public void setNull() {
     hasNull = true;
   }
@@ -1342,10 +1352,12 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
   public void merge(ColumnStatisticsImpl stats) {
     count += stats.count;
     hasNull |= stats.hasNull;
+    bytesOnDisk += stats.bytesOnDisk;
   }
 
   public void reset() {
     count = 0;
+    bytesOnDisk = 0;
     hasNull = false;
   }
 
@@ -1359,9 +1371,20 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
     return hasNull;
   }
 
+  /**
+   * Get the number of bytes for this column.
+   *
+   * @return the number of bytes
+   */
+  @Override
+  public long getBytesOnDisk() {
+    return bytesOnDisk;
+  }
+
   @Override
   public String toString() {
-    return "count: " + count + " hasNull: " + hasNull;
+    return "count: " + count + " hasNull: " + hasNull +
+        (bytesOnDisk != 0 ? " bytesOnDisk: " + bytesOnDisk : "");
   }
 
   public OrcProto.ColumnStatistics.Builder serialize() {
@@ -1369,6 +1392,9 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
       OrcProto.ColumnStatistics.newBuilder();
     builder.setNumberOfValues(count);
     builder.setHasNull(hasNull);
+    if (bytesOnDisk != 0) {
+      builder.setBytesOnDisk(bytesOnDisk);
+    }
     return builder;
   }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java 
b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
index 38ca40e..2521e6d 100644
--- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
@@ -104,6 +104,32 @@ public class PhysicalFsWriter implements PhysicalWriter {
     return codec;
   }
 
+  /**
+   * Get the number of bytes for a file in a given column
+   * by finding all the streams (not suppressed)
+   * for a given column and returning the sum of their sizes.
+   * Index streams are excluded.
+   *
+   * @param column column from which to get file size
+   * @return number of bytes for the given column
+   */
+  @Override
+  public long getFileBytes(final int column) {
+    long size = 0;
+    for (final Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
+      final BufferedStream receiver = pair.getValue();
+      if(!receiver.isSuppressed) {
+
+        final StreamName name = pair.getKey();
+        if(name.getColumn() == column && name.getArea() != StreamName.Area.INDEX ) {
+          size += receiver.getOutputSize();
+        }
+      }
+
+    }
+    return size;
+  }
+
   private void padStripe(long indexSize, long dataSize, int footerSize) throws IOException {
     this.stripeStart = rawWriter.getPos();
     final long currentStripeSize = indexSize + dataSize + footerSize;

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java 
b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 90b410c..0ddd00a 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -384,6 +384,16 @@ public class WriterImpl implements Writer, 
MemoryManager.Callback {
       return version;
     }
 
+    /**
+     * Get the PhysicalWriter.
+     *
+     * @return the file's physical writer.
+     */
+    @Override
+    public PhysicalWriter getPhysicalWriter() {
+      return physicalWriter;
+    }
+
     public OrcFile.BloomFilterVersion getBloomFilterVersion() {
       return bloomFilterVersion;
     }
@@ -430,12 +440,16 @@ public class WriterImpl implements Writer, 
MemoryManager.Callback {
       }
       OrcProto.StripeStatistics.Builder stats =
           OrcProto.StripeStatistics.newBuilder();
+
+      treeWriter.flushStreams();
       treeWriter.writeStripe(builder, stats, requiredIndexEntries);
-      fileMetadata.addStripeStats(stats.build());
+
       OrcProto.StripeInformation.Builder dirEntry =
           OrcProto.StripeInformation.newBuilder()
               .setNumberOfRows(rowsInStripe);
       physicalWriter.finalizeStripe(builder, dirEntry);
+
+      fileMetadata.addStripeStats(stats.build());
       stripes.add(dirEntry.build());
       rowCount += rowsInStripe;
       rowsInStripe = 0;

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java
index 5835b5a..14669c9 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/BinaryTreeWriter.java
@@ -108,8 +108,6 @@ public class BinaryTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    stream.flush();
-    length.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -134,4 +132,13 @@ public class BinaryTreeWriter extends TreeWriterBase {
     BinaryColumnStatistics bcs = (BinaryColumnStatistics) fileStatistics;
     return bcs.getSum();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    stream.flush();
+    length.flush();
+  }
+
+
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java
index 5f572bd..744aaef 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/BooleanTreeWriter.java
@@ -74,7 +74,6 @@ public class BooleanTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    writer.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -96,4 +95,10 @@ public class BooleanTreeWriter extends TreeWriterBase {
     long num = fileStatistics.getNumberOfValues();
     return num * JavaDataModel.get().primitive1();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    writer.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java
index edd6411..a8dc059 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/ByteTreeWriter.java
@@ -84,7 +84,6 @@ public class ByteTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    writer.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -106,4 +105,10 @@ public class ByteTreeWriter extends TreeWriterBase {
     long num = fileStatistics.getNumberOfValues();
     return num * JavaDataModel.get().primitive1();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    writer.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
index d15fb13..209dd0e 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/DateTreeWriter.java
@@ -88,7 +88,6 @@ public class DateTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    writer.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -121,4 +120,11 @@ public class DateTreeWriter extends TreeWriterBase {
     return fileStatistics.getNumberOfValues() *
         JavaDataModel.get().lengthOfDate();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    writer.flush();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
index 5d88372..9b2f2f0 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
@@ -164,8 +164,6 @@ public class DecimalTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    valueStream.flush();
-    scaleStream.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -189,4 +187,11 @@ public class DecimalTreeWriter extends TreeWriterBase {
     return fileStatistics.getNumberOfValues() *
         JavaDataModel.get().lengthOfDecimal();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    valueStream.flush();
+    scaleStream.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java
index d2c0db2..84218ca 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/DoubleTreeWriter.java
@@ -109,4 +109,10 @@ public class DoubleTreeWriter extends TreeWriterBase {
     long num = fileStatistics.getNumberOfValues();
     return num * JavaDataModel.get().primitive2();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    stream.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java
index c825bf1..e4198a2 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/FloatTreeWriter.java
@@ -88,7 +88,6 @@ public class FloatTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    stream.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -110,4 +109,10 @@ public class FloatTreeWriter extends TreeWriterBase {
     long num = fileStatistics.getNumberOfValues();
     return num * JavaDataModel.get().primitive1();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    stream.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java
index 6036ef5..dc0eaad 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/IntegerTreeWriter.java
@@ -101,7 +101,6 @@ public class IntegerTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    writer.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -124,4 +123,10 @@ public class IntegerTreeWriter extends TreeWriterBase {
     long num = fileStatistics.getNumberOfValues();
     return num * (isLong ? jdm.primitive2() : jdm.primitive1());
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    writer.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java
index 2b937fd..c6068cd 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/ListTreeWriter.java
@@ -123,7 +123,6 @@ public class ListTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    lengths.flush();
     childWriter.writeStripe(builder, stats, requiredIndexEntries);
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
@@ -158,4 +157,11 @@ public class ListTreeWriter extends TreeWriterBase {
     super.writeFileStatistics(footer);
     childWriter.writeFileStatistics(footer);
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    lengths.flush();
+    childWriter.flushStreams();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java
index 26ace05..91e5657 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/MapTreeWriter.java
@@ -132,7 +132,6 @@ public class MapTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    lengths.flush();
     keyWriter.writeStripe(builder, stats, requiredIndexEntries);
     valueWriter.writeStripe(builder, stats, requiredIndexEntries);
     if (rowIndexPosition != null) {
@@ -170,4 +169,12 @@ public class MapTreeWriter extends TreeWriterBase {
     keyWriter.writeFileStatistics(footer);
     valueWriter.writeFileStatistics(footer);
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    lengths.flush();
+    keyWriter.flushStreams();
+    valueWriter.flushStreams();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java
index f49cb7f..be4e6dc 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/StringBaseTreeWriter.java
@@ -98,33 +98,16 @@ public abstract class StringBaseTreeWriter extends 
TreeWriterBase {
   public void writeStripe(OrcProto.StripeFooter.Builder builder,
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
-    // if rows in stripe is less than dictionaryCheckAfterRows, dictionary
-    // checking would not have happened. So do it again here.
-    checkDictionaryEncoding();
 
-    if (useDictionaryEncoding) {
-      flushDictionary();
-    } else {
-      // flushout any left over entries from dictionary
-      if (rows.size() > 0) {
-        flushDictionary();
-      }
-
-      // suppress the stream for every stripe if dictionary is disabled
+    checkDictionaryEncoding();
+    if (!useDictionaryEncoding) {
       stringOutput.suppress();
     }
 
     // we need to build the rowindex before calling super, since it
     // writes it out.
     super.writeStripe(builder, stats, requiredIndexEntries);
-    if (useDictionaryEncoding) {
-      stringOutput.flush();
-      lengthOutput.flush();
-      rowOutput.flush();
-    } else {
-      directStreamOutput.flush();
-      lengthOutput.flush();
-    }
+
     // reset all of the fields to be ready for the next stripe.
     dictionary.clear();
     savedRowIndex.clear();
@@ -285,4 +268,32 @@ public abstract class StringBaseTreeWriter extends 
TreeWriterBase {
       return numVals * JavaDataModel.get().lengthForStringOfLength(avgSize);
     }
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    // if rows in stripe is less than dictionaryCheckAfterRows, dictionary
+    // checking would not have happened. So do it again here.
+    checkDictionaryEncoding();
+
+    if (useDictionaryEncoding) {
+      flushDictionary();
+      stringOutput.flush();
+      lengthOutput.flush();
+      rowOutput.flush();
+    } else {
+      // flushout any left over entries from dictionary
+      if (rows.size() > 0) {
+        flushDictionary();
+      }
+
+      // suppress the stream for every stripe if dictionary is disabled
+      stringOutput.suppress();
+
+      directStreamOutput.flush();
+      lengthOutput.flush();
+    }
+
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java
index 9a1384d..ee0b0c0 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/StructTreeWriter.java
@@ -153,4 +153,13 @@ public class StructTreeWriter extends TreeWriterBase {
       child.writeFileStatistics(footer);
     }
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    for (TreeWriter child : childrenWriters) {
+      child.flushStreams();
+    }
+
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
----------------------------------------------------------------------
diff --git 
a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
index 1694ca1..a7bfc90 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
@@ -130,8 +130,6 @@ public class TimestampTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    seconds.flush();
-    nanos.flush();
     if (rowIndexPosition != null) {
       recordPosition(rowIndexPosition);
     }
@@ -171,4 +169,11 @@ public class TimestampTreeWriter extends TreeWriterBase {
     return fileStatistics.getNumberOfValues() *
         JavaDataModel.get().lengthOfTimestamp();
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    seconds.flush();
+    nanos.flush();
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java
index ea4e0e6..b1a6bec 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriter.java
@@ -70,6 +70,12 @@ public interface TreeWriter {
   void createRowIndexEntry() throws IOException;
 
   /**
+   * Flush the TreeWriter stream
+   * @throws IOException
+   */
+  void flushStreams() throws IOException;
+
+  /**
    * Write the stripe out to the file.
    * @param stripeFooter the stripe footer that contains the information about the
    *                layout of the stripe. The TreeWriterBase is required to update

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java 
b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
index bde4eb9..74ef3cc 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
@@ -229,23 +229,34 @@ public abstract class TreeWriterBase implements 
TreeWriter {
     }
   }
 
-  public void writeStripe(OrcProto.StripeFooter.Builder builder,
-                          OrcProto.StripeStatistics.Builder stats,
-                          int requiredIndexEntries) throws IOException {
+  @Override
+  public void flushStreams() throws IOException {
+
     if (isPresent != null) {
       isPresent.flush();
+    }
 
-      // if no nulls are found in a stream, then suppress the stream
-      if(!foundNulls) {
-        isPresentOutStream.suppress();
-        // since isPresent bitstream is suppressed, update the index to
-        // remove the positions of the isPresent stream
-        if (rowIndex != null) {
-          removeIsPresentPositions();
-        }
+  }
+
+  @Override
+  public void writeStripe(OrcProto.StripeFooter.Builder builder,
+      OrcProto.StripeStatistics.Builder stats, int requiredIndexEntries) throws IOException {
+
+    // if no nulls are found in a stream, then suppress the stream
+    if (isPresent != null && !foundNulls) {
+      isPresentOutStream.suppress();
+      // since isPresent bitstream is suppressed, update the index to
+      // remove the positions of the isPresent stream
+      if (rowIndex != null) {
+        removeIsPresentPositions();
       }
+
     }
 
+    /* Update byte count */
+    final long byteCount = streamFactory.getPhysicalWriter().getFileBytes(id);
+    stripeColStatistics.updateByteCount(byteCount);
+
     // merge stripe-level column statistics to file statistics and write it to
     // stripe statistics
     fileStatistics.merge(stripeColStatistics);
@@ -259,8 +270,8 @@ public abstract class TreeWriterBase implements TreeWriter {
     if (rowIndex != null) {
       if (rowIndex.getEntryCount() != requiredIndexEntries) {
         throw new IllegalArgumentException("Column has wrong number of " +
-             "index entries found: " + rowIndex.getEntryCount() + " expected: " +
-             requiredIndexEntries);
+            "index entries found: " + rowIndex.getEntryCount() + " expected: " +
+            requiredIndexEntries);
       }
       streamFactory.writeIndex(new StreamName(id, OrcProto.Stream.Kind.ROW_INDEX), rowIndex);
       rowIndex.clear();
@@ -279,6 +290,7 @@ public abstract class TreeWriterBase implements TreeWriter {
           OrcProto.Stream.Kind.BLOOM_FILTER_UTF8), bloomFilterIndexUtf8);
       bloomFilterIndexUtf8.clear();
     }
+
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java 
b/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java
index 6be2669..54a9a3a 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/UnionTreeWriter.java
@@ -124,7 +124,6 @@ public class UnionTreeWriter extends TreeWriterBase {
                           OrcProto.StripeStatistics.Builder stats,
                           int requiredIndexEntries) throws IOException {
     super.writeStripe(builder, stats, requiredIndexEntries);
-    tags.flush();
     for (TreeWriter child : childrenWriters) {
       child.writeStripe(builder, stats, requiredIndexEntries);
     }
@@ -172,4 +171,13 @@ public class UnionTreeWriter extends TreeWriterBase {
       child.writeFileStatistics(footer);
     }
   }
+
+  @Override
+  public void flushStreams() throws IOException {
+    super.flushStreams();
+    tags.flush();
+    for (TreeWriter child : childrenWriters) {
+      child.flushStreams();
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
index f11d519..e32c683 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/WriterContext.java
@@ -21,6 +21,7 @@ package org.apache.orc.impl.writer;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.orc.OrcFile;
 import org.apache.orc.OrcProto;
+import org.apache.orc.PhysicalWriter;
 import org.apache.orc.impl.OutStream;
 import org.apache.orc.impl.StreamName;
 
@@ -84,6 +85,14 @@ public interface WriterContext {
      */
     OrcFile.Version getVersion();
 
+    /**
+     * Get the PhysicalWriter.
+     *
+     * @return the file's physical writer.
+     */
+    PhysicalWriter getPhysicalWriter();
+
+
     OrcFile.BloomFilterVersion getBloomFilterVersion();
 
     void writeIndex(StreamName name,

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
index 45b69b2..de22301 100644
--- a/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
+++ b/java/core/src/test/org/apache/orc/TestOrcNullOptimization.java
@@ -150,13 +150,13 @@ public class TestOrcNullOptimization {
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 19998 hasNull: true min: 0 max: 0 sum: 0",
+    assertEquals("count: 19998 hasNull: true bytesOnDisk: 184 min: 0 max: 0 sum: 0",
         stats[1].toString());
 
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals(19998, stats[2].getNumberOfValues());
-    assertEquals("count: 19998 hasNull: true min: a max: a sum: 19998",
+    assertEquals("count: 19998 hasNull: true bytesOnDisk: 200 min: a max: a sum: 19998",
         stats[2].toString());
 
     // check the inspectors
@@ -265,13 +265,13 @@ public class TestOrcNullOptimization {
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
     assertEquals(0, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 20000 hasNull: false min: 0 max: 0 sum: 0",
+    assertEquals("count: 20000 hasNull: false bytesOnDisk: 160 min: 0 max: 0 sum: 0",
         stats[1].toString());
 
     assertEquals("b", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals(20000, stats[2].getNumberOfValues());
-    assertEquals("count: 20000 hasNull: false min: a max: b sum: 20000",
+    assertEquals("count: 20000 hasNull: false bytesOnDisk: 180 min: a max: b sum: 20000",
         stats[2].toString());
 
     // check the inspectors
@@ -359,13 +359,13 @@ public class TestOrcNullOptimization {
     assertEquals(2, ((IntegerColumnStatistics) stats[1]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[1]).isSumDefined());
     assertEquals(17, ((IntegerColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 7 hasNull: true min: 2 max: 3 sum: 17",
+    assertEquals("count: 7 hasNull: true bytesOnDisk: 12 min: 2 max: 3 sum: 17",
         stats[1].toString());
 
     assertEquals("h", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals("a", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals(7, stats[2].getNumberOfValues());
-    assertEquals("count: 7 hasNull: true min: a max: h sum: 7",
+    assertEquals("count: 7 hasNull: true bytesOnDisk: 20 min: a max: h sum: 7",
         stats[2].toString());
 
     // check the inspectors

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index f8ed256..fdf20a4 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -450,13 +450,13 @@ public class TestVectorOrcFile {
 
     assertEquals(3, stats[1].getNumberOfValues());
     assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
-    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
+    assertEquals("count: 3 hasNull: true bytesOnDisk: 28 sum: 15", stats[1].toString());
 
     assertEquals(3, stats[2].getNumberOfValues());
     assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
     assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
     assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
-    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8",
+    assertEquals("count: 3 hasNull: true bytesOnDisk: 22 min: bar max: hi sum: 8",
         stats[2].toString());
 
     // check the inspectors
@@ -1034,13 +1034,13 @@ public class TestVectorOrcFile {
     assertEquals(2, stats[1].getNumberOfValues());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
     assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
-    assertEquals("count: 2 hasNull: false true: 1", stats[1].toString());
+    assertEquals("count: 2 hasNull: false bytesOnDisk: 5 true: 1", stats[1].toString());
 
     assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
     assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
     assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
     assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
-    assertEquals("count: 2 hasNull: false min: 1024 max: 2048 sum: 3072",
+    assertEquals("count: 2 hasNull: false bytesOnDisk: 9 min: 1024 max: 2048 sum: 3072",
         stats[3].toString());
 
     StripeStatistics ss = reader.getStripeStatistics().get(0);
@@ -1052,10 +1052,10 @@ public class TestVectorOrcFile {
     assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum(), 0.0001);
     assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum(), 0.0001);
     assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
-    assertEquals("count: 2 hasNull: false min: -15.0 max: -5.0 sum: -20.0",
+    assertEquals("count: 2 hasNull: false bytesOnDisk: 15 min: -15.0 max: -5.0 sum: -20.0",
         stats[7].toString());
 
-    assertEquals("count: 2 hasNull: false min: bye max: hi sum: 5", stats[9].toString());
+    assertEquals("count: 2 hasNull: false bytesOnDisk: 14 min: bye max: hi sum: 5", stats[9].toString());
 
     // check the schema
     TypeDescription readerSchema = reader.getSchema();

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index 4ea9463..e5f3b94 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -295,6 +295,9 @@ public class JsonFileDump {
     if (cs != null) {
       writer.key("count").value(cs.getNumberOfValues());
       writer.key("hasNull").value(cs.hasNull());
+      if (cs.getBytesOnDisk() != 0) {
+        writer.key("bytesOnDisk").value(cs.getBytesOnDisk());
+      }
       if (cs instanceof BinaryColumnStatistics) {
         writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
         writer.key("type").value(OrcProto.Type.Kind.BINARY);

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
----------------------------------------------------------------------
diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
index 9e21fad..bfb073c 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
@@ -19,7 +19,6 @@
 package org.apache.orc.tools;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
 import static org.junit.Assume.assumeTrue;
 
 import java.io.BufferedReader;
@@ -29,7 +28,6 @@ import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.PrintStream;
 import java.nio.charset.StandardCharsets;
-import java.sql.Date;
 import java.sql.Timestamp;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
@@ -37,12 +35,10 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
-import java.util.TimeZone;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
@@ -294,6 +290,7 @@ public class TestFileDump {
         m,
         Arrays.asList(100, 200),
         10, "foo");
+
     m.clear();
     m.put("k3", "v3");
     appendAllTypes(

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index 5775500..2a20a71 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string>
 Stripe Statistics:
   Stripe 1:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 
515792826
-    Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 
9221614132680747961
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146021688 
max: 2147223299 sum: 515792826
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9218592812243954469 max: 9221614132680747961
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3701 min: Darkness, max: 
worst sum: 19280
   Stripe 2:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 
7673427
-    Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 
9222259462014003839
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146733128 
max: 2147001622 sum: 7673427
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9220818777591257749 max: 9222259462014003839
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3690 min: Darkness, max: 
worst sum: 19504
   Stripe 3:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 
132660742551
-    Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 
9222303228623055266
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146993718 
max: 2147378179 sum: 132660742551
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9218342074710552826 max: 9222303228623055266
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3702 min: Darkness, max: 
worst sum: 19641
   Stripe 4:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 
8533549236
-    Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 
9221043130193737406
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146658006 
max: 2145520931 sum: 8533549236
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9222758097219661129 max: 9221043130193737406
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3732 min: Darkness, max: 
worst sum: 19470
   Stripe 5:
     Column 0: count: 1000 hasNull: false
-    Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 
51299706363
-    Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 
9218567213558056476
-    Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
+    Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2146245500 
max: 2146378640 sum: 51299706363
+    Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: 
-9208193203370316142 max: 9218567213558056476
+    Column 3: count: 1000 hasNull: false bytesOnDisk: 926 min: Darkness, max: 
worst sum: 3866
 
 File Statistics:
   Column 0: count: 21000 hasNull: false
-  Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 
193017464403
-  Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 
9222303228623055266
-  Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
+  Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2146993718 
max: 2147378179 sum: 193017464403
+  Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: 
-9222758097219661129 max: 9222303228623055266
+  Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: 
worst sum: 81761
 
 Stripes:
   Stripe: offset: 3 data: 63786 rows: 5000 tail: 87 index: 749
@@ -172,7 +172,7 @@ Stripes:
       Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 
0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 
loadFactor: 0.022 expectedFpp: 2.343647E-7
 
-File length: 272452 bytes
+File length: 272503 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
index 8afddae..c4fa8bf 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string>
 Stripe Statistics:
   Stripe 1:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 
515792826
-    Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 
9221614132680747961
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146021688 
max: 2147223299 sum: 515792826
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9218592812243954469 max: 9221614132680747961
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3701 min: Darkness, max: 
worst sum: 19280
   Stripe 2:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 
7673427
-    Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 
9222259462014003839
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146733128 
max: 2147001622 sum: 7673427
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9220818777591257749 max: 9222259462014003839
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3690 min: Darkness, max: 
worst sum: 19504
   Stripe 3:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 
132660742551
-    Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 
9222303228623055266
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146993718 
max: 2147378179 sum: 132660742551
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9218342074710552826 max: 9222303228623055266
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3702 min: Darkness, max: 
worst sum: 19641
   Stripe 4:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 
8533549236
-    Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 
9221043130193737406
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146658006 
max: 2145520931 sum: 8533549236
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9222758097219661129 max: 9221043130193737406
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3732 min: Darkness, max: 
worst sum: 19470
   Stripe 5:
     Column 0: count: 1000 hasNull: false
-    Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 
51299706363
-    Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 
9218567213558056476
-    Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
+    Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2146245500 
max: 2146378640 sum: 51299706363
+    Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: 
-9208193203370316142 max: 9218567213558056476
+    Column 3: count: 1000 hasNull: false bytesOnDisk: 926 min: Darkness, max: 
worst sum: 3866
 
 File Statistics:
   Column 0: count: 21000 hasNull: false
-  Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 
193017464403
-  Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 
9222303228623055266
-  Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
+  Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2146993718 
max: 2147378179 sum: 193017464403
+  Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: 
-9222758097219661129 max: 9222303228623055266
+  Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: 
worst sum: 81761
 
 Stripes:
   Stripe: offset: 3 data: 63786 rows: 5000 tail: 108 index: 14949
@@ -187,7 +187,7 @@ Stripes:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 
0.5154 expectedFpp: 0.00966294
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 
loadFactor: 0.5154 expectedFpp: 0.00966294
 
-File length: 332513 bytes
+File length: 332564 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
index 5989250..9b9dbef 100644
--- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string>
 Stripe Statistics:
   Stripe 1:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2147115959 max: 2145911404 sum: 
159677169195
-    Column 2: count: 5000 hasNull: false min: -9216505819108477308 max: 
9217851628057711416
-    Column 3: count: 5000 hasNull: false min: Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744
 sum: 381254
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2147115959 
max: 2145911404 sum: 159677169195
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9216505819108477308 max: 9217851628057711416
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 103500 min: 
Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744
 sum: 381254
   Stripe 2:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2147390285 max: 2147224606 sum: 
-14961457759
-    Column 2: count: 5000 hasNull: false min: -9222178666167296739 max: 
9221301751385928177
-    Column 3: count: 5000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938
 sum: 1117994
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2147390285 
max: 2147224606 sum: -14961457759
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9222178666167296739 max: 9221301751385928177
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 308247 min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938
 sum: 1117994
   Stripe 3:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2145842720 max: 2146718321 sum: 
141092475520
-    Column 2: count: 5000 hasNull: false min: -9221963099397084326 max: 
9222722740629726770
-    Column 3: count: 5000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974
 sum: 1925226
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2145842720 
max: 2146718321 sum: 141092475520
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9221963099397084326 max: 9222722740629726770
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 545986 min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974
 sum: 1925226
   Stripe 4:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2145378214 max: 2147453086 sum: 
-153680004530
-    Column 2: count: 5000 hasNull: false min: -9222731174895935707 max: 
9222919052987871506
-    Column 3: count: 5000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-
 
11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904
 sum: 2815002
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2145378214 
max: 2147453086 sum: -153680004530
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9222731174895935707 max: 9222919052987871506
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 803877 min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-1115
 
8-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904
 sum: 2815002
   Stripe 5:
     Column 0: count: 1000 hasNull: false
-    Column 1: count: 1000 hasNull: false min: -2143595397 max: 2136858458 sum: 
-22999664100
-    Column 2: count: 1000 hasNull: false min: -9212379634781416464 max: 
9197412874152820822
-    Column 3: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7
 
798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
 sum: 670762
+    Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2143595397 
max: 2136858458 sum: -22999664100
+    Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: 
-9212379634781416464 max: 9197412874152820822
+    Column 3: count: 1000 hasNull: false bytesOnDisk: 195265 min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7
 
318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
 sum: 670762
 
 File Statistics:
   Column 0: count: 21000 hasNull: false
-  Column 1: count: 21000 hasNull: false min: -2147390285 max: 2147453086 sum: 
109128518326
-  Column 2: count: 21000 hasNull: false min: -9222731174895935707 max: 
9222919052987871506
-  Column 3: count: 21000 hasNull: false min: Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
 sum: 6910238
+  Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2147390285 
max: 2147453086 sum: 109128518326
+  Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: 
-9222731174895935707 max: 9222919052987871506
+  Column 3: count: 21000 hasNull: false bytesOnDisk: 1956875 min: 
Darkness,-230 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
 sum: 6910238
 
 Stripes:
   Stripe: offset: 3 data: 163585 rows: 5000 tail: 68 index: 720
@@ -183,7 +183,7 @@ Stripes:
     Row group indices for column 3:
       Entry 0: count: 1000 hasNull: false min: 
Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084
 max: 
worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-
 
7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
 sum: 670762 positions: 0,0,0,0,0
 
-File length: 2217614 bytes
+File length: 2217710 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump.json 
b/java/tools/src/test/resources/orc-file-dump.json
index 81c96df..72476dd 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -47,6 +47,7 @@
           "columnId": 1,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 20035,
           "min": -2147115959,
           "max": 2145210552,
           "sum": 50111854553,
@@ -56,6 +57,7 @@
           "columnId": 2,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 40050,
           "min": -9223180583305557329,
           "max": 9221614132680747961,
           "type": "LONG"
@@ -64,6 +66,7 @@
           "columnId": 3,
           "count": 4950,
           "hasNull": true,
+          "bytesOnDisk": 3685,
           "min": "Darkness,",
           "max": "worst",
           "totalLength": 19283,
@@ -83,6 +86,7 @@
           "columnId": 1,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 20035,
           "min": -2147390285,
           "max": 2147224606,
           "sum": -22290798217,
@@ -92,6 +96,7 @@
           "columnId": 2,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 40050,
           "min": -9219295160509160427,
           "max": 9217571024994660020,
           "type": "LONG"
@@ -100,6 +105,7 @@
           "columnId": 3,
           "count": 4950,
           "hasNull": true,
+          "bytesOnDisk": 3678,
           "min": "Darkness,",
           "max": "worst",
           "totalLength": 19397,
@@ -119,6 +125,7 @@
           "columnId": 1,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 20035,
           "min": -2146954065,
           "max": 2146722468,
           "sum": 20639652136,
@@ -128,6 +135,7 @@
           "columnId": 2,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 40050,
           "min": -9214076359988107846,
           "max": 9222919052987871506,
           "type": "LONG"
@@ -136,6 +144,7 @@
           "columnId": 3,
           "count": 4950,
           "hasNull": true,
+          "bytesOnDisk": 3685,
           "min": "Darkness,",
           "max": "worst",
           "totalLength": 19031,
@@ -155,6 +164,7 @@
           "columnId": 1,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 20035,
           "min": -2146969085,
           "max": 2146025044,
           "sum": -5156814387,
@@ -164,6 +174,7 @@
           "columnId": 2,
           "count": 5000,
           "hasNull": false,
+          "bytesOnDisk": 40050,
           "min": -9222731174895935707,
           "max": 9220625004936875965,
           "type": "LONG"
@@ -172,6 +183,7 @@
           "columnId": 3,
           "count": 4950,
           "hasNull": true,
+          "bytesOnDisk": 3671,
           "min": "Darkness,",
           "max": "worst",
           "totalLength": 19459,
@@ -191,6 +203,7 @@
           "columnId": 1,
           "count": 1000,
           "hasNull": false,
+          "bytesOnDisk": 4007,
           "min": -2144303438,
           "max": 2127599049,
           "sum": 62841564778,
@@ -200,6 +213,7 @@
           "columnId": 2,
           "count": 1000,
           "hasNull": false,
+          "bytesOnDisk": 8010,
           "min": -9195133638801798919,
           "max": 9218626063131504414,
           "type": "LONG"
@@ -208,6 +222,7 @@
           "columnId": 3,
           "count": 990,
           "hasNull": true,
+          "bytesOnDisk": 926,
           "min": "Darkness,",
           "max": "worst",
           "totalLength": 3963,
@@ -226,6 +241,7 @@
       "columnId": 1,
       "count": 21000,
       "hasNull": false,
+      "bytesOnDisk": 84147,
       "min": -2147390285,
       "max": 2147224606,
       "sum": 106145458863,
@@ -235,6 +251,7 @@
       "columnId": 2,
       "count": 21000,
       "hasNull": false,
+      "bytesOnDisk": 168210,
       "min": -9223180583305557329,
       "max": 9222919052987871506,
       "type": "LONG"
@@ -243,6 +260,7 @@
       "columnId": 3,
       "count": 20790,
       "hasNull": true,
+      "bytesOnDisk": 15645,
       "min": "Darkness,",
       "max": "worst",
       "totalLength": 81133,
@@ -1348,7 +1366,7 @@
       }]
     }
   ],
-  "fileLength": 272436,
+  "fileLength": 272486,
   "paddingLength": 0,
   "paddingRatio": 0,
   "status": "OK"

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-dump.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump.out 
b/java/tools/src/test/resources/orc-file-dump.out
index f03a6f2..2ae99ce 100644
--- a/java/tools/src/test/resources/orc-file-dump.out
+++ b/java/tools/src/test/resources/orc-file-dump.out
@@ -8,35 +8,35 @@ Type: struct<i:int,l:bigint,s:string>
 Stripe Statistics:
   Stripe 1:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 
515792826
-    Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 
9221614132680747961
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146021688 
max: 2147223299 sum: 515792826
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9218592812243954469 max: 9221614132680747961
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3701 min: Darkness, max: 
worst sum: 19280
   Stripe 2:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 
7673427
-    Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 
9222259462014003839
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146733128 
max: 2147001622 sum: 7673427
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9220818777591257749 max: 9222259462014003839
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3690 min: Darkness, max: 
worst sum: 19504
   Stripe 3:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 
132660742551
-    Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 
9222303228623055266
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146993718 
max: 2147378179 sum: 132660742551
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9218342074710552826 max: 9222303228623055266
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3702 min: Darkness, max: 
worst sum: 19641
   Stripe 4:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 
8533549236
-    Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 
9221043130193737406
-    Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 20035 min: -2146658006 
max: 2145520931 sum: 8533549236
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 40050 min: 
-9222758097219661129 max: 9221043130193737406
+    Column 3: count: 5000 hasNull: false bytesOnDisk: 3732 min: Darkness, max: 
worst sum: 19470
   Stripe 5:
     Column 0: count: 1000 hasNull: false
-    Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 
51299706363
-    Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 
9218567213558056476
-    Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866
+    Column 1: count: 1000 hasNull: false bytesOnDisk: 4007 min: -2146245500 
max: 2146378640 sum: 51299706363
+    Column 2: count: 1000 hasNull: false bytesOnDisk: 8010 min: 
-9208193203370316142 max: 9218567213558056476
+    Column 3: count: 1000 hasNull: false bytesOnDisk: 926 min: Darkness, max: 
worst sum: 3866
 
 File Statistics:
   Column 0: count: 21000 hasNull: false
-  Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 
193017464403
-  Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 
9222303228623055266
-  Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761
+  Column 1: count: 21000 hasNull: false bytesOnDisk: 84147 min: -2146993718 
max: 2147378179 sum: 193017464403
+  Column 2: count: 21000 hasNull: false bytesOnDisk: 168210 min: 
-9222758097219661129 max: 9222303228623055266
+  Column 3: count: 21000 hasNull: false bytesOnDisk: 15751 min: Darkness, max: 
worst sum: 81761
 
 Stripes:
   Stripe: offset: 3 data: 63786 rows: 5000 tail: 79 index: 439
@@ -188,7 +188,7 @@ Stripes:
     Row group indices for column 3:
       Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 
positions: 0,0,0
 
-File length: 270996 bytes
+File length: 271047 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/java/tools/src/test/resources/orc-file-has-null.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-has-null.out 
b/java/tools/src/test/resources/orc-file-has-null.out
index d7e78f7..ed963dd 100644
--- a/java/tools/src/test/resources/orc-file-has-null.out
+++ b/java/tools/src/test/resources/orc-file-has-null.out
@@ -8,25 +8,25 @@ Type: struct<bytes1:binary,string1:string>
 Stripe Statistics:
   Stripe 1:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false sum: 15000
-    Column 2: count: 2000 hasNull: true min: RG1 max: RG3 sum: 6000
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000
+    Column 2: count: 2000 hasNull: true bytesOnDisk: 46 min: RG1 max: RG3 sum: 
6000
   Stripe 2:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false sum: 15000
-    Column 2: count: 0 hasNull: true
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000
+    Column 2: count: 0 hasNull: true bytesOnDisk: 11
   Stripe 3:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false sum: 15000
-    Column 2: count: 5000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 40000
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000
+    Column 2: count: 5000 hasNull: false bytesOnDisk: 32 min: STRIPE-3 max: 
STRIPE-3 sum: 40000
   Stripe 4:
     Column 0: count: 5000 hasNull: false
-    Column 1: count: 5000 hasNull: false sum: 15000
-    Column 2: count: 0 hasNull: true
+    Column 1: count: 5000 hasNull: false bytesOnDisk: 174 sum: 15000
+    Column 2: count: 0 hasNull: true bytesOnDisk: 11
 
 File Statistics:
   Column 0: count: 20000 hasNull: false
-  Column 1: count: 20000 hasNull: false sum: 60000
-  Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000
+  Column 1: count: 20000 hasNull: false bytesOnDisk: 696 sum: 60000
+  Column 2: count: 7000 hasNull: true bytesOnDisk: 100 min: RG1 max: STRIPE-3 
sum: 46000
 
 Stripes:
   Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154
@@ -105,7 +105,7 @@ Stripes:
       Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
       Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
 
-File length: 1825 bytes
+File length: 1842 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 
________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/896dffc3/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index e6e797f..f92e531 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -81,6 +81,7 @@ message ColumnStatistics {
   optional BinaryStatistics binaryStatistics = 8;
   optional TimestampStatistics timestampStatistics = 9;
   optional bool hasNull = 10;
+  optional uint64 bytesOnDisk = 11;
 }
 
 message RowIndexEntry {

Reply via email to