/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.orc.impl;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.junit.Test;

import com.google.common.math.LongMath;

/**
 * Tests for {@link SerializationUtils}: float/double round trips, the
 * variable-length BigInteger encoding, and overflow-safe subtraction
 * (cross-checked against Guava's {@code LongMath.checkedSubtract}).
 */
public class TestSerializationUtils {

  /** Wraps everything written to {@code buffer} so far into a readable stream. */
  private InputStream fromBuffer(ByteArrayOutputStream buffer) {
    return new ByteArrayInputStream(buffer.toByteArray());
  }

  @Test
  public void testDoubles() throws Exception {
    double tolerance = 0.0000000000000001;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SerializationUtils utils = new SerializationUtils();
    utils.writeDouble(buffer, 1343822337.759);
    assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
    buffer = new ByteArrayOutputStream();
    utils.writeDouble(buffer, 0.8);
    double got = utils.readDouble(fromBuffer(buffer));
    assertEquals(0.8, got, tolerance);
  }

  @Test
  public void testBigIntegers() throws Exception {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    // The asserted byte patterns (1 -> 2, -1 -> 1, 50 -> 100, -50 -> 99)
    // reflect a zig-zag style sign encoding in the low bit.
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
    assertArrayEquals(new byte[]{0}, buffer.toByteArray());
    assertEquals(0L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
    assertArrayEquals(new byte[]{2}, buffer.toByteArray());
    assertEquals(1L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
    assertArrayEquals(new byte[]{1}, buffer.toByteArray());
    assertEquals(-1L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
    assertArrayEquals(new byte[]{100}, buffer.toByteArray());
    assertEquals(50L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
    assertArrayEquals(new byte[]{99}, buffer.toByteArray());
    assertEquals(-50L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    // Values in [-64, 64) must fit in one byte; the rest of [-8192, 8192)
    // must fit in two.
    for(int i=-8192; i < 8192; ++i) {
      buffer.reset();
      SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
      assertEquals("compare length for " + i,
          i >= -64 && i < 64 ? 1 : 2, buffer.size());
      assertEquals("compare result for " + i,
          i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
    }
    // Round-trip values wider than a long, in both signs.
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger("123456789abcdef0",16));
    assertEquals(new BigInteger("123456789abcdef0",16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger("-123456789abcdef0",16));
    assertEquals(new BigInteger("-123456789abcdef0",16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
    // A 256-byte number built from every byte value 00..ff.
    StringBuilder buf = new StringBuilder();
    for(int i=0; i < 256; ++i) {
      String num = Integer.toHexString(i);
      if (num.length() == 1) {
        buf.append('0');
      }
      buf.append(num);
    }
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger(buf.toString(),16));
    assertEquals(new BigInteger(buf.toString(),16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
    // A value with a long run of zero bytes in the middle.
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger("ff000000000000000000000000000000000000000000ff",16));
    assertEquals(
        new BigInteger("ff000000000000000000000000000000000000000000ff",16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
  }

  @Test
  public void testSubtractionOverflow() {
    // cross check results with Guava results in testSubtractionOverflowGuava
    SerializationUtils utils = new SerializationUtils();
    assertFalse(utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
    assertFalse(utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
    assertFalse(utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
    assertTrue(utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
    assertTrue(utils.isSafeSubtract(0, Long.MAX_VALUE));
    assertTrue(utils.isSafeSubtract(Long.MIN_VALUE, 0));
  }

  /** Asserts that Guava reports overflow for {@code left - right}. */
  private static void assertSubtractOverflows(long left, long right) {
    try {
      LongMath.checkedSubtract(left, right);
      fail("expected ArithmeticException for " + left + " - " + right);
    } catch (ArithmeticException ex) {
      // JUnit convention: expected value first, actual second.
      assertEquals("overflow", ex.getMessage());
    }
  }

  @Test
  public void testSubtractionOverflowGuava() {
    // The same inputs as testSubtractionOverflow, validated against Guava.
    assertSubtractOverflows(22222222222L, Long.MIN_VALUE);
    assertSubtractOverflows(-22222222222L, Long.MAX_VALUE);
    assertSubtractOverflows(Long.MIN_VALUE, Long.MAX_VALUE);

    assertEquals(-8106206116692740190L,
        LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
    assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
    assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
  }

  @Test
  public void testRandomFloats() throws Exception {
    float tolerance = 0.0000000000000001f;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SerializationUtils utils = new SerializationUtils();
    // Fixed seed keeps the test deterministic and failures reproducible.
    Random rand = new Random(1);
    int n = 100_000;
    float[] expected = new float[n];
    for (int i = 0; i < n; i++) {
      float f = rand.nextFloat();
      expected[i] = f;
      utils.writeFloat(buffer, f);
    }
    InputStream newBuffer = fromBuffer(buffer);
    for (int i = 0; i < n; i++) {
      float got = utils.readFloat(newBuffer);
      assertEquals(expected[i], got, tolerance);
    }
  }

  @Test
  public void testRandomDoubles() throws Exception {
    double tolerance = 0.0000000000000001;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SerializationUtils utils = new SerializationUtils();
    // Fixed seed keeps the test deterministic and failures reproducible.
    Random rand = new Random(1);
    int n = 100_000;
    double[] expected = new double[n];
    for (int i = 0; i < n; i++) {
      double d = rand.nextDouble();
      expected[i] = d;
      utils.writeDouble(buffer, d);
    }
    InputStream newBuffer = fromBuffer(buffer);
    for (int i = 0; i < n; i++) {
      double got = utils.readDouble(newBuffer);
      assertEquals(expected[i], got, tolerance);
    }
  }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestStreamName.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestStreamName.java b/java/core/src/test/org/apache/orc/impl/TestStreamName.java new file mode 100644 index 0000000..be58d4c --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestStreamName.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.orc.impl; + +import org.apache.orc.OrcProto; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestStreamName { + + @Test + public void test1() throws Exception { + StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA); + StreamName s2 = new StreamName(3, + OrcProto.Stream.Kind.DICTIONARY_DATA); + StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA); + StreamName s4 = new StreamName(5, + OrcProto.Stream.Kind.DICTIONARY_DATA); + StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA); + assertEquals(true, s1.equals(s1)); + assertEquals(false, s1.equals(s2)); + assertEquals(false, s1.equals(s3)); + assertEquals(true, s1.equals(s1p)); + assertEquals(true, s1.compareTo(null) < 0); + assertEquals(false, s1.equals(null)); + assertEquals(true, s1.compareTo(s2) < 0); + assertEquals(true, s2.compareTo(s3) < 0); + assertEquals(true, s3.compareTo(s4) < 0); + assertEquals(true, s4.compareTo(s1p) > 0); + assertEquals(0, s1p.compareTo(s1)); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java b/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java new file mode 100644 index 0000000..3d4612c --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.orc.impl;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.junit.Test;

import java.io.IOException;

// org.junit.Assert replaces the deprecated junit.framework.Assert.
import static org.junit.Assert.assertEquals;

/**
 * Test the red-black tree with string keys.
 */
public class TestStringRedBlackTree {

  /**
   * Checks the red-black tree rules to make sure that we have correctly built
   * a valid tree.
   *
   * Properties:
   *   1. Red nodes must have black children
   *   2. Each node must have the same black height on both sides.
   *
   * @param node The id of the root of the subtree to check for the red-black
   *             tree properties.
   * @return The black-height of the subtree.
   */
  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
                          ) throws IOException {
    if (node == RedBlackTree.NULL) {
      return 1;
    }
    count.set(count.get() + 1);
    boolean is_red = tree.isRed(node);
    int left = tree.getLeft(node);
    int right = tree.getRight(node);
    if (is_red) {
      // Property 1: a red node may not have a red child.
      if (tree.isRed(left)) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException("Left node of " + node + " is " + left +
          " and both are red.");
      }
      if (tree.isRed(right)) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException("Right node of " + node + " is " +
          right + " and both are red.");
      }
    }
    // Property 2: both subtrees must have the same black-height.
    int left_depth = checkSubtree(tree, left, count);
    int right_depth = checkSubtree(tree, right, count);
    if (left_depth != right_depth) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("Lopsided tree at node " + node +
        " with depths " + left_depth + " and " + right_depth);
    }
    // Only black nodes contribute to black-height.
    if (is_red) {
      return left_depth;
    } else {
      return left_depth + 1;
    }
  }

  /**
   * Checks the validity of the entire tree. Also ensures that the number of
   * nodes visited is the same as the size of the set.
   */
  void checkTree(RedBlackTree tree) throws IOException {
    IntWritable count = new IntWritable(0);
    if (tree.isRed(tree.root)) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("root is red");
    }
    checkSubtree(tree, tree.root, count);
    if (count.get() != tree.size) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("Broken tree! visited= " + count.get() +
        " size=" + tree.size);
    }
  }

  /** Dumps the subtree rooted at {@code node} to stderr for debugging. */
  void printTree(RedBlackTree tree, String indent, int node
                ) throws IOException {
    if (node == RedBlackTree.NULL) {
      System.err.println(indent + "NULL");
    } else {
      System.err.println(indent + "Node " + node + " color " +
        (tree.isRed(node) ? "red" : "black"));
      printTree(tree, indent + "  ", tree.getLeft(node));
      printTree(tree, indent + "  ", tree.getRight(node));
    }
  }

  /**
   * Visitor that checks each word against an expected in-order word list and
   * its expected original insertion position, and verifies that the bytes
   * written by the context round-trip to the same string.
   */
  private static class MyVisitor implements StringRedBlackTree.Visitor {
    private final String[] words;
    private final int[] order;
    private final DataOutputBuffer buffer = new DataOutputBuffer();
    int current = 0;

    MyVisitor(String[] args, int[] order) {
      words = args;
      this.order = order;
    }

    @Override
    public void visit(StringRedBlackTree.VisitorContext context
                     ) throws IOException {
      String word = context.getText().toString();
      assertEquals("in word " + current, words[current], word);
      assertEquals("in word " + current, order[current],
        context.getOriginalPosition());
      buffer.reset();
      context.writeBytes(buffer);
      assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
      current += 1;
    }
  }

  /** Visits the tree and checks its in-order contents and original positions. */
  void checkContents(StringRedBlackTree tree, int[] order,
                     String... params
                    ) throws IOException {
    tree.visit(new MyVisitor(params, order));
  }

  /** Builds a tree from the given words, validating invariants after each add. */
  StringRedBlackTree buildTree(String... params) throws IOException {
    StringRedBlackTree result = new StringRedBlackTree(1000);
    for(String word: params) {
      result.add(word);
      checkTree(result);
    }
    return result;
  }

  @Test
  public void test1() throws Exception {
    StringRedBlackTree tree = new StringRedBlackTree(5);
    assertEquals(0, tree.getSizeInBytes());
    checkTree(tree);
    // add() returns the id of the word; duplicates keep their original id.
    assertEquals(0, tree.add("owen"));
    checkTree(tree);
    assertEquals(1, tree.add("ashutosh"));
    checkTree(tree);
    assertEquals(0, tree.add("owen"));
    checkTree(tree);
    assertEquals(2, tree.add("alan"));
    checkTree(tree);
    assertEquals(2, tree.add("alan"));
    checkTree(tree);
    assertEquals(1, tree.add("ashutosh"));
    checkTree(tree);
    assertEquals(3, tree.add("greg"));
    checkTree(tree);
    assertEquals(4, tree.add("eric"));
    checkTree(tree);
    assertEquals(5, tree.add("arun"));
    checkTree(tree);
    assertEquals(6, tree.size());
    checkTree(tree);
    assertEquals(6, tree.add("eric14"));
    checkTree(tree);
    assertEquals(7, tree.add("o"));
    checkTree(tree);
    assertEquals(8, tree.add("ziggy"));
    checkTree(tree);
    assertEquals(9, tree.add("z"));
    checkTree(tree);
    checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
      "alan", "arun", "ashutosh", "eric", "eric14", "greg",
      "o", "owen", "z", "ziggy");
    assertEquals(32888, tree.getSizeInBytes());
    // check that adding greg again bumps the count
    assertEquals(3, tree.add("greg"));
    assertEquals(41, tree.getCharacterSize());
    // add some more strings to test the different branches of the
    // rebalancing
    assertEquals(10, tree.add("zak"));
    checkTree(tree);
    assertEquals(11, tree.add("eric1"));
    checkTree(tree);
    assertEquals(12, tree.add("ash"));
    checkTree(tree);
    assertEquals(13, tree.add("harry"));
    checkTree(tree);
    assertEquals(14, tree.add("john"));
    checkTree(tree);
    tree.clear();
    checkTree(tree);
    assertEquals(0, tree.getSizeInBytes());
    assertEquals(0, tree.getCharacterSize());
  }

  @Test
  public void test2() throws Exception {
    // Insertion in ascending order: original positions match sorted order.
    StringRedBlackTree tree =
      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
    assertEquals(26, tree.size());
    checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
      15,16,17, 18,19,20, 21,22,23, 24,25},
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
  }

  @Test
  public void test3() throws Exception {
    // Insertion in descending order: original positions are reversed.
    StringRedBlackTree tree =
      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
    assertEquals(26, tree.size());
    checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
      13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.orc.impl;

import org.apache.orc.CompressionCodec;
import org.junit.Test;

import java.io.IOException;
import java.nio.ByteBuffer;

// org.junit.Assert replaces the deprecated junit.framework.Assert.
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

/**
 * Tests for {@link ZlibCodec}: compression that does not shrink the input,
 * and decompression of corrupt data.
 */
public class TestZlib {

  @Test
  public void testNoOverflow() throws Exception {
    ByteBuffer in = ByteBuffer.allocate(10);
    ByteBuffer out = ByteBuffer.allocate(10);
    in.put(new byte[]{1,2,3,4,5,6,7,10});
    in.flip();
    CompressionCodec codec = new ZlibCodec();
    // Tiny incompressible input: compress must report false rather than
    // overflow the equally small output buffer.
    assertEquals(false, codec.compress(in, out, null));
  }

  @Test
  public void testCorrupt() throws Exception {
    ByteBuffer buf = ByteBuffer.allocate(1000);
    buf.put(new byte[]{127,-128,0,99,98,-1});
    buf.flip();
    CompressionCodec codec = new ZlibCodec();
    ByteBuffer out = ByteBuffer.allocate(1000);
    try {
      codec.decompress(buf, out);
      fail("expected IOException decompressing corrupt data");
    } catch (IOException ioe) {
      // EXPECTED
    }
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.tools; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.PrintStream; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import 
org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.orc.CompressionKind; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestFileDump { + + Path workDir = new Path(System.getProperty("test.tmp.dir")); + Configuration conf; + FileSystem fs; + Path testFilePath; + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + fs.setWorkingDirectory(workDir); + testFilePath = new Path("TestFileDump.testDump.orc"); + fs.delete(testFilePath, false); + } + + static TypeDescription getMyRecordType() { + return TypeDescription.createStruct() + .addField("i", TypeDescription.createInt()) + .addField("l", TypeDescription.createLong()) + .addField("s", TypeDescription.createString()); + } + + static void appendMyRecord(VectorizedRowBatch batch, + int i, + long l, + String str) { + ((LongColumnVector) batch.cols[0]).vector[batch.size] = i; + ((LongColumnVector) batch.cols[1]).vector[batch.size] = l; + if (str == null) { + batch.cols[2].noNulls = false; + batch.cols[2].isNull[batch.size] = true; + } else { + ((BytesColumnVector) batch.cols[2]).setVal(batch.size, + str.getBytes()); + } + batch.size += 1; + } + + static TypeDescription getAllTypesType() { + return TypeDescription.createStruct() + .addField("b", TypeDescription.createBoolean()) + .addField("bt", TypeDescription.createByte()) + .addField("s", TypeDescription.createShort()) + .addField("i", TypeDescription.createInt()) + .addField("l", TypeDescription.createLong()) + .addField("f", TypeDescription.createFloat()) + .addField("d", TypeDescription.createDouble()) + .addField("de", TypeDescription.createDecimal()) + .addField("t", TypeDescription.createTimestamp()) + .addField("dt", 
TypeDescription.createDate()) + .addField("str", TypeDescription.createString()) + .addField("c", TypeDescription.createChar().withMaxLength(5)) + .addField("vc", TypeDescription.createVarchar().withMaxLength(10)) + .addField("m", TypeDescription.createMap( + TypeDescription.createString(), + TypeDescription.createString())) + .addField("a", TypeDescription.createList(TypeDescription.createInt())) + .addField("st", TypeDescription.createStruct() + .addField("i", TypeDescription.createInt()) + .addField("s", TypeDescription.createString())); + } + + static void appendAllTypes(VectorizedRowBatch batch, + boolean b, + byte bt, + short s, + int i, + long l, + float f, + double d, + HiveDecimalWritable de, + Timestamp t, + DateWritable dt, + String str, + String c, + String vc, + Map<String, String> m, + List<Integer> a, + int sti, + String sts) { + int row = batch.size++; + ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0; + ((LongColumnVector) batch.cols[1]).vector[row] = bt; + ((LongColumnVector) batch.cols[2]).vector[row] = s; + ((LongColumnVector) batch.cols[3]).vector[row] = i; + ((LongColumnVector) batch.cols[4]).vector[row] = l; + ((DoubleColumnVector) batch.cols[5]).vector[row] = f; + ((DoubleColumnVector) batch.cols[6]).vector[row] = d; + ((DecimalColumnVector) batch.cols[7]).vector[row].set(de); + ((TimestampColumnVector) batch.cols[8]).set(row, t); + ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays(); + ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes()); + ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes()); + ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes()); + MapColumnVector map = (MapColumnVector) batch.cols[13]; + int offset = map.childCount; + map.offsets[row] = offset; + map.lengths[row] = m.size(); + map.childCount += map.lengths[row]; + for(Map.Entry<String, String> entry: m.entrySet()) { + ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes()); + ((BytesColumnVector) 
map.values).setVal(offset++, + entry.getValue().getBytes()); + } + ListColumnVector list = (ListColumnVector) batch.cols[14]; + offset = list.childCount; + list.offsets[row] = offset; + list.lengths[row] = a.size(); + list.childCount += list.lengths[row]; + for(int e=0; e < a.size(); ++e) { + ((LongColumnVector) list.child).vector[offset + e] = a.get(e); + } + StructColumnVector struct = (StructColumnVector) batch.cols[15]; + ((LongColumnVector) struct.fields[0]).vector[row] = sti; + ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes()); + } + + public static void checkOutput(String expected, + String actual) throws Exception { + BufferedReader eStream = + new BufferedReader(new FileReader + (TestJsonFileDump.getFileFromClasspath(expected))); + BufferedReader aStream = + new BufferedReader(new FileReader(actual)); + String expectedLine = eStream.readLine().trim(); + while (expectedLine != null) { + String actualLine = aStream.readLine().trim(); + System.out.println("actual: " + actualLine); + System.out.println("expected: " + expectedLine); + Assert.assertEquals(expectedLine, actualLine); + expectedLine = eStream.readLine(); + expectedLine = expectedLine == null ? 
null : expectedLine.trim(); + } + Assert.assertNull(eStream.readLine()); + Assert.assertNull(aStream.readLine()); + eStream.close(); + aStream.close(); + } + + @Test + public void testDump() throws Exception { + TypeDescription schema = getMyRecordType(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .compress(CompressionKind.ZLIB) + .stripeSize(100000) + .rowIndexStride(1000)); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + VectorizedRowBatch batch = schema.createRowBatch(1000); + for(int i=0; i < 21000; ++i) { + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"}); + System.out.flush(); + 
System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testDataDump() throws Exception { + TypeDescription schema = getAllTypesType(); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.NONE) + .bufferSize(10000) + .rowIndexStride(1000)); + VectorizedRowBatch batch = schema.createRowBatch(1000); + Map<String, String> m = new HashMap<String, String>(2); + m.put("k1", "v1"); + appendAllTypes(batch, + true, + (byte) 10, + (short) 100, + 1000, + 10000L, + 4.0f, + 20.0, + new HiveDecimalWritable("4.2222"), + new Timestamp(1416967764000L), + new DateWritable(new Date(1416967764000L)), + "string", + "hello", + "hello", + m, + Arrays.asList(100, 200), + 10, "foo"); + m.clear(); + m.put("k3", "v3"); + appendAllTypes( + batch, + false, + (byte)20, + (short)200, + 2000, + 20000L, + 8.0f, + 40.0, + new HiveDecimalWritable("2.2222"), + new Timestamp(1416967364000L), + new DateWritable(new Date(1411967764000L)), + "abcd", + "world", + "world", + m, + Arrays.asList(200, 300), + 20, "bar"); + writer.addRowBatch(batch); + + writer.close(); + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "-d"}); + System.out.flush(); + System.setOut(origOut); + String[] lines = myOut.toString().split("\n"); + Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]); + 
Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]); + } + + // Test that if the fraction of rows that have distinct strings is greater than the configured + // threshold dictionary encoding is turned off. If dictionary encoding is turned off the length + // of the dictionary stream for the column will be 0 in the ORC file dump. + @Test + public void testDictionaryThreshold() throws Exception { + TypeDescription schema = getMyRecordType(); + Configuration conf = new Configuration(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .rowIndexStride(1000) + .bufferSize(10000)); + VectorizedRowBatch batch = schema.createRowBatch(1000); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + int nextInt = 0; + for(int i=0; i < 21000; ++i) { + // 
Write out the same string twice, this guarantees the fraction of rows with + // distinct strings is 0.5 + if (i % 2 == 0) { + nextInt = r1.nextInt(words.length); + // Append the value of i to the word, this guarantees when an index or word is repeated + // the actual string is unique. + words[nextInt] += "-" + i; + } + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size != 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-dictionary-threshold.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"}); + System.out.flush(); + System.setOut(origOut); + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testBloomFilter() throws Exception { + TypeDescription schema = getMyRecordType(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("S"); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", 
"hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + VectorizedRowBatch batch = schema.createRowBatch(1000); + for(int i=0; i < 21000; ++i) { + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-bloomfilter.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testBloomFilter2() throws Exception { + TypeDescription schema = getMyRecordType(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("l") + .bloomFilterFpp(0.01); + VectorizedRowBatch batch = schema.createRowBatch(1000); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", 
"was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + for(int i=0; i < 21000; ++i) { + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-bloomfilter2.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java new file mode 100644 index 0000000..a514824 --- /dev/null +++ b/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.tools; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.PrintStream; +import java.net.URL; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.orc.CompressionKind; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.junit.Before; +import org.junit.Test; + +public class TestJsonFileDump { + public static String getFileFromClasspath(String name) { + URL url = ClassLoader.getSystemResource(name); + if (url == null) { + throw new IllegalArgumentException("Could not find " + name); + } + return url.getPath(); + } + + Path workDir = new Path(System.getProperty("test.tmp.dir")); + Configuration conf; + FileSystem fs; + Path testFilePath; + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + 
fs.setWorkingDirectory(workDir); + testFilePath = new Path("TestFileDump.testDump.orc"); + fs.delete(testFilePath, false); + } + + static void checkOutput(String expected, + String actual) throws Exception { + BufferedReader eStream = + new BufferedReader(new FileReader(getFileFromClasspath(expected))); + BufferedReader aStream = + new BufferedReader(new FileReader(actual)); + String expectedLine = eStream.readLine(); + while (expectedLine != null) { + String actualLine = aStream.readLine(); + System.out.println("actual: " + actualLine); + System.out.println("expected: " + expectedLine); + assertEquals(expectedLine, actualLine); + expectedLine = eStream.readLine(); + } + assertNull(eStream.readLine()); + assertNull(aStream.readLine()); + } + + @Test + public void testJsonDump() throws Exception { + TypeDescription schema = TypeDescription.createStruct() + .addField("i", TypeDescription.createInt()) + .addField("l", TypeDescription.createLong()) + .addField("s", TypeDescription.createString()); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("s"); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", 
"we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + VectorizedRowBatch batch = schema.createRowBatch(1000); + for(int i=0; i < 21000; ++i) { + ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt(); + ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong(); + if (i % 100 == 0) { + batch.cols[2].noNulls = false; + batch.cols[2].isNull[batch.size] = true; + } else { + ((BytesColumnVector) batch.cols[2]).setVal(batch.size, + words[r1.nextInt(words.length)].getBytes()); + } + batch.size += 1; + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump.json"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/resources/orc-file-11-format.orc ---------------------------------------------------------------------- diff --git a/java/core/src/test/resources/orc-file-11-format.orc b/java/core/src/test/resources/orc-file-11-format.orc new file mode 100644 index 0000000..41653c8 Binary files /dev/null and b/java/core/src/test/resources/orc-file-11-format.orc differ http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/resources/orc-file-dump-bloomfilter.out ---------------------------------------------------------------------- diff --git a/java/core/src/test/resources/orc-file-dump-bloomfilter.out b/java/core/src/test/resources/orc-file-dump-bloomfilter.out new 
file mode 100644 index 0000000..18fd2fb --- /dev/null +++ b/java/core/src/test/resources/orc-file-dump-bloomfilter.out @@ -0,0 +1,179 @@ +Structure for TestFileDump.testDump.orc +File Version: 0.12 with HIVE_13083 +Rows: 21000 +Compression: ZLIB +Compression size: 4096 +Type: struct<i:int,l:bigint,s:string> + +Stripe Statistics: + Stripe 1: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Stripe 2: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Stripe 3: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Stripe 4: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Stripe 5: + Column 0: count: 1000 hasNull: false + Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + +File Statistics: + Column 0: count: 21000 hasNull: false + Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 
193017464403 + Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + +Stripes: + Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 951 + Stream: column 0 section ROW_INDEX start: 3 length 17 + Stream: column 1 section ROW_INDEX start: 20 length 166 + Stream: column 2 section ROW_INDEX start: 186 length 169 + Stream: column 3 section ROW_INDEX start: 355 length 87 + Stream: column 3 section BLOOM_FILTER start: 442 length 512 + Stream: column 1 section DATA start: 954 length 20035 + Stream: column 2 section DATA start: 20989 length 40050 + Stream: column 3 section DATA start: 61039 length 3543 + Stream: column 3 section LENGTH start: 64582 length 25 + Stream: column 3 section DICTIONARY_DATA start: 64607 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 
expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 64826 data: 63775 rows: 5000 tail: 86 index: 944 + Stream: column 0 section ROW_INDEX start: 64826 length 17 + Stream: column 1 section ROW_INDEX start: 64843 length 164 + Stream: column 2 section ROW_INDEX start: 65007 length 168 + Stream: column 3 section ROW_INDEX start: 65175 length 83 + Stream: column 3 section BLOOM_FILTER start: 65258 length 512 + Stream: column 1 section DATA start: 65770 length 20035 + Stream: column 2 section DATA start: 85805 length 40050 + Stream: column 3 section DATA start: 125855 length 3532 + Stream: column 3 section LENGTH start: 129387 length 25 + Stream: column 3 section DICTIONARY_DATA start: 129412 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe 
level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 129631 data: 63787 rows: 5000 tail: 86 index: 950 + Stream: column 0 section ROW_INDEX start: 129631 length 17 + Stream: column 1 section ROW_INDEX start: 129648 length 163 + Stream: column 2 section ROW_INDEX start: 129811 length 168 + Stream: column 3 section ROW_INDEX start: 129979 length 90 + Stream: column 3 section BLOOM_FILTER start: 130069 length 512 + Stream: column 1 section DATA start: 130581 length 20035 + Stream: column 2 section DATA start: 150616 length 40050 + Stream: column 3 section DATA start: 190666 length 3544 + Stream: column 3 section LENGTH start: 194210 length 25 + Stream: column 3 section DICTIONARY_DATA start: 194235 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: 
numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 194454 data: 63817 rows: 5000 tail: 86 index: 952 + Stream: column 0 section ROW_INDEX start: 194454 length 17 + Stream: column 1 section ROW_INDEX start: 194471 length 165 + Stream: column 2 section ROW_INDEX start: 194636 length 167 + Stream: column 3 section ROW_INDEX start: 194803 length 91 + Stream: column 3 section BLOOM_FILTER start: 194894 length 512 + Stream: column 1 section DATA start: 195406 length 20035 + Stream: column 2 section DATA start: 215441 length 40050 + Stream: column 3 section DATA start: 255491 length 3574 + Stream: column 3 section LENGTH start: 259065 length 25 + Stream: column 3 section DICTIONARY_DATA start: 259090 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 
bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 259309 data: 12943 rows: 1000 tail: 78 index: 432 + Stream: column 0 section ROW_INDEX start: 259309 length 12 + Stream: column 1 section ROW_INDEX start: 259321 length 38 + Stream: column 2 section ROW_INDEX start: 259359 length 41 + Stream: column 3 section ROW_INDEX start: 259400 length 40 + Stream: column 3 section BLOOM_FILTER start: 259440 length 301 + Stream: column 1 section DATA start: 259741 length 4007 + Stream: column 2 section DATA start: 263748 length 8010 + Stream: column 3 section DATA start: 271758 length 768 + Stream: column 3 section LENGTH start: 272526 length 25 + Stream: column 3 section DICTIONARY_DATA start: 272551 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + +File length: 273307 bytes +Padding length: 0 bytes +Padding ratio: 0% +________________________________________________________________________________________________________________________ + http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/resources/orc-file-dump-bloomfilter2.out ---------------------------------------------------------------------- diff --git a/java/core/src/test/resources/orc-file-dump-bloomfilter2.out b/java/core/src/test/resources/orc-file-dump-bloomfilter2.out new file mode 100644 index 0000000..fa5cc2d --- /dev/null +++ b/java/core/src/test/resources/orc-file-dump-bloomfilter2.out @@ -0,0 +1,179 @@ +Structure for TestFileDump.testDump.orc +File Version: 0.12 with HIVE_13083 +Rows: 
21000 +Compression: ZLIB +Compression size: 4096 +Type: struct<i:int,l:bigint,s:string> + +Stripe Statistics: + Stripe 1: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Stripe 2: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Stripe 3: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Stripe 4: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Stripe 5: + Column 0: count: 1000 hasNull: false + Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + +File Statistics: + Column 0: count: 21000 hasNull: false + Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403 + Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + +Stripes: + Stripe: offset: 3 data: 63786 
rows: 5000 tail: 85 index: 6974 + Stream: column 0 section ROW_INDEX start: 3 length 17 + Stream: column 1 section ROW_INDEX start: 20 length 166 + Stream: column 2 section ROW_INDEX start: 186 length 169 + Stream: column 2 section BLOOM_FILTER start: 355 length 6535 + Stream: column 3 section ROW_INDEX start: 6890 length 87 + Stream: column 1 section DATA start: 6977 length 20035 + Stream: column 2 section DATA start: 27012 length 40050 + Stream: column 3 section DATA start: 67062 length 3543 + Stream: column 3 section LENGTH start: 70605 length 25 + Stream: column 3 section DICTIONARY_DATA start: 70630 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4931 loadFactor: 0.5136 expectedFpp: 0.009432924 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4956 loadFactor: 0.5163 expectedFpp: 0.009772834 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 
0.829482 + Stripe: offset: 70848 data: 63775 rows: 5000 tail: 85 index: 6965 + Stream: column 0 section ROW_INDEX start: 70848 length 17 + Stream: column 1 section ROW_INDEX start: 70865 length 164 + Stream: column 2 section ROW_INDEX start: 71029 length 168 + Stream: column 2 section BLOOM_FILTER start: 71197 length 6533 + Stream: column 3 section ROW_INDEX start: 77730 length 83 + Stream: column 1 section DATA start: 77813 length 20035 + Stream: column 2 section DATA start: 97848 length 40050 + Stream: column 3 section DATA start: 137898 length 3532 + Stream: column 3 section LENGTH start: 141430 length 25 + Stream: column 3 section DICTIONARY_DATA start: 141455 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4988 loadFactor: 0.5196 expectedFpp: 0.010223193 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705 + Stripe level merge: numHashFunctions: 7 
bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205 + Stripe: offset: 141673 data: 63787 rows: 5000 tail: 85 index: 6971 + Stream: column 0 section ROW_INDEX start: 141673 length 17 + Stream: column 1 section ROW_INDEX start: 141690 length 163 + Stream: column 2 section ROW_INDEX start: 141853 length 168 + Stream: column 2 section BLOOM_FILTER start: 142021 length 6533 + Stream: column 3 section ROW_INDEX start: 148554 length 90 + Stream: column 1 section DATA start: 148644 length 20035 + Stream: column 2 section DATA start: 168679 length 40050 + Stream: column 3 section DATA start: 208729 length 3544 + Stream: column 3 section LENGTH start: 212273 length 25 + Stream: column 3 section DICTIONARY_DATA start: 212298 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4967 loadFactor: 0.5174 expectedFpp: 0.009925688 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4964 loadFactor: 0.5171 expectedFpp: 0.009883798 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 
0.5135 expectedFpp: 0.009419539 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444 + Stripe: offset: 212516 data: 63817 rows: 5000 tail: 85 index: 6964 + Stream: column 0 section ROW_INDEX start: 212516 length 17 + Stream: column 1 section ROW_INDEX start: 212533 length 165 + Stream: column 2 section ROW_INDEX start: 212698 length 167 + Stream: column 2 section BLOOM_FILTER start: 212865 length 6524 + Stream: column 3 section ROW_INDEX start: 219389 length 91 + Stream: column 1 section DATA start: 219480 length 20035 + Stream: column 2 section DATA start: 239515 length 40050 + Stream: column 3 section DATA start: 279565 length 3574 + Stream: column 3 section LENGTH start: 283139 length 25 + Stream: column 3 section DICTIONARY_DATA start: 283164 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4951 loadFactor: 0.5157 expectedFpp: 0.009704026 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4969 loadFactor: 0.5176 expectedFpp: 0.009953696 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4994 loadFactor: 0.5202 expectedFpp: 0.010309587 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649 + 
Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165 + Stripe: offset: 283382 data: 12943 rows: 1000 tail: 78 index: 1468 + Stream: column 0 section ROW_INDEX start: 283382 length 12 + Stream: column 1 section ROW_INDEX start: 283394 length 38 + Stream: column 2 section ROW_INDEX start: 283432 length 41 + Stream: column 2 section BLOOM_FILTER start: 283473 length 1337 + Stream: column 3 section ROW_INDEX start: 284810 length 40 + Stream: column 1 section DATA start: 284850 length 4007 + Stream: column 2 section DATA start: 288857 length 8010 + Stream: column 3 section DATA start: 296867 length 768 + Stream: column 3 section LENGTH start: 297635 length 25 + Stream: column 3 section DICTIONARY_DATA start: 297660 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 + +File length: 298416 bytes +Padding length: 0 bytes +Padding ratio: 0% +________________________________________________________________________________________________________________________ +
