/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.orc.impl;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.junit.Test;

import com.google.common.math.LongMath;

/**
 * Tests for {@link SerializationUtils}: float/double round trips, the
 * variable-length BigInteger encoding, and overflow-safe subtraction
 * (cross-checked against Guava's {@code LongMath.checkedSubtract}).
 */
public class TestSerializationUtils {

  /** Wraps everything written to {@code buffer} so far into a readable stream. */
  private InputStream fromBuffer(ByteArrayOutputStream buffer) {
    return new ByteArrayInputStream(buffer.toByteArray());
  }

  @Test
  public void testDoubles() throws Exception {
    double tolerance = 0.0000000000000001;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SerializationUtils utils = new SerializationUtils();
    utils.writeDouble(buffer, 1343822337.759);
    assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
    buffer = new ByteArrayOutputStream();
    utils.writeDouble(buffer, 0.8);
    double got = utils.readDouble(fromBuffer(buffer));
    assertEquals(0.8, got, tolerance);
  }

  @Test
  public void testBigIntegers() throws Exception {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    // The asserted byte patterns (1 -> 2, -1 -> 1, 50 -> 100, -50 -> 99)
    // reflect a zig-zag style sign encoding in the low bit.
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
    assertArrayEquals(new byte[]{0}, buffer.toByteArray());
    assertEquals(0L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
    assertArrayEquals(new byte[]{2}, buffer.toByteArray());
    assertEquals(1L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
    assertArrayEquals(new byte[]{1}, buffer.toByteArray());
    assertEquals(-1L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
    assertArrayEquals(new byte[]{100}, buffer.toByteArray());
    assertEquals(50L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
    assertArrayEquals(new byte[]{99}, buffer.toByteArray());
    assertEquals(-50L,
        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
    // Values in [-64, 64) must fit in one byte; the rest of [-8192, 8192)
    // must fit in two.
    for(int i=-8192; i < 8192; ++i) {
      buffer.reset();
      SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
      assertEquals("compare length for " + i,
          i >= -64 && i < 64 ? 1 : 2, buffer.size());
      assertEquals("compare result for " + i,
          i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
    }
    // Round-trip values wider than a long, in both signs.
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger("123456789abcdef0",16));
    assertEquals(new BigInteger("123456789abcdef0",16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger("-123456789abcdef0",16));
    assertEquals(new BigInteger("-123456789abcdef0",16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
    // A 256-byte number built from every byte value 00..ff.
    StringBuilder buf = new StringBuilder();
    for(int i=0; i < 256; ++i) {
      String num = Integer.toHexString(i);
      if (num.length() == 1) {
        buf.append('0');
      }
      buf.append(num);
    }
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger(buf.toString(),16));
    assertEquals(new BigInteger(buf.toString(),16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
    // A value with a long run of zero bytes in the middle.
    buffer.reset();
    SerializationUtils.writeBigInteger(buffer,
        new BigInteger("ff000000000000000000000000000000000000000000ff",16));
    assertEquals(
        new BigInteger("ff000000000000000000000000000000000000000000ff",16),
        SerializationUtils.readBigInteger(fromBuffer(buffer)));
  }

  @Test
  public void testSubtractionOverflow() {
    // cross check results with Guava results in testSubtractionOverflowGuava
    SerializationUtils utils = new SerializationUtils();
    assertFalse(utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
    assertFalse(utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
    assertFalse(utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
    assertTrue(utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
    assertTrue(utils.isSafeSubtract(0, Long.MAX_VALUE));
    assertTrue(utils.isSafeSubtract(Long.MIN_VALUE, 0));
  }

  /** Asserts that Guava reports overflow for {@code left - right}. */
  private static void assertSubtractOverflows(long left, long right) {
    try {
      LongMath.checkedSubtract(left, right);
      fail("expected ArithmeticException for " + left + " - " + right);
    } catch (ArithmeticException ex) {
      // JUnit convention: expected value first, actual second.
      assertEquals("overflow", ex.getMessage());
    }
  }

  @Test
  public void testSubtractionOverflowGuava() {
    // The same inputs as testSubtractionOverflow, validated against Guava.
    assertSubtractOverflows(22222222222L, Long.MIN_VALUE);
    assertSubtractOverflows(-22222222222L, Long.MAX_VALUE);
    assertSubtractOverflows(Long.MIN_VALUE, Long.MAX_VALUE);

    assertEquals(-8106206116692740190L,
        LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
    assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
    assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
  }

  @Test
  public void testRandomFloats() throws Exception {
    float tolerance = 0.0000000000000001f;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SerializationUtils utils = new SerializationUtils();
    // Fixed seed keeps the test deterministic and failures reproducible.
    Random rand = new Random(1);
    int n = 100_000;
    float[] expected = new float[n];
    for (int i = 0; i < n; i++) {
      float f = rand.nextFloat();
      expected[i] = f;
      utils.writeFloat(buffer, f);
    }
    InputStream newBuffer = fromBuffer(buffer);
    for (int i = 0; i < n; i++) {
      float got = utils.readFloat(newBuffer);
      assertEquals(expected[i], got, tolerance);
    }
  }

  @Test
  public void testRandomDoubles() throws Exception {
    double tolerance = 0.0000000000000001;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    SerializationUtils utils = new SerializationUtils();
    // Fixed seed keeps the test deterministic and failures reproducible.
    Random rand = new Random(1);
    int n = 100_000;
    double[] expected = new double[n];
    for (int i = 0; i < n; i++) {
      double d = rand.nextDouble();
      expected[i] = d;
      utils.writeDouble(buffer, d);
    }
    InputStream newBuffer = fromBuffer(buffer);
    for (int i = 0; i < n; i++) {
      double got = utils.readDouble(newBuffer);
      assertEquals(expected[i], got, tolerance);
    }
  }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestStreamName.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestStreamName.java b/java/core/src/test/org/apache/orc/impl/TestStreamName.java new file mode 100644 index 0000000..be58d4c --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestStreamName.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.orc.impl; + +import org.apache.orc.OrcProto; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestStreamName { + + @Test + public void test1() throws Exception { + StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA); + StreamName s2 = new StreamName(3, + OrcProto.Stream.Kind.DICTIONARY_DATA); + StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA); + StreamName s4 = new StreamName(5, + OrcProto.Stream.Kind.DICTIONARY_DATA); + StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA); + assertEquals(true, s1.equals(s1)); + assertEquals(false, s1.equals(s2)); + assertEquals(false, s1.equals(s3)); + assertEquals(true, s1.equals(s1p)); + assertEquals(true, s1.compareTo(null) < 0); + assertEquals(false, s1.equals(null)); + assertEquals(true, s1.compareTo(s2) < 0); + assertEquals(true, s2.compareTo(s3) < 0); + assertEquals(true, s3.compareTo(s4) < 0); + assertEquals(true, s4.compareTo(s1p) > 0); + assertEquals(0, s1p.compareTo(s1)); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java b/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java new file mode 100644 index 0000000..3d4612c --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestStringRedBlackTree.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.orc.impl;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.junit.Test;

import java.io.IOException;

// org.junit.Assert replaces the deprecated junit.framework.Assert.
import static org.junit.Assert.assertEquals;

/**
 * Test the red-black tree with string keys.
 */
public class TestStringRedBlackTree {

  /**
   * Checks the red-black tree rules to make sure that we have correctly built
   * a valid tree.
   *
   * Properties:
   *   1. Red nodes must have black children
   *   2. Each node must have the same black height on both sides.
   *
   * @param node The id of the root of the subtree to check for the red-black
   *             tree properties.
   * @return The black-height of the subtree.
   */
  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
                          ) throws IOException {
    if (node == RedBlackTree.NULL) {
      return 1;
    }
    count.set(count.get() + 1);
    boolean is_red = tree.isRed(node);
    int left = tree.getLeft(node);
    int right = tree.getRight(node);
    if (is_red) {
      // Property 1: a red node may not have a red child.
      if (tree.isRed(left)) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException("Left node of " + node + " is " + left +
          " and both are red.");
      }
      if (tree.isRed(right)) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException("Right node of " + node + " is " +
          right + " and both are red.");
      }
    }
    // Property 2: both subtrees must have the same black-height.
    int left_depth = checkSubtree(tree, left, count);
    int right_depth = checkSubtree(tree, right, count);
    if (left_depth != right_depth) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("Lopsided tree at node " + node +
        " with depths " + left_depth + " and " + right_depth);
    }
    // Only black nodes contribute to black-height.
    if (is_red) {
      return left_depth;
    } else {
      return left_depth + 1;
    }
  }

  /**
   * Checks the validity of the entire tree. Also ensures that the number of
   * nodes visited is the same as the size of the set.
   */
  void checkTree(RedBlackTree tree) throws IOException {
    IntWritable count = new IntWritable(0);
    if (tree.isRed(tree.root)) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("root is red");
    }
    checkSubtree(tree, tree.root, count);
    if (count.get() != tree.size) {
      printTree(tree, "", tree.root);
      throw new IllegalStateException("Broken tree! visited= " + count.get() +
        " size=" + tree.size);
    }
  }

  /** Dumps the subtree rooted at {@code node} to stderr for debugging. */
  void printTree(RedBlackTree tree, String indent, int node
                ) throws IOException {
    if (node == RedBlackTree.NULL) {
      System.err.println(indent + "NULL");
    } else {
      System.err.println(indent + "Node " + node + " color " +
        (tree.isRed(node) ? "red" : "black"));
      printTree(tree, indent + "  ", tree.getLeft(node));
      printTree(tree, indent + "  ", tree.getRight(node));
    }
  }

  /**
   * Visitor that checks each word against an expected in-order word list and
   * its expected original insertion position, and verifies that the bytes
   * written by the context round-trip to the same string.
   */
  private static class MyVisitor implements StringRedBlackTree.Visitor {
    private final String[] words;
    private final int[] order;
    private final DataOutputBuffer buffer = new DataOutputBuffer();
    int current = 0;

    MyVisitor(String[] args, int[] order) {
      words = args;
      this.order = order;
    }

    @Override
    public void visit(StringRedBlackTree.VisitorContext context
                     ) throws IOException {
      String word = context.getText().toString();
      assertEquals("in word " + current, words[current], word);
      assertEquals("in word " + current, order[current],
        context.getOriginalPosition());
      buffer.reset();
      context.writeBytes(buffer);
      assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
      current += 1;
    }
  }

  /** Visits the tree and checks its in-order contents and original positions. */
  void checkContents(StringRedBlackTree tree, int[] order,
                     String... params
                    ) throws IOException {
    tree.visit(new MyVisitor(params, order));
  }

  /** Builds a tree from the given words, validating invariants after each add. */
  StringRedBlackTree buildTree(String... params) throws IOException {
    StringRedBlackTree result = new StringRedBlackTree(1000);
    for(String word: params) {
      result.add(word);
      checkTree(result);
    }
    return result;
  }

  @Test
  public void test1() throws Exception {
    StringRedBlackTree tree = new StringRedBlackTree(5);
    assertEquals(0, tree.getSizeInBytes());
    checkTree(tree);
    // add() returns the id of the word; duplicates keep their original id.
    assertEquals(0, tree.add("owen"));
    checkTree(tree);
    assertEquals(1, tree.add("ashutosh"));
    checkTree(tree);
    assertEquals(0, tree.add("owen"));
    checkTree(tree);
    assertEquals(2, tree.add("alan"));
    checkTree(tree);
    assertEquals(2, tree.add("alan"));
    checkTree(tree);
    assertEquals(1, tree.add("ashutosh"));
    checkTree(tree);
    assertEquals(3, tree.add("greg"));
    checkTree(tree);
    assertEquals(4, tree.add("eric"));
    checkTree(tree);
    assertEquals(5, tree.add("arun"));
    checkTree(tree);
    assertEquals(6, tree.size());
    checkTree(tree);
    assertEquals(6, tree.add("eric14"));
    checkTree(tree);
    assertEquals(7, tree.add("o"));
    checkTree(tree);
    assertEquals(8, tree.add("ziggy"));
    checkTree(tree);
    assertEquals(9, tree.add("z"));
    checkTree(tree);
    checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
      "alan", "arun", "ashutosh", "eric", "eric14", "greg",
      "o", "owen", "z", "ziggy");
    assertEquals(32888, tree.getSizeInBytes());
    // check that adding greg again bumps the count
    assertEquals(3, tree.add("greg"));
    assertEquals(41, tree.getCharacterSize());
    // add some more strings to test the different branches of the
    // rebalancing
    assertEquals(10, tree.add("zak"));
    checkTree(tree);
    assertEquals(11, tree.add("eric1"));
    checkTree(tree);
    assertEquals(12, tree.add("ash"));
    checkTree(tree);
    assertEquals(13, tree.add("harry"));
    checkTree(tree);
    assertEquals(14, tree.add("john"));
    checkTree(tree);
    tree.clear();
    checkTree(tree);
    assertEquals(0, tree.getSizeInBytes());
    assertEquals(0, tree.getCharacterSize());
  }

  @Test
  public void test2() throws Exception {
    // Insertion in ascending order: original positions match sorted order.
    StringRedBlackTree tree =
      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
    assertEquals(26, tree.size());
    checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
      15,16,17, 18,19,20, 21,22,23, 24,25},
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
  }

  @Test
  public void test3() throws Exception {
    // Insertion in descending order: original positions are reversed.
    StringRedBlackTree tree =
      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
    assertEquals(26, tree.size());
    checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
      13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.orc.impl;

import org.apache.orc.CompressionCodec;
import org.junit.Test;

import java.io.IOException;
import java.nio.ByteBuffer;

// org.junit.Assert replaces the deprecated junit.framework.Assert.
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

/**
 * Tests for {@link ZlibCodec}: compression that does not shrink the input,
 * and decompression of corrupt data.
 */
public class TestZlib {

  @Test
  public void testNoOverflow() throws Exception {
    ByteBuffer in = ByteBuffer.allocate(10);
    ByteBuffer out = ByteBuffer.allocate(10);
    in.put(new byte[]{1,2,3,4,5,6,7,10});
    in.flip();
    CompressionCodec codec = new ZlibCodec();
    // Tiny incompressible input: compress must report false rather than
    // overflow the equally small output buffer.
    assertEquals(false, codec.compress(in, out, null));
  }

  @Test
  public void testCorrupt() throws Exception {
    ByteBuffer buf = ByteBuffer.allocate(1000);
    buf.put(new byte[]{127,-128,0,99,98,-1});
    buf.flip();
    CompressionCodec codec = new ZlibCodec();
    ByteBuffer out = ByteBuffer.allocate(1000);
    try {
      codec.decompress(buf, out);
      fail("expected IOException decompressing corrupt data");
    } catch (IOException ioe) {
      // EXPECTED
    }
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.tools; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.PrintStream; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import 
org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.orc.CompressionKind; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestFileDump { + + Path workDir = new Path(System.getProperty("test.tmp.dir")); + Configuration conf; + FileSystem fs; + Path testFilePath; + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + fs.setWorkingDirectory(workDir); + testFilePath = new Path("TestFileDump.testDump.orc"); + fs.delete(testFilePath, false); + } + + static TypeDescription getMyRecordType() { + return TypeDescription.createStruct() + .addField("i", TypeDescription.createInt()) + .addField("l", TypeDescription.createLong()) + .addField("s", TypeDescription.createString()); + } + + static void appendMyRecord(VectorizedRowBatch batch, + int i, + long l, + String str) { + ((LongColumnVector) batch.cols[0]).vector[batch.size] = i; + ((LongColumnVector) batch.cols[1]).vector[batch.size] = l; + if (str == null) { + batch.cols[2].noNulls = false; + batch.cols[2].isNull[batch.size] = true; + } else { + ((BytesColumnVector) batch.cols[2]).setVal(batch.size, + str.getBytes()); + } + batch.size += 1; + } + + static TypeDescription getAllTypesType() { + return TypeDescription.createStruct() + .addField("b", TypeDescription.createBoolean()) + .addField("bt", TypeDescription.createByte()) + .addField("s", TypeDescription.createShort()) + .addField("i", TypeDescription.createInt()) + .addField("l", TypeDescription.createLong()) + .addField("f", TypeDescription.createFloat()) + .addField("d", TypeDescription.createDouble()) + .addField("de", TypeDescription.createDecimal()) + .addField("t", TypeDescription.createTimestamp()) + .addField("dt", 
TypeDescription.createDate()) + .addField("str", TypeDescription.createString()) + .addField("c", TypeDescription.createChar().withMaxLength(5)) + .addField("vc", TypeDescription.createVarchar().withMaxLength(10)) + .addField("m", TypeDescription.createMap( + TypeDescription.createString(), + TypeDescription.createString())) + .addField("a", TypeDescription.createList(TypeDescription.createInt())) + .addField("st", TypeDescription.createStruct() + .addField("i", TypeDescription.createInt()) + .addField("s", TypeDescription.createString())); + } + + static void appendAllTypes(VectorizedRowBatch batch, + boolean b, + byte bt, + short s, + int i, + long l, + float f, + double d, + HiveDecimalWritable de, + Timestamp t, + DateWritable dt, + String str, + String c, + String vc, + Map<String, String> m, + List<Integer> a, + int sti, + String sts) { + int row = batch.size++; + ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0; + ((LongColumnVector) batch.cols[1]).vector[row] = bt; + ((LongColumnVector) batch.cols[2]).vector[row] = s; + ((LongColumnVector) batch.cols[3]).vector[row] = i; + ((LongColumnVector) batch.cols[4]).vector[row] = l; + ((DoubleColumnVector) batch.cols[5]).vector[row] = f; + ((DoubleColumnVector) batch.cols[6]).vector[row] = d; + ((DecimalColumnVector) batch.cols[7]).vector[row].set(de); + ((TimestampColumnVector) batch.cols[8]).set(row, t); + ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays(); + ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes()); + ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes()); + ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes()); + MapColumnVector map = (MapColumnVector) batch.cols[13]; + int offset = map.childCount; + map.offsets[row] = offset; + map.lengths[row] = m.size(); + map.childCount += map.lengths[row]; + for(Map.Entry<String, String> entry: m.entrySet()) { + ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes()); + ((BytesColumnVector) 
map.values).setVal(offset++, + entry.getValue().getBytes()); + } + ListColumnVector list = (ListColumnVector) batch.cols[14]; + offset = list.childCount; + list.offsets[row] = offset; + list.lengths[row] = a.size(); + list.childCount += list.lengths[row]; + for(int e=0; e < a.size(); ++e) { + ((LongColumnVector) list.child).vector[offset + e] = a.get(e); + } + StructColumnVector struct = (StructColumnVector) batch.cols[15]; + ((LongColumnVector) struct.fields[0]).vector[row] = sti; + ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes()); + } + + public static void checkOutput(String expected, + String actual) throws Exception { + BufferedReader eStream = + new BufferedReader(new FileReader + (TestJsonFileDump.getFileFromClasspath(expected))); + BufferedReader aStream = + new BufferedReader(new FileReader(actual)); + String expectedLine = eStream.readLine().trim(); + while (expectedLine != null) { + String actualLine = aStream.readLine().trim(); + System.out.println("actual: " + actualLine); + System.out.println("expected: " + expectedLine); + Assert.assertEquals(expectedLine, actualLine); + expectedLine = eStream.readLine(); + expectedLine = expectedLine == null ? 
null : expectedLine.trim(); + } + Assert.assertNull(eStream.readLine()); + Assert.assertNull(aStream.readLine()); + eStream.close(); + aStream.close(); + } + + @Test + public void testDump() throws Exception { + TypeDescription schema = getMyRecordType(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .compress(CompressionKind.ZLIB) + .stripeSize(100000) + .rowIndexStride(1000)); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + VectorizedRowBatch batch = schema.createRowBatch(1000); + for(int i=0; i < 21000; ++i) { + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"}); + System.out.flush(); + 
System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testDataDump() throws Exception { + TypeDescription schema = getAllTypesType(); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.NONE) + .bufferSize(10000) + .rowIndexStride(1000)); + VectorizedRowBatch batch = schema.createRowBatch(1000); + Map<String, String> m = new HashMap<String, String>(2); + m.put("k1", "v1"); + appendAllTypes(batch, + true, + (byte) 10, + (short) 100, + 1000, + 10000L, + 4.0f, + 20.0, + new HiveDecimalWritable("4.2222"), + new Timestamp(1416967764000L), + new DateWritable(new Date(1416967764000L)), + "string", + "hello", + "hello", + m, + Arrays.asList(100, 200), + 10, "foo"); + m.clear(); + m.put("k3", "v3"); + appendAllTypes( + batch, + false, + (byte)20, + (short)200, + 2000, + 20000L, + 8.0f, + 40.0, + new HiveDecimalWritable("2.2222"), + new Timestamp(1416967364000L), + new DateWritable(new Date(1411967764000L)), + "abcd", + "world", + "world", + m, + Arrays.asList(200, 300), + 20, "bar"); + writer.addRowBatch(batch); + + writer.close(); + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "-d"}); + System.out.flush(); + System.setOut(origOut); + String[] lines = myOut.toString().split("\n"); + Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]); + 
Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]); + } + + // Test that if the fraction of rows that have distinct strings is greater than the configured + // threshold dictionary encoding is turned off. If dictionary encoding is turned off the length + // of the dictionary stream for the column will be 0 in the ORC file dump. + @Test + public void testDictionaryThreshold() throws Exception { + TypeDescription schema = getMyRecordType(); + Configuration conf = new Configuration(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .rowIndexStride(1000) + .bufferSize(10000)); + VectorizedRowBatch batch = schema.createRowBatch(1000); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + int nextInt = 0; + for(int i=0; i < 21000; ++i) { + // 
Write out the same string twice, this guarantees the fraction of rows with + // distinct strings is 0.5 + if (i % 2 == 0) { + nextInt = r1.nextInt(words.length); + // Append the value of i to the word, this guarantees when an index or word is repeated + // the actual string is unique. + words[nextInt] += "-" + i; + } + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size != 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-dictionary-threshold.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"}); + System.out.flush(); + System.setOut(origOut); + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testBloomFilter() throws Exception { + TypeDescription schema = getMyRecordType(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("S"); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", 
"hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + VectorizedRowBatch batch = schema.createRowBatch(1000); + for(int i=0; i < 21000; ++i) { + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-bloomfilter.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + + @Test + public void testBloomFilter2() throws Exception { + TypeDescription schema = getMyRecordType(); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("l") + .bloomFilterFpp(0.01); + VectorizedRowBatch batch = schema.createRowBatch(1000); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", 
"was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", "we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + for(int i=0; i < 21000; ++i) { + appendMyRecord(batch, r1.nextInt(), r1.nextLong(), + words[r1.nextInt(words.length)]); + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-bloomfilter2.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java new file mode 100644 index 0000000..a514824 --- /dev/null +++ b/java/core/src/test/org/apache/orc/tools/TestJsonFileDump.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.tools; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.PrintStream; +import java.net.URL; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.orc.CompressionKind; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.junit.Before; +import org.junit.Test; + +public class TestJsonFileDump { + public static String getFileFromClasspath(String name) { + URL url = ClassLoader.getSystemResource(name); + if (url == null) { + throw new IllegalArgumentException("Could not find " + name); + } + return url.getPath(); + } + + Path workDir = new Path(System.getProperty("test.tmp.dir")); + Configuration conf; + FileSystem fs; + Path testFilePath; + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + 
fs.setWorkingDirectory(workDir); + testFilePath = new Path("TestFileDump.testDump.orc"); + fs.delete(testFilePath, false); + } + + static void checkOutput(String expected, + String actual) throws Exception { + BufferedReader eStream = + new BufferedReader(new FileReader(getFileFromClasspath(expected))); + BufferedReader aStream = + new BufferedReader(new FileReader(actual)); + String expectedLine = eStream.readLine(); + while (expectedLine != null) { + String actualLine = aStream.readLine(); + System.out.println("actual: " + actualLine); + System.out.println("expected: " + expectedLine); + assertEquals(expectedLine, actualLine); + expectedLine = eStream.readLine(); + } + assertNull(eStream.readLine()); + assertNull(aStream.readLine()); + } + + @Test + public void testJsonDump() throws Exception { + TypeDescription schema = TypeDescription.createStruct() + .addField("i", TypeDescription.createInt()) + .addField("l", TypeDescription.createLong()) + .addField("s", TypeDescription.createString()); + conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION"); + OrcFile.WriterOptions options = OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema) + .stripeSize(100000) + .compress(CompressionKind.ZLIB) + .bufferSize(10000) + .rowIndexStride(1000) + .bloomFilterColumns("s"); + Writer writer = OrcFile.createWriter(testFilePath, options); + Random r1 = new Random(1); + String[] words = new String[]{"It", "was", "the", "best", "of", "times,", + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age", + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it", + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch", + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,", + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the", + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,", + "we", "had", "everything", "before", "us,", "we", "had", "nothing", + "before", "us,", 
"we", "were", "all", "going", "direct", "to", + "Heaven,", "we", "were", "all", "going", "direct", "the", "other", + "way"}; + VectorizedRowBatch batch = schema.createRowBatch(1000); + for(int i=0; i < 21000; ++i) { + ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt(); + ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong(); + if (i % 100 == 0) { + batch.cols[2].noNulls = false; + batch.cols[2].isNull[batch.size] = true; + } else { + ((BytesColumnVector) batch.cols[2]).setVal(batch.size, + words[r1.nextInt(words.length)].getBytes()); + } + batch.size += 1; + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size > 0) { + writer.addRowBatch(batch); + } + + writer.close(); + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump.json"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"}); + System.out.flush(); + System.setOut(origOut); + + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/resources/orc-file-11-format.orc ---------------------------------------------------------------------- diff --git a/java/core/src/test/resources/orc-file-11-format.orc b/java/core/src/test/resources/orc-file-11-format.orc new file mode 100644 index 0000000..41653c8 Binary files /dev/null and b/java/core/src/test/resources/orc-file-11-format.orc differ http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/resources/orc-file-dump-bloomfilter.out ---------------------------------------------------------------------- diff --git a/java/core/src/test/resources/orc-file-dump-bloomfilter.out b/java/core/src/test/resources/orc-file-dump-bloomfilter.out new 
file mode 100644 index 0000000..18fd2fb --- /dev/null +++ b/java/core/src/test/resources/orc-file-dump-bloomfilter.out @@ -0,0 +1,179 @@ +Structure for TestFileDump.testDump.orc +File Version: 0.12 with HIVE_13083 +Rows: 21000 +Compression: ZLIB +Compression size: 4096 +Type: struct<i:int,l:bigint,s:string> + +Stripe Statistics: + Stripe 1: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Stripe 2: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Stripe 3: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Stripe 4: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Stripe 5: + Column 0: count: 1000 hasNull: false + Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + +File Statistics: + Column 0: count: 21000 hasNull: false + Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 
193017464403 + Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + +Stripes: + Stripe: offset: 3 data: 63786 rows: 5000 tail: 86 index: 951 + Stream: column 0 section ROW_INDEX start: 3 length 17 + Stream: column 1 section ROW_INDEX start: 20 length 166 + Stream: column 2 section ROW_INDEX start: 186 length 169 + Stream: column 3 section ROW_INDEX start: 355 length 87 + Stream: column 3 section BLOOM_FILTER start: 442 length 512 + Stream: column 1 section DATA start: 954 length 20035 + Stream: column 2 section DATA start: 20989 length 40050 + Stream: column 3 section DATA start: 61039 length 3543 + Stream: column 3 section LENGTH start: 64582 length 25 + Stream: column 3 section DICTIONARY_DATA start: 64607 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3862 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3884 positions: 0,659,149 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3893 positions: 0,1531,3 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3798 positions: 0,2281,32 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3843 positions: 0,3033,45 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 
expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 64826 data: 63775 rows: 5000 tail: 86 index: 944 + Stream: column 0 section ROW_INDEX start: 64826 length 17 + Stream: column 1 section ROW_INDEX start: 64843 length 164 + Stream: column 2 section ROW_INDEX start: 65007 length 168 + Stream: column 3 section ROW_INDEX start: 65175 length 83 + Stream: column 3 section BLOOM_FILTER start: 65258 length 512 + Stream: column 1 section DATA start: 65770 length 20035 + Stream: column 2 section DATA start: 85805 length 40050 + Stream: column 3 section DATA start: 125855 length 3532 + Stream: column 3 section LENGTH start: 129387 length 25 + Stream: column 3 section DICTIONARY_DATA start: 129412 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3923 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3869 positions: 0,761,12 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,1472,70 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3931 positions: 0,2250,43 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3964 positions: 0,2978,88 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe 
level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 129631 data: 63787 rows: 5000 tail: 86 index: 950 + Stream: column 0 section ROW_INDEX start: 129631 length 17 + Stream: column 1 section ROW_INDEX start: 129648 length 163 + Stream: column 2 section ROW_INDEX start: 129811 length 168 + Stream: column 3 section ROW_INDEX start: 129979 length 90 + Stream: column 3 section BLOOM_FILTER start: 130069 length 512 + Stream: column 1 section DATA start: 130581 length 20035 + Stream: column 2 section DATA start: 150616 length 40050 + Stream: column 3 section DATA start: 190666 length 3544 + Stream: column 3 section LENGTH start: 194210 length 25 + Stream: column 3 section DICTIONARY_DATA start: 194235 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 4008 positions: 0,634,174 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3999 positions: 0,1469,69 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3817 positions: 0,2133,194 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 4000 positions: 0,3005,43 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: 
numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 194454 data: 63817 rows: 5000 tail: 86 index: 952 + Stream: column 0 section ROW_INDEX start: 194454 length 17 + Stream: column 1 section ROW_INDEX start: 194471 length 165 + Stream: column 2 section ROW_INDEX start: 194636 length 167 + Stream: column 3 section ROW_INDEX start: 194803 length 91 + Stream: column 3 section BLOOM_FILTER start: 194894 length 512 + Stream: column 1 section DATA start: 195406 length 20035 + Stream: column 2 section DATA start: 215441 length 40050 + Stream: column 3 section DATA start: 255491 length 3574 + Stream: column 3 section LENGTH start: 259065 length 25 + Stream: column 3 section DICTIONARY_DATA start: 259090 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3901 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: Darkness, max: worst sum: 3900 positions: 0,431,431 + Entry 2: count: 1000 hasNull: false min: Darkness, max: worst sum: 3909 positions: 0,1485,52 + Entry 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3947 positions: 0,2196,104 + Entry 4: count: 1000 hasNull: false min: Darkness, max: worst sum: 3813 positions: 0,2934,131 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 2: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 3: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Entry 4: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 
bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe: offset: 259309 data: 12943 rows: 1000 tail: 78 index: 432 + Stream: column 0 section ROW_INDEX start: 259309 length 12 + Stream: column 1 section ROW_INDEX start: 259321 length 38 + Stream: column 2 section ROW_INDEX start: 259359 length 41 + Stream: column 3 section ROW_INDEX start: 259400 length 40 + Stream: column 3 section BLOOM_FILTER start: 259440 length 301 + Stream: column 1 section DATA start: 259741 length 4007 + Stream: column 2 section DATA start: 263748 length 8010 + Stream: column 3 section DATA start: 271758 length 768 + Stream: column 3 section LENGTH start: 272526 length 25 + Stream: column 3 section DICTIONARY_DATA start: 272551 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 3: + Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0 + Bloom filters for column 3: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7 + +File length: 273307 bytes +Padding length: 0 bytes +Padding ratio: 0% +________________________________________________________________________________________________________________________ + http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/resources/orc-file-dump-bloomfilter2.out ---------------------------------------------------------------------- diff --git a/java/core/src/test/resources/orc-file-dump-bloomfilter2.out b/java/core/src/test/resources/orc-file-dump-bloomfilter2.out new file mode 100644 index 0000000..fa5cc2d --- /dev/null +++ b/java/core/src/test/resources/orc-file-dump-bloomfilter2.out @@ -0,0 +1,179 @@ +Structure for TestFileDump.testDump.orc +File Version: 0.12 with HIVE_13083 +Rows: 
21000 +Compression: ZLIB +Compression size: 4096 +Type: struct<i:int,l:bigint,s:string> + +Stripe Statistics: + Stripe 1: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146021688 max: 2147223299 sum: 515792826 + Column 2: count: 5000 hasNull: false min: -9218592812243954469 max: 9221614132680747961 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19280 + Stripe 2: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146733128 max: 2147001622 sum: 7673427 + Column 2: count: 5000 hasNull: false min: -9220818777591257749 max: 9222259462014003839 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19504 + Stripe 3: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146993718 max: 2147378179 sum: 132660742551 + Column 2: count: 5000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19641 + Stripe 4: + Column 0: count: 5000 hasNull: false + Column 1: count: 5000 hasNull: false min: -2146658006 max: 2145520931 sum: 8533549236 + Column 2: count: 5000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 + Column 3: count: 5000 hasNull: false min: Darkness, max: worst sum: 19470 + Stripe 5: + Column 0: count: 1000 hasNull: false + Column 1: count: 1000 hasNull: false min: -2146245500 max: 2146378640 sum: 51299706363 + Column 2: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 + Column 3: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 + +File Statistics: + Column 0: count: 21000 hasNull: false + Column 1: count: 21000 hasNull: false min: -2146993718 max: 2147378179 sum: 193017464403 + Column 2: count: 21000 hasNull: false min: -9222758097219661129 max: 9222303228623055266 + Column 3: count: 21000 hasNull: false min: Darkness, max: worst sum: 81761 + +Stripes: + Stripe: offset: 3 data: 63786 
rows: 5000 tail: 85 index: 6974 + Stream: column 0 section ROW_INDEX start: 3 length 17 + Stream: column 1 section ROW_INDEX start: 20 length 166 + Stream: column 2 section ROW_INDEX start: 186 length 169 + Stream: column 2 section BLOOM_FILTER start: 355 length 6535 + Stream: column 3 section ROW_INDEX start: 6890 length 87 + Stream: column 1 section DATA start: 6977 length 20035 + Stream: column 2 section DATA start: 27012 length 40050 + Stream: column 3 section DATA start: 67062 length 3543 + Stream: column 3 section LENGTH start: 70605 length 25 + Stream: column 3 section DICTIONARY_DATA start: 70630 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9200577545527640566 max: 9175500305011173751 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9203618157670445774 max: 9208123824411178101 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9218592812243954469 max: 9221351515892923972 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9206585617947511272 max: 9167703224425685487 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9206645795733282496 max: 9221614132680747961 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4931 loadFactor: 0.5136 expectedFpp: 0.009432924 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4956 loadFactor: 0.5163 expectedFpp: 0.009772834 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4949 loadFactor: 0.5155 expectedFpp: 0.009676614 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9347 loadFactor: 0.9736 expectedFpp: 
0.829482 + Stripe: offset: 70848 data: 63775 rows: 5000 tail: 85 index: 6965 + Stream: column 0 section ROW_INDEX start: 70848 length 17 + Stream: column 1 section ROW_INDEX start: 70865 length 164 + Stream: column 2 section ROW_INDEX start: 71029 length 168 + Stream: column 2 section BLOOM_FILTER start: 71197 length 6533 + Stream: column 3 section ROW_INDEX start: 77730 length 83 + Stream: column 1 section DATA start: 77813 length 20035 + Stream: column 2 section DATA start: 97848 length 40050 + Stream: column 3 section DATA start: 137898 length 3532 + Stream: column 3 section LENGTH start: 141430 length 25 + Stream: column 3 section DICTIONARY_DATA start: 141455 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9218450653857701562 max: 9189819526332228512 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9220818777591257749 max: 9178821722829648113 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9220031433030423388 max: 9210838931786956852 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9208195729739635607 max: 9222259462014003839 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9174271499932339698 max: 9212277876771676916 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4971 loadFactor: 0.5178 expectedFpp: 0.009981772 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4988 loadFactor: 0.5196 expectedFpp: 0.010223193 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4962 loadFactor: 0.5169 expectedFpp: 0.009855959 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4966 loadFactor: 0.5173 expectedFpp: 0.009911705 + Stripe level merge: numHashFunctions: 7 
bitCount: 9600 popCount: 9344 loadFactor: 0.9733 expectedFpp: 0.8276205 + Stripe: offset: 141673 data: 63787 rows: 5000 tail: 85 index: 6971 + Stream: column 0 section ROW_INDEX start: 141673 length 17 + Stream: column 1 section ROW_INDEX start: 141690 length 163 + Stream: column 2 section ROW_INDEX start: 141853 length 168 + Stream: column 2 section BLOOM_FILTER start: 142021 length 6533 + Stream: column 3 section ROW_INDEX start: 148554 length 90 + Stream: column 1 section DATA start: 148644 length 20035 + Stream: column 2 section DATA start: 168679 length 40050 + Stream: column 3 section DATA start: 208729 length 3544 + Stream: column 3 section LENGTH start: 212273 length 25 + Stream: column 3 section DICTIONARY_DATA start: 212298 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9211978436552246208 max: 9179058898902097152 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9195645160817780503 max: 9189147759444307708 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9202888157616520823 max: 9193561362676960747 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9216318198067839390 max: 9221286760675829363 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9218342074710552826 max: 9222303228623055266 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4967 loadFactor: 0.5174 expectedFpp: 0.009925688 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 5002 loadFactor: 0.521 expectedFpp: 0.01042575 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4964 loadFactor: 0.5171 expectedFpp: 0.009883798 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4943 loadFactor: 0.5149 expectedFpp: 0.009594797 + Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4930 loadFactor: 
0.5135 expectedFpp: 0.009419539 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9333 loadFactor: 0.9722 expectedFpp: 0.82082444 + Stripe: offset: 212516 data: 63817 rows: 5000 tail: 85 index: 6964 + Stream: column 0 section ROW_INDEX start: 212516 length 17 + Stream: column 1 section ROW_INDEX start: 212533 length 165 + Stream: column 2 section ROW_INDEX start: 212698 length 167 + Stream: column 2 section BLOOM_FILTER start: 212865 length 6524 + Stream: column 3 section ROW_INDEX start: 219389 length 91 + Stream: column 1 section DATA start: 219480 length 20035 + Stream: column 2 section DATA start: 239515 length 40050 + Stream: column 3 section DATA start: 279565 length 3574 + Stream: column 3 section LENGTH start: 283139 length 25 + Stream: column 3 section DICTIONARY_DATA start: 283164 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9222731174895935707 max: 9214167447015056056 positions: 0,0,0 + Entry 1: count: 1000 hasNull: false min: -9222758097219661129 max: 9221043130193737406 positions: 4099,2,488 + Entry 2: count: 1000 hasNull: false min: -9174483776261243438 max: 9208134757538374043 positions: 12297,6,464 + Entry 3: count: 1000 hasNull: false min: -9174329712613510612 max: 9197412874152820822 positions: 20495,10,440 + Entry 4: count: 1000 hasNull: false min: -9221162005892422758 max: 9220625004936875965 positions: 28693,14,416 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4951 loadFactor: 0.5157 expectedFpp: 0.009704026 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 4969 loadFactor: 0.5176 expectedFpp: 0.009953696 + Entry 2: numHashFunctions: 7 bitCount: 9600 popCount: 4994 loadFactor: 0.5202 expectedFpp: 0.010309587 + Entry 3: numHashFunctions: 7 bitCount: 9600 popCount: 4941 loadFactor: 0.5147 expectedFpp: 0.009567649 + 
Entry 4: numHashFunctions: 7 bitCount: 9600 popCount: 4993 loadFactor: 0.5201 expectedFpp: 0.010295142 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 9353 loadFactor: 0.9743 expectedFpp: 0.8332165 + Stripe: offset: 283382 data: 12943 rows: 1000 tail: 78 index: 1468 + Stream: column 0 section ROW_INDEX start: 283382 length 12 + Stream: column 1 section ROW_INDEX start: 283394 length 38 + Stream: column 2 section ROW_INDEX start: 283432 length 41 + Stream: column 2 section BLOOM_FILTER start: 283473 length 1337 + Stream: column 3 section ROW_INDEX start: 284810 length 40 + Stream: column 1 section DATA start: 284850 length 4007 + Stream: column 2 section DATA start: 288857 length 8010 + Stream: column 3 section DATA start: 296867 length 768 + Stream: column 3 section LENGTH start: 297635 length 25 + Stream: column 3 section DICTIONARY_DATA start: 297660 length 133 + Encoding column 0: DIRECT + Encoding column 1: DIRECT_V2 + Encoding column 2: DIRECT_V2 + Encoding column 3: DICTIONARY_V2[35] + Row group indices for column 2: + Entry 0: count: 1000 hasNull: false min: -9208193203370316142 max: 9218567213558056476 positions: 0,0,0 + Bloom filters for column 2: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294 + +File length: 298416 bytes +Padding length: 0 bytes +Padding ratio: 0% +________________________________________________________________________________________________________________________ +
