http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestBitFieldReader.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestBitFieldReader.java b/java/core/src/test/org/apache/orc/impl/TestBitFieldReader.java new file mode 100644 index 0000000..e4c6f6b --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestBitFieldReader.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import static junit.framework.Assert.assertEquals; + +import java.nio.ByteBuffer; + +import org.apache.orc.CompressionCodec; +import org.junit.Test; + +public class TestBitFieldReader { + + public void runSeekTest(CompressionCodec codec) throws Exception { + TestInStream.OutputCollector collect = new TestInStream.OutputCollector(); + final int COUNT = 16384; + BitFieldWriter out = new BitFieldWriter( + new OutStream("test", 500, codec, collect), 1); + TestInStream.PositionCollector[] positions = + new TestInStream.PositionCollector[COUNT]; + for(int i=0; i < COUNT; ++i) { + positions[i] = new TestInStream.PositionCollector(); + out.getPosition(positions[i]); + // test runs, non-runs + if (i < COUNT / 2) { + out.write(i & 1); + } else { + out.write((i/3) & 1); + } + } + out.flush(); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + BitFieldReader in = new BitFieldReader(InStream.create("test", + new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), + codec, 500), 1); + for(int i=0; i < COUNT; ++i) { + int x = in.next(); + if (i < COUNT / 2) { + assertEquals(i & 1, x); + } else { + assertEquals((i/3) & 1, x); + } + } + for(int i=COUNT-1; i >= 0; --i) { + in.seek(positions[i]); + int x = in.next(); + if (i < COUNT / 2) { + assertEquals(i & 1, x); + } else { + assertEquals((i/3) & 1, x); + } + } + } + + @Test + public void testUncompressedSeek() throws Exception { + runSeekTest(null); + } + + @Test + public void testCompressedSeek() throws Exception { + runSeekTest(new ZlibCodec()); + } + + @Test + public void testBiggerItems() throws Exception { + TestInStream.OutputCollector collect = new TestInStream.OutputCollector(); + final int COUNT = 16384; + BitFieldWriter out = new BitFieldWriter( + new OutStream("test", 500, null, collect), 3); + for(int i=0; i < COUNT; ++i) { + // test runs, non-runs + if (i < COUNT / 2) { + out.write(i & 7); + } else { + out.write((i/3) & 7); + } + } + out.flush(); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + BitFieldReader in = new BitFieldReader(InStream.create("test", + new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(), + null, 500), 3); + for(int i=0; i < COUNT; ++i) { + int x = in.next(); + if (i < COUNT / 2) { + assertEquals(i & 7, x); + } else { + assertEquals((i/3) & 7, x); + } + } + } + + @Test + public void testSkips() throws Exception { + TestInStream.OutputCollector collect = new TestInStream.OutputCollector(); + BitFieldWriter out = new BitFieldWriter( + new OutStream("test", 100, null, collect), 1); + final int COUNT = 16384; + for(int i=0; i < COUNT; ++i) { + if (i < COUNT/2) { + out.write(i & 1); + } else { + out.write((i/3) & 1); + } + } + out.flush(); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + BitFieldReader in = new BitFieldReader(InStream.create("test", new ByteBuffer[]{inBuf}, + new long[]{0}, inBuf.remaining(), null, 100), 1); + for(int i=0; i < COUNT; i += 5) { + int x = (int) in.next(); + if (i < COUNT/2) { + assertEquals(i & 1, x); + } else { + assertEquals((i/3) & 1, x); + } + if (i < COUNT - 5) { + in.skip(4); + } + in.skip(0); + } + } +}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestBitPack.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestBitPack.java b/java/core/src/test/org/apache/orc/impl/TestBitPack.java new file mode 100644 index 0000000..f2d3d64 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestBitPack.java @@ -0,0 +1,279 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +import com.google.common.primitives.Longs; + +public class TestBitPack { + + private static final int SIZE = 100; + private static Random rand = new Random(100); + Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test" + + File.separator + "tmp")); + + Configuration conf; + FileSystem fs; + Path testFilePath; + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem() throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + private long[] deltaEncode(long[] inp) { + long[] output = new long[inp.length]; + SerializationUtils utils = new SerializationUtils(); + for (int i = 0; i < inp.length; i++) { + output[i] = utils.zigzagEncode(inp[i]); + } + return output; + } + + private long nextLong(Random rng, long n) { + long bits, val; + do { + bits = (rng.nextLong() << 1) >>> 1; + val = bits % n; + } while (bits - val + (n - 1) < 0L); + return val; + } + + private void runTest(int numBits) throws IOException { + long[] inp = new long[SIZE]; + for (int i = 0; i < SIZE; i++) { + long val = 0; + if (numBits <= 32) { + if (numBits == 1) { + val = -1 * rand.nextInt(2); + } else { + val = rand.nextInt((int) Math.pow(2, numBits - 1)); + } + } else { + val = nextLong(rand, (long) Math.pow(2, numBits - 2)); + } + if (val % 2 == 0) { + val = -val; + } + inp[i] = val; + } + long[] deltaEncoded = deltaEncode(inp); + long minInput = Collections.min(Longs.asList(deltaEncoded)); + long maxInput = Collections.max(Longs.asList(deltaEncoded)); + long rangeInput = maxInput - minInput; + SerializationUtils utils = new SerializationUtils(); + int fixedWidth = utils.findClosestNumBits(rangeInput); + TestInStream.OutputCollector collect = new TestInStream.OutputCollector(); + OutStream output = new OutStream("test", SIZE, null, collect); + utils.writeInts(deltaEncoded, 0, deltaEncoded.length, fixedWidth, output); + output.flush(); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + long[] buff = new long[SIZE]; + utils.readInts(buff, 0, SIZE, fixedWidth, InStream.create("test", new ByteBuffer[] { inBuf }, + new long[] { 0 }, inBuf.remaining(), null, SIZE)); + for (int i = 0; i < SIZE; i++) { + buff[i] = utils.zigzagDecode(buff[i]); + } + assertEquals(numBits, fixedWidth); + assertArrayEquals(inp, buff); + } + + @Test + public void test01BitPacking1Bit() throws IOException { + runTest(1); + } + + @Test + public void test02BitPacking2Bit() throws IOException { + runTest(2); + } + + @Test + public void test03BitPacking3Bit() throws IOException { + runTest(3); + } + + @Test + public void test04BitPacking4Bit() throws IOException { + runTest(4); + } + + @Test + public void test05BitPacking5Bit() throws IOException { + runTest(5); + } + + @Test + public void test06BitPacking6Bit() throws IOException { + runTest(6); + } + + @Test + public void test07BitPacking7Bit() throws IOException { + runTest(7); + } + + @Test + public void test08BitPacking8Bit() throws IOException { + runTest(8); + } + + @Test + public void test09BitPacking9Bit() throws IOException { + runTest(9); + } + + @Test + public void test10BitPacking10Bit() throws IOException { + runTest(10); + } + + @Test + public void test11BitPacking11Bit() throws IOException { + runTest(11); + } + + @Test + public void test12BitPacking12Bit() throws IOException { + runTest(12); + } + + @Test + public void test13BitPacking13Bit() throws IOException { + runTest(13); + } + + @Test + public void test14BitPacking14Bit() throws IOException { + runTest(14); + } + + @Test + public void test15BitPacking15Bit() throws IOException { + runTest(15); + } + + @Test + public void test16BitPacking16Bit() throws IOException { + runTest(16); + } + + @Test + public void test17BitPacking17Bit() throws IOException { + runTest(17); + } + + @Test + public void test18BitPacking18Bit() throws IOException { + runTest(18); + } + + @Test + public void test19BitPacking19Bit() throws IOException { + runTest(19); + } + + @Test + public void test20BitPacking20Bit() throws IOException { + runTest(20); + } + + @Test + public void test21BitPacking21Bit() throws IOException { + runTest(21); + } + + @Test + public void test22BitPacking22Bit() throws IOException { + runTest(22); + } + + @Test + public void test23BitPacking23Bit() throws IOException { + runTest(23); + } + + @Test + public void test24BitPacking24Bit() throws IOException { + runTest(24); + } + + @Test + public void test26BitPacking26Bit() throws IOException { + runTest(26); + } + + @Test + public void test28BitPacking28Bit() throws IOException { + runTest(28); + } + + @Test + public void test30BitPacking30Bit() throws IOException { + runTest(30); + } + + @Test + public void test32BitPacking32Bit() throws IOException { + runTest(32); + } + + @Test + public void test40BitPacking40Bit() throws IOException { + runTest(40); + } + + @Test + public void test48BitPacking48Bit() throws IOException { + runTest(48); + } + + @Test + public void test56BitPacking56Bit() throws IOException { + runTest(56); + } + + @Test + public void test64BitPacking64Bit() throws IOException { + runTest(64); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java b/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java new file mode 100644 index 0000000..6165526 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.orc.OrcProto; +import org.apache.orc.TypeDescription; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestColumnStatisticsImpl { + + @Test + public void testUpdateDate() throws Exception { + ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(TypeDescription.createDate()); + DateWritable date = new DateWritable(16400); + stat.increment(); + stat.updateDate(date); + assertDateStatistics(stat, 1, 16400, 16400); + + date.set(16410); + stat.increment(); + stat.updateDate(date); + assertDateStatistics(stat, 2, 16400, 16410); + + date.set(16420); + stat.increment(); + stat.updateDate(date); + assertDateStatistics(stat, 3, 16400, 16420); + } + + private void assertDateStatistics(ColumnStatisticsImpl stat, int count, int minimum, int maximum) { + OrcProto.ColumnStatistics.Builder builder = stat.serialize(); + + assertEquals(count, builder.getNumberOfValues()); + assertTrue(builder.hasDateStatistics()); + assertFalse(builder.hasStringStatistics()); + + OrcProto.DateStatistics protoStat = builder.getDateStatistics(); + assertTrue(protoStat.hasMinimum()); + assertEquals(minimum, protoStat.getMinimum()); + assertTrue(protoStat.hasMaximum()); + assertEquals(maximum, protoStat.getMaximum()); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestDataReaderProperties.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestDataReaderProperties.java b/java/core/src/test/org/apache/orc/impl/TestDataReaderProperties.java new file mode 100644 index 0000000..b9918f2 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestDataReaderProperties.java @@ -0,0 +1,69 @@ +package org.apache.orc.impl; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.orc.CompressionCodec; +import org.apache.orc.CompressionKind; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.mock; + +public class TestDataReaderProperties { + + private FileSystem mockedFileSystem = mock(FileSystem.class); + private Path mockedPath = mock(Path.class); + private boolean mockedZeroCopy = false; + + @Test + public void testCompleteBuild() { + DataReaderProperties properties = DataReaderProperties.builder() + .withFileSystem(mockedFileSystem) + .withPath(mockedPath) + .withCompression(CompressionKind.ZLIB) + .withZeroCopy(mockedZeroCopy) + .build(); + assertEquals(mockedFileSystem, properties.getFileSystem()); + assertEquals(mockedPath, properties.getPath()); + assertEquals(CompressionKind.ZLIB, properties.getCompression()); + assertEquals(mockedZeroCopy, properties.getZeroCopy()); + } + + @Test + public void testMissingNonRequiredArgs() { + DataReaderProperties properties = DataReaderProperties.builder() + .withFileSystem(mockedFileSystem) + .withPath(mockedPath) + .build(); + assertEquals(mockedFileSystem, properties.getFileSystem()); + assertEquals(mockedPath, properties.getPath()); + assertNull(properties.getCompression()); + assertFalse(properties.getZeroCopy()); + } + + @Test(expected = java.lang.NullPointerException.class) + public void testEmptyBuild() { + DataReaderProperties.builder().build(); + } + + @Test(expected = java.lang.NullPointerException.class) + public void testMissingPath() { + DataReaderProperties.builder() + .withFileSystem(mockedFileSystem) + .withCompression(CompressionKind.NONE) + .withZeroCopy(mockedZeroCopy) + .build(); + } + + @Test(expected = java.lang.NullPointerException.class) + public void testMissingFileSystem() { + DataReaderProperties.builder() + .withPath(mockedPath) + .withCompression(CompressionKind.NONE) + .withZeroCopy(mockedZeroCopy) + .build(); + } + +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java b/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java new file mode 100644 index 0000000..af583f7 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import java.util.Random; + +import org.apache.orc.impl.DynamicByteArray; +import org.apache.orc.impl.DynamicIntArray; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestDynamicArray { + + @Test + public void testByteArray() throws Exception { + DynamicByteArray dba = new DynamicByteArray(3, 10); + dba.add((byte) 0); + dba.add((byte) 1); + dba.set(3, (byte) 3); + dba.set(2, (byte) 2); + dba.add((byte) 4); + assertEquals("{0,1,2,3,4}", dba.toString()); + assertEquals(5, dba.size()); + byte[] val; + val = new byte[0]; + assertEquals(0, dba.compare(val, 0, 0, 2, 0)); + assertEquals(-1, dba.compare(val, 0, 0, 2, 1)); + val = new byte[]{3,42}; + assertEquals(1, dba.compare(val, 0, 1, 2, 0)); + assertEquals(1, dba.compare(val, 0, 1, 2, 1)); + assertEquals(0, dba.compare(val, 0, 1, 3, 1)); + assertEquals(-1, dba.compare(val, 0, 1, 3, 2)); + assertEquals(1, dba.compare(val, 0, 2, 3, 1)); + val = new byte[256]; + for(int b=-128; b < 128; ++b) { + dba.add((byte) b); + val[b+128] = (byte) b; + } + assertEquals(0, dba.compare(val, 0, 256, 5, 256)); + assertEquals(1, dba.compare(val, 0, 1, 0, 1)); + assertEquals(1, dba.compare(val, 254, 1, 0, 1)); + assertEquals(1, dba.compare(val, 120, 1, 64, 1)); + val = new byte[1024]; + Random rand = new Random(1701); + for(int i = 0; i < val.length; ++i) { + rand.nextBytes(val); + } + dba.add(val, 0, 1024); + assertEquals(1285, dba.size()); + assertEquals(0, dba.compare(val, 0, 1024, 261, 1024)); + } + + @Test + public void testIntArray() throws Exception { + DynamicIntArray dia = new DynamicIntArray(10); + for(int i=0; i < 10000; ++i) { + dia.add(2*i); + } + assertEquals(10000, dia.size()); + for(int i=0; i < 10000; ++i) { + assertEquals(2*i, dia.get(i)); + } + dia.clear(); + assertEquals(0, dia.size()); + dia.add(3); + dia.add(12); + dia.add(65); + assertEquals("{3,12,65}", dia.toString()); + for(int i=0; i < 5; ++i) { + dia.increment(i, 3); + } + assertEquals("{6,15,68,3,3}", dia.toString()); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestInStream.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestInStream.java b/java/core/src/test/org/apache/orc/impl/TestInStream.java new file mode 100644 index 0000000..9e65345 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestInStream.java @@ -0,0 +1,314 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.fail; + +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.apache.orc.CompressionCodec; +import org.junit.Test; + +public class TestInStream { + + static class OutputCollector implements OutStream.OutputReceiver { + DynamicByteArray buffer = new DynamicByteArray(); + + @Override + public void output(ByteBuffer buffer) throws IOException { + this.buffer.add(buffer.array(), buffer.arrayOffset() + buffer.position(), + buffer.remaining()); + } + } + + static class PositionCollector + implements PositionProvider, PositionRecorder { + private List<Long> positions = new ArrayList<Long>(); + private int index = 0; + + @Override + public long getNext() { + return positions.get(index++); + } + + @Override + public void addPosition(long offset) { + positions.add(offset); + } + + public void reset() { + index = 0; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder("position: "); + for(int i=0; i < positions.size(); ++i) { + if (i != 0) { + builder.append(", "); + } + builder.append(positions.get(i)); + } + return builder.toString(); + } + } + + @Test + public void testUncompressed() throws Exception { + OutputCollector collect = new OutputCollector(); + OutStream out = new OutStream("test", 100, null, collect); + PositionCollector[] positions = new PositionCollector[1024]; + for(int i=0; i < 1024; ++i) { + positions[i] = new PositionCollector(); + out.getPosition(positions[i]); + out.write(i); + } + out.flush(); + assertEquals(1024, collect.buffer.size()); + for(int i=0; i < 1024; ++i) { + assertEquals((byte) i, collect.buffer.get(i)); + } + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + InStream in = InStream.create("test", new ByteBuffer[]{inBuf}, + new long[]{0}, inBuf.remaining(), null, 100); + assertEquals("uncompressed stream test position: 0 length: 1024" + + " range: 0 offset: 0 limit: 0", + in.toString()); + for(int i=0; i < 1024; ++i) { + int x = in.read(); + assertEquals(i & 0xff, x); + } + for(int i=1023; i >= 0; --i) { + in.seek(positions[i]); + assertEquals(i & 0xff, in.read()); + } + } + + @Test + public void testCompressed() throws Exception { + OutputCollector collect = new OutputCollector(); + CompressionCodec codec = new ZlibCodec(); + OutStream out = new OutStream("test", 300, codec, collect); + PositionCollector[] positions = new PositionCollector[1024]; + for(int i=0; i < 1024; ++i) { + positions[i] = new PositionCollector(); + out.getPosition(positions[i]); + out.write(i); + } + out.flush(); + assertEquals("test", out.toString()); + assertEquals(961, collect.buffer.size()); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + InStream in = InStream.create("test", new ByteBuffer[]{inBuf}, + new long[]{0}, inBuf.remaining(), codec, 300); + assertEquals("compressed stream test position: 0 length: 961 range: 0" + + " offset: 0 limit: 0 range 0 = 0 to 961", + in.toString()); + for(int i=0; i < 1024; ++i) { + int x = in.read(); + assertEquals(i & 0xff, x); + } + assertEquals(0, in.available()); + for(int i=1023; i >= 0; --i) { + in.seek(positions[i]); + assertEquals(i & 0xff, in.read()); + } + } + + @Test + public void testCorruptStream() throws Exception { + OutputCollector collect = new OutputCollector(); + CompressionCodec codec = new ZlibCodec(); + OutStream out = new OutStream("test", 500, codec, collect); + PositionCollector[] positions = new PositionCollector[1024]; + for(int i=0; i < 1024; ++i) { + positions[i] = new PositionCollector(); + out.getPosition(positions[i]); + out.write(i); + } + out.flush(); + + // now try to read the stream with a buffer that is too small + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + InStream in = InStream.create("test", new ByteBuffer[]{inBuf}, + new long[]{0}, inBuf.remaining(), codec, 100); + byte[] contents = new byte[1024]; + try { + in.read(contents); + fail(); + } catch(IllegalArgumentException iae) { + // EXPECTED + } + + // make a corrupted header + inBuf.clear(); + inBuf.put((byte) 32); + inBuf.put((byte) 0); + inBuf.flip(); + in = InStream.create("test2", new ByteBuffer[]{inBuf}, new long[]{0}, + inBuf.remaining(), codec, 300); + try { + in.read(); + fail(); + } catch (IllegalStateException ise) { + // EXPECTED + } + } + + @Test + public void testDisjointBuffers() throws Exception { + OutputCollector collect = new OutputCollector(); + CompressionCodec codec = new ZlibCodec(); + OutStream out = new OutStream("test", 400, codec, collect); + PositionCollector[] positions = new PositionCollector[1024]; + DataOutput stream = new DataOutputStream(out); + for(int i=0; i < 1024; ++i) { + positions[i] = new PositionCollector(); + out.getPosition(positions[i]); + stream.writeInt(i); + } + out.flush(); + assertEquals("test", out.toString()); + assertEquals(1674, collect.buffer.size()); + ByteBuffer[] inBuf = new ByteBuffer[3]; + inBuf[0] = ByteBuffer.allocate(500); + inBuf[1] = ByteBuffer.allocate(1200); + inBuf[2] = ByteBuffer.allocate(500); + collect.buffer.setByteBuffer(inBuf[0], 0, 483); + collect.buffer.setByteBuffer(inBuf[1], 483, 1625 - 483); + collect.buffer.setByteBuffer(inBuf[2], 1625, 1674 - 1625); + + for(int i=0; i < inBuf.length; ++i) { + inBuf[i].flip(); + } + InStream in = InStream.create("test", inBuf, + new long[]{0,483, 1625}, 1674, codec, 400); + assertEquals("compressed stream test position: 0 length: 1674 range: 0" + + " offset: 0 limit: 0 range 0 = 0 to 483;" + + " range 1 = 483 to 1142; range 2 = 1625 to 49", + in.toString()); + DataInputStream inStream = new DataInputStream(in); + for(int i=0; i < 1024; ++i) { + int x = inStream.readInt(); + assertEquals(i, x); + } + assertEquals(0, in.available()); + for(int i=1023; i >= 0; --i) { + in.seek(positions[i]); + assertEquals(i, inStream.readInt()); + } + + in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]}, + new long[]{483, 1625}, 1674, codec, 400); + inStream = new DataInputStream(in); + positions[303].reset(); + in.seek(positions[303]); + for(int i=303; i < 1024; ++i) { + assertEquals(i, inStream.readInt()); + } + + in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]}, + new long[]{0, 1625}, 1674, codec, 400); + inStream = new DataInputStream(in); + positions[1001].reset(); + for(int i=0; i < 300; ++i) { + assertEquals(i, inStream.readInt()); + } + in.seek(positions[1001]); + for(int i=1001; i < 1024; ++i) { + assertEquals(i, inStream.readInt()); + } + } + + @Test + public void testUncompressedDisjointBuffers() throws Exception { + OutputCollector collect = new OutputCollector(); + OutStream out = new OutStream("test", 400, null, collect); + PositionCollector[] positions = new PositionCollector[1024]; + DataOutput stream = new DataOutputStream(out); + for(int i=0; i < 1024; ++i) { + positions[i] = new PositionCollector(); + out.getPosition(positions[i]); + stream.writeInt(i); + } + out.flush(); + assertEquals("test", out.toString()); + assertEquals(4096, collect.buffer.size()); + ByteBuffer[] inBuf = new ByteBuffer[3]; + inBuf[0] = ByteBuffer.allocate(1100); + inBuf[1] = ByteBuffer.allocate(2200); + inBuf[2] = ByteBuffer.allocate(1100); + collect.buffer.setByteBuffer(inBuf[0], 0, 1024); + collect.buffer.setByteBuffer(inBuf[1], 1024, 2048); + collect.buffer.setByteBuffer(inBuf[2], 3072, 1024); + + for(int i=0; i < inBuf.length; ++i) { + inBuf[i].flip(); + } + InStream in = InStream.create("test", inBuf, + new long[]{0, 1024, 3072}, 4096, null, 400); + assertEquals("uncompressed stream test position: 0 length: 4096" + + " range: 0 offset: 0 limit: 0", + in.toString()); + DataInputStream inStream = new DataInputStream(in); + for(int i=0; i < 1024; ++i) { + int x = inStream.readInt(); + assertEquals(i, x); + } + assertEquals(0, in.available()); + for(int i=1023; i >= 0; --i) { + in.seek(positions[i]); + assertEquals(i, inStream.readInt()); + } + + in = InStream.create("test", new ByteBuffer[]{inBuf[1], inBuf[2]}, + new long[]{1024, 3072}, 4096, null, 400); + inStream = new DataInputStream(in); + positions[256].reset(); + in.seek(positions[256]); + for(int i=256; i < 1024; ++i) { + assertEquals(i, inStream.readInt()); + } + + in = InStream.create("test", new ByteBuffer[]{inBuf[0], inBuf[2]}, + new long[]{0, 3072}, 4096, null, 400); + inStream = new DataInputStream(in); + positions[768].reset(); + for(int i=0; i < 256; ++i) { + assertEquals(i, inStream.readInt()); + } + in.seek(positions[768]); + for(int i=768; i < 1024; ++i) { + assertEquals(i, inStream.readInt()); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java b/java/core/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java new file mode 100644 index 0000000..399f35e --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestIntegerCompressionReader.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import static junit.framework.Assert.assertEquals; + +import java.nio.ByteBuffer; +import java.util.Random; + +import org.apache.orc.CompressionCodec; +import org.junit.Test; + +public class TestIntegerCompressionReader { + + public void runSeekTest(CompressionCodec codec) throws Exception { + TestInStream.OutputCollector collect = new TestInStream.OutputCollector(); + RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2( + new OutStream("test", 1000, codec, collect), true); + TestInStream.PositionCollector[] positions = + new TestInStream.PositionCollector[4096]; + Random random = new Random(99); + int[] junk = new int[2048]; + for(int i=0; i < junk.length; ++i) { + junk[i] = random.nextInt(); + } + for(int i=0; i < 4096; ++i) { + positions[i] = new TestInStream.PositionCollector(); + out.getPosition(positions[i]); + // test runs, incrementing runs, non-runs + if (i < 1024) { + out.write(i/4); + } else if (i < 2048) { + out.write(2*i); + } else { + out.write(junk[i-2048]); + } + } + out.flush(); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + RunLengthIntegerReaderV2 in = + new RunLengthIntegerReaderV2(InStream.create + ("test", new ByteBuffer[]{inBuf}, + new long[]{0}, inBuf.remaining(), + codec, 1000), true, false); + for(int i=0; i < 2048; ++i) { + int x = (int) in.next(); + if (i < 1024) { + assertEquals(i/4, x); + } else if (i < 2048) { + assertEquals(2*i, x); + } else { + assertEquals(junk[i-2048], x); + } + } + for(int i=2047; i >= 0; --i) { + in.seek(positions[i]); + int x = (int) in.next(); + if (i < 1024) { + assertEquals(i/4, x); + } else if (i < 2048) { + assertEquals(2*i, x); + } else { + assertEquals(junk[i-2048], x); + } + } + } + + @Test + public void testUncompressedSeek() throws Exception { + runSeekTest(null); + } + + @Test + public void testCompressedSeek() throws Exception { + runSeekTest(new ZlibCodec()); + } + + @Test + public void testSkips() throws Exception { + TestInStream.OutputCollector collect = new TestInStream.OutputCollector(); + RunLengthIntegerWriterV2 out = new RunLengthIntegerWriterV2( + new OutStream("test", 100, null, collect), true); + for(int i=0; i < 2048; ++i) { + if (i < 1024) { + out.write(i); + } else { + out.write(256 * i); + } + } + out.flush(); + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size()); + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size()); + inBuf.flip(); + RunLengthIntegerReaderV2 in = + new RunLengthIntegerReaderV2(InStream.create("test", + new ByteBuffer[]{inBuf}, + new long[]{0}, + inBuf.remaining(), + null, 100), true, false); + for(int i=0; i < 2048; i += 10) { + int x = (int) in.next(); + if (i < 1024) { + assertEquals(i, x); + } else { + assertEquals(256 * i, x); + } + if (i < 2038) { + in.skip(9); + } + in.skip(0); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestMemoryManager.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestMemoryManager.java b/java/core/src/test/org/apache/orc/impl/TestMemoryManager.java new file mode 100644 index 0000000..f48c545 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestMemoryManager.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.orc.impl.MemoryManager; +import org.hamcrest.BaseMatcher; +import org.hamcrest.Description; +import org.junit.Test; +import org.mockito.Matchers; +import org.mockito.Mockito; + +import java.lang.management.ManagementFactory; + +import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +/** + * Test the ORC memory manager. + */ +public class TestMemoryManager { + private static final double ERROR = 0.000001; + + private static class NullCallback implements MemoryManager.Callback { + public boolean checkMemory(double newScale) { + return false; + } + } + + @Test + public void testBasics() throws Exception { + Configuration conf = new Configuration(); + MemoryManager mgr = new MemoryManager(conf); + NullCallback callback = new NullCallback(); + long poolSize = mgr.getTotalMemoryPool(); + assertEquals(Math.round(ManagementFactory.getMemoryMXBean(). + getHeapMemoryUsage().getMax() * 0.5d), poolSize); + assertEquals(1.0, mgr.getAllocationScale(), 0.00001); + mgr.addWriter(new Path("p1"), 1000, callback); + assertEquals(1.0, mgr.getAllocationScale(), 0.00001); + mgr.addWriter(new Path("p1"), poolSize / 2, callback); + assertEquals(1.0, mgr.getAllocationScale(), 0.00001); + mgr.addWriter(new Path("p2"), poolSize / 2, callback); + assertEquals(1.0, mgr.getAllocationScale(), 0.00001); + mgr.addWriter(new Path("p3"), poolSize / 2, callback); + assertEquals(0.6666667, mgr.getAllocationScale(), 0.00001); + mgr.addWriter(new Path("p4"), poolSize / 2, callback); + assertEquals(0.5, mgr.getAllocationScale(), 0.000001); + mgr.addWriter(new Path("p4"), 3 * poolSize / 2, callback); + assertEquals(0.3333333, mgr.getAllocationScale(), 0.000001); + mgr.removeWriter(new Path("p1")); + mgr.removeWriter(new Path("p2")); + assertEquals(0.5, mgr.getAllocationScale(), 0.00001); + mgr.removeWriter(new Path("p4")); + assertEquals(1.0, mgr.getAllocationScale(), 0.00001); + } + + @Test + public void testConfig() throws Exception { + Configuration conf = new Configuration(); + conf.set("hive.exec.orc.memory.pool", "0.9"); + MemoryManager mgr = new MemoryManager(conf); + long mem = + ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax(); + System.err.print("Memory = " + mem); + long pool = mgr.getTotalMemoryPool(); + assertTrue("Pool too small: " + pool, mem * 0.899 < pool); + assertTrue("Pool too big: " + pool, pool < mem * 0.901); + } + + private static class DoubleMatcher extends BaseMatcher<Double> { + final double expected; + final double error; + DoubleMatcher(double expected, double error) { + this.expected = expected; + this.error = error; + } + + @Override + public boolean matches(Object val) { + double dbl = (Double) val; + return Math.abs(dbl - expected) <= error; + } + + @Override + public void describeTo(Description description) { + description.appendText("not sufficiently close to "); + description.appendText(Double.toString(expected)); + } + } + + private static DoubleMatcher closeTo(double value, double error) { + return new DoubleMatcher(value, error); + } + + @Test + public void testCallback() throws Exception { + Configuration conf = new Configuration(); + MemoryManager mgr = new MemoryManager(conf); + long pool = mgr.getTotalMemoryPool(); + MemoryManager.Callback[] calls = new MemoryManager.Callback[20]; + for(int i=0; i < calls.length; ++i) { + calls[i] = Mockito.mock(MemoryManager.Callback.class); + mgr.addWriter(new Path(Integer.toString(i)), pool/4, calls[i]); + } + // add enough rows to get the memory manager to check the limits + for(int i=0; i < 10000; ++i) { + mgr.addedRow(1); + } + for(int call=0; call < calls.length; ++call) { + Mockito.verify(calls[call], Mockito.times(2)) + .checkMemory(Matchers.doubleThat(closeTo(0.2, ERROR))); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestOrcWideTable.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestOrcWideTable.java b/java/core/src/test/org/apache/orc/impl/TestOrcWideTable.java new file mode 100644 index 0000000..289a86e --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestOrcWideTable.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.junit.Test; + +public class TestOrcWideTable { + + @Test + public void testBufferSizeFor1Col() throws IOException { + assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024, + 1, 128*1024)); + } + + @Test + public void testBufferSizeFor50Col() throws IOException { + assertEquals(256 * 1024, WriterImpl.getEstimatedBufferSize(256 * 1024 * 1024, + 50, 256*1024)); + } + + @Test + public void testBufferSizeFor1000Col() throws IOException { + assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024, + 1000, 128*1024)); + } + + @Test + public void testBufferSizeFor2000Col() throws IOException { + assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024, + 2000, 256*1024)); + } + + @Test + public void testBufferSizeFor4000Col() throws IOException { + assertEquals(8 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024, + 4000, 256*1024)); + } + + @Test + public void testBufferSizeFor25000Col() throws IOException { + assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024, + 25000, 256*1024)); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestOutStream.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestOutStream.java b/java/core/src/test/org/apache/orc/impl/TestOutStream.java new file mode 100644 index 0000000..e9614d5 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestOutStream.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + +import org.apache.orc.CompressionCodec; +import org.junit.Test; +import org.mockito.Mockito; + +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; + +public class TestOutStream { + + @Test + public void testFlush() throws Exception { + OutStream.OutputReceiver receiver = + Mockito.mock(OutStream.OutputReceiver.class); + CompressionCodec codec = new ZlibCodec(); + OutStream stream = new OutStream("test", 128*1024, codec, receiver); + assertEquals(0L, stream.getBufferSize()); + stream.write(new byte[]{0, 1, 2}); + stream.flush(); + Mockito.verify(receiver).output(Mockito.any(ByteBuffer.class)); + assertEquals(0L, stream.getBufferSize()); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestRLEv2.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestRLEv2.java b/java/core/src/test/org/apache/orc/impl/TestRLEv2.java new file mode 100644 index 0000000..e139619 --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestRLEv2.java @@ -0,0 +1,307 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.orc.CompressionKind; +import org.apache.orc.OrcFile; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.apache.orc.tools.FileDump; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +public class TestRLEv2 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Path testFilePath; + Configuration conf; + FileSystem fs; + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem () throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestRLEv2." + + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + void appendInt(VectorizedRowBatch batch, int i) { + ((LongColumnVector) batch.cols[0]).vector[batch.size++] = i; + } + + @Test + public void testFixedDeltaZero() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + for (int i = 0; i < 5120; ++i) { + appendInt(batch, 123); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123, + // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaOne() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + for (int i = 0; i < 5120; ++i) { + appendInt(batch, i % 512); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0) + // and 1 byte delta (delta = 1). In total, 4 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaOneDescending() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + for (int i = 0; i < 5120; ++i) { + appendInt(batch, 512 - (i % 512)); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint) + // and 1 byte delta (delta = 1). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaLarge() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + for (int i = 0; i < 5120; ++i) { + appendInt(batch, i % 512 + ((i % 512) * 100)); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0) + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50")); + System.setOut(origOut); + } + + @Test + public void testFixedDeltaLargeDescending() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + for (int i = 0; i < 5120; ++i) { + appendInt(batch, (512 - i % 512) + ((i % 512) * 100)); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint) + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run. + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60")); + System.setOut(origOut); + } + + @Test + public void testShortRepeat() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + for (int i = 0; i < 5; ++i) { + appendInt(batch, 10); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // 1 byte header + 1 byte value + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2")); + System.setOut(origOut); + } + + @Test + public void testDeltaUnknownSign() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + VectorizedRowBatch batch = schema.createRowBatch(5120); + appendInt(batch, 0); + for (int i = 0; i < 511; ++i) { + appendInt(batch, i); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding + // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits + // each, 5120/8 = 640). Total bytes 642 + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642")); + System.setOut(origOut); + } + + @Test + public void testPatchedBase() throws Exception { + TypeDescription schema = TypeDescription.createInt(); + Writer w = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .compress(CompressionKind.NONE) + .setSchema(schema) + .rowIndexStride(0) + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION) + .version(OrcFile.Version.V_0_12) + ); + + Random rand = new Random(123); + VectorizedRowBatch batch = schema.createRowBatch(5120); + appendInt(batch, 10000000); + for (int i = 0; i < 511; ++i) { + appendInt(batch, rand.nextInt(i+1)); + } + w.addRowBatch(batch); + w.close(); + + PrintStream origOut = System.out; + ByteArrayOutputStream myOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(myOut)); + FileDump.main(new String[]{testFilePath.toUri().toString()}); + System.out.flush(); + String outDump = new String(myOut.toByteArray()); + // use PATCHED_BASE encoding + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583")); + System.setOut(origOut); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java new file mode 100644 index 0000000..23d0dab --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java @@ -0,0 +1,152 @@ +/* + * Copyright 2016 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.orc.impl; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; +import org.apache.orc.FileFormatException; +import org.apache.hadoop.io.Text; +import org.apache.orc.OrcFile; +import org.junit.Test; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.ExpectedException; + +public class TestReaderImpl { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + private final Path path = new Path("test-file.orc"); + private FSDataInputStream in; + private int psLen; + private ByteBuffer buffer; + + @Before + public void setup() { + in = null; + } + + @Test + public void testEnsureOrcFooterSmallTextFile() throws IOException { + prepareTestCase("1".getBytes()); + thrown.expect(FileFormatException.class); + ReaderImpl.ensureOrcFooter(in, path, psLen, buffer); + } + + @Test + public void testEnsureOrcFooterLargeTextFile() throws IOException { + prepareTestCase("This is Some Text File".getBytes()); + thrown.expect(FileFormatException.class); + ReaderImpl.ensureOrcFooter(in, path, psLen, buffer); + } + + @Test + public void testEnsureOrcFooter011ORCFile() throws IOException { + prepareTestCase(composeContent(OrcFile.MAGIC, "FOOTER")); + ReaderImpl.ensureOrcFooter(in, path, psLen, buffer); + } + + @Test + public void testEnsureOrcFooterCorrectORCFooter() throws IOException { + prepareTestCase(composeContent("", OrcFile.MAGIC)); + ReaderImpl.ensureOrcFooter(in, path, psLen, buffer); + } + + private void prepareTestCase(byte[] bytes) { + buffer = ByteBuffer.wrap(bytes); + psLen = buffer.get(bytes.length - 1) & 0xff; + in = new FSDataInputStream(new SeekableByteArrayInputStream(bytes)); + } + + private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException { + ByteBuffer header = Text.encode(headerStr); + ByteBuffer footer = Text.encode(footerStr); + int headerLen = header.remaining(); + int footerLen = footer.remaining() + 1; + + ByteBuffer buf = ByteBuffer.allocate(headerLen + footerLen); + + buf.put(header); + buf.put(footer); + buf.put((byte) footerLen); + return buf.array(); + } + + private static final class SeekableByteArrayInputStream extends ByteArrayInputStream + implements Seekable, PositionedReadable { + + public SeekableByteArrayInputStream(byte[] buf) { + super(buf); + } + + @Override + public void seek(long pos) throws IOException { + this.reset(); + this.skip(pos); + } + + @Override + public long getPos() throws IOException { + return pos; + } + + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + long oldPos = getPos(); + int nread = -1; + try { + seek(position); + nread = read(buffer, offset, length); + } finally { + seek(oldPos); + } + return nread; + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException { + int nread = 0; + while (nread < length) { + int nbytes = read(position + nread, buffer, offset + nread, length - nread); + if (nbytes < 0) { + throw new EOFException("End of file reached before reading fully."); + } + nread += nbytes; + } + } + + @Override + public void readFully(long position, byte[] buffer) + throws IOException { + readFully(position, buffer, 0, buffer.length); + } + } +}
