[11/23] orc git commit: ORC-1 Import of ORC code from Hive. (omalley reviewed by prasanthj)

omalley Fri, 13 May 2016 12:51:18 -0700

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/TestStringDictionary.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestStringDictionary.java b/java/core/src/test/org/apache/orc/TestStringDictionary.java
new file mode 100644
index 0000000..46209bb
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/TestStringDictionary.java
@@ -0,0 +1,290 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import org.apache.orc.impl.RecordReaderImpl;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+/**
+ * Round-trip tests for string columns: each test writes {@link #ROW_COUNT}
+ * strings to a local ORC file, reads them back, and then checks the column
+ * encoding recorded in every stripe footer.
+ */
+public class TestStringDictionary {
+
+  /** Number of rows written by every test in this class. */
+  private static final int ROW_COUNT = 20000;
+
+  /** Exclusive upper bound of the values produced by {@link #randomValues()}. */
+  private static final int RANDOM_BOUND = 10000;
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  /**
+   * Creates a fresh configuration and local file system, and removes any
+   * file left over from a previous run of the same test method.
+   */
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    // Prefix with this class's own name so the scratch file cannot collide
+    // with files written by other test classes sharing the work directory.
+    testFilePath = new Path(workDir,
+        "TestStringDictionary." + testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  /** All values distinct, default settings: expects DIRECT_V2. */
+  @Test
+  public void testTooManyDistinct() throws Exception {
+    checkRoundTrip(OrcFile.writerOptions(conf), sequentialValues(),
+        OrcProto.ColumnEncoding.Kind.DIRECT_V2);
+  }
+
+  /** Values drawn from a limited range, default settings: expects DICTIONARY_V2. */
+  @Test
+  public void testHalfDistinct() throws Exception {
+    checkRoundTrip(OrcFile.writerOptions(conf), randomValues(),
+        OrcProto.ColumnEncoding.Kind.DICTIONARY_V2);
+  }
+
+  /** All values distinct, row-index-stride dictionary check off: expects DIRECT_V2. */
+  @Test
+  public void testTooManyDistinctCheckDisabled() throws Exception {
+    // The flag must be set before the writer options are built from conf.
+    conf.setBoolean(OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getAttribute(),
+        false);
+    checkRoundTrip(OrcFile.writerOptions(conf), sequentialValues(),
+        OrcProto.ColumnEncoding.Kind.DIRECT_V2);
+  }
+
+  /** Limited-range values, row-index-stride dictionary check off: expects DICTIONARY_V2. */
+  @Test
+  public void testHalfDistinctCheckDisabled() throws Exception {
+    // The flag must be set before the writer options are built from conf.
+    conf.setBoolean(OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getAttribute(),
+        false);
+    checkRoundTrip(OrcFile.writerOptions(conf), randomValues(),
+        OrcProto.ColumnEncoding.Kind.DICTIONARY_V2);
+  }
+
+  /** All values distinct, written as file version 0.11: expects DICTIONARY. */
+  @Test
+  public void testTooManyDistinctV11AlwaysDictionary() throws Exception {
+    checkRoundTrip(
+        OrcFile.writerOptions(conf).version(OrcFile.Version.V_0_11),
+        sequentialValues(),
+        OrcProto.ColumnEncoding.Kind.DICTIONARY);
+  }
+
+  /**
+   * Returns the values 0 .. ROW_COUNT-1 in order, so every row is distinct.
+   */
+  private static int[] sequentialValues() {
+    int[] values = new int[ROW_COUNT];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = i;
+    }
+    return values;
+  }
+
+  /**
+   * Returns ROW_COUNT pseudo-random values in [0, RANDOM_BOUND). The seed is
+   * fixed so that every run sees the same input.
+   */
+  private static int[] randomValues() {
+    Random rand = new Random(123);
+    int[] values = new int[ROW_COUNT];
+    for (int i = 0; i < values.length; i++) {
+      values[i] = rand.nextInt(RANDOM_BOUND);
+    }
+    return values;
+  }
+
+  /**
+   * Writes the decimal string form of each value as one row of a single
+   * string column, reads the file back, and verifies the row contents, the
+   * row count, and the encoding kind of every column in every stripe footer.
+   *
+   * @param options writer options; the schema, compression (NONE) and buffer
+   *                size (10000) are set here, everything else is up to the
+   *                caller
+   * @param values the values to write, in row order
+   * @param expectedKind the encoding every stripe footer column must report
+   * @throws Exception if writing or reading the file fails
+   */
+  private void checkRoundTrip(OrcFile.WriterOptions options, int[] values,
+      OrcProto.ColumnEncoding.Kind expectedKind) throws Exception {
+    TypeDescription schema = TypeDescription.createString();
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        options.setSchema(schema)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector col = (BytesColumnVector) batch.cols[0];
+    for (int value : values) {
+      // Flush the batch once it is full before adding the next row.
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      col.setVal(batch.size++, String.valueOf(value).getBytes());
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    try {
+      batch = reader.getSchema().createRowBatch();
+      col = (BytesColumnVector) batch.cols[0];
+      int idx = 0;
+      while (rows.nextBatch(batch)) {
+        for (int r = 0; r < batch.size; ++r) {
+          assertEquals(String.valueOf(values[idx++]), col.toString(r));
+        }
+      }
+      // Without this check a reader that returned no rows would pass.
+      assertEquals(values.length, idx);
+
+      // Make sure the encoding type is correct. The cast relies on
+      // reader.rows() handing back a RecordReaderImpl.
+      for (StripeInformation stripe : reader.getStripes()) {
+        OrcProto.StripeFooter footer =
+            ((RecordReaderImpl) rows).readStripeFooter(stripe);
+        for (int i = 0; i < footer.getColumnsCount(); ++i) {
+          OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+          assertEquals(expectedKind, encoding.getKind());
+        }
+      }
+    } finally {
+      rows.close();
+    }
+  }
+
+}


http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestTypeDescription.java b/java/core/src/test/org/apache/orc/TestTypeDescription.java
new file mode 100644
index 0000000..1f6f254
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/TestTypeDescription.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/**
+ * Tests for {@link TypeDescription}: its string and JSON renderings, and the
+ * parser behind {@link TypeDescription#fromString(String)}, including the
+ * error messages produced for malformed input.
+ */
+public class TestTypeDescription {
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  /**
+   * Checks toString() and toJson() for a primitive type, a flat struct, and
+   * a struct with nested union and struct children.
+   */
+  @Test
+  public void testJson() {
+    TypeDescription bin = TypeDescription.createBinary();
+    assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}",
+        bin.toJson());
+    assertEquals("binary", bin.toString());
+    TypeDescription struct = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal());
+    assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>",
+        struct.toString());
+    assertEquals(
+        "{\"category\": \"struct\", \"id\": 0, \"max\": 3, "
+            + "\"fields\": [\n"
+            + "  \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n"
+            + "  \"f2\": {\"category\": \"string\", \"id\": 2, "
+            + "\"max\": 2},\n"
+            + "  \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, "
+            + "\"precision\": 38, \"scale\": 10}]}",
+        struct.toJson());
+    struct = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    assertEquals(
+        "struct<f1:uniontype<tinyint,decimal(20,10)>,"
+            + "f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
+        struct.toString());
+    assertEquals(
+        "{\"category\": \"struct\", \"id\": 0, \"max\": 8, "
+            + "\"fields\": [\n"
+            + "  \"f1\": {\"category\": \"uniontype\", \"id\": 1, "
+            + "\"max\": 3, \"children\": [\n"
+            + "    {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n"
+            + "    {\"category\": \"decimal\", \"id\": 3, \"max\": 3, "
+            + "\"precision\": 20, \"scale\": 10}]},\n"
+            + "  \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, "
+            + "\"fields\": [\n"
+            + "    \"f3\": {\"category\": \"date\", \"id\": 5, "
+            + "\"max\": 5},\n"
+            + "    \"f4\": {\"category\": \"double\", \"id\": 6, "
+            + "\"max\": 6},\n"
+            + "    \"f5\": {\"category\": \"boolean\", \"id\": 7, "
+            + "\"max\": 7}]},\n"
+            + "  \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, "
+            + "\"length\": 100}]}",
+        struct.toJson());
+  }
+
+  /**
+   * Builds a struct that uses a wide range of type categories, and checks
+   * that toString() and fromString() are inverses for it.
+   */
+  @Test
+  public void testParserSimple() {
+    TypeDescription expected = TypeDescription.createStruct()
+        .addField("b1", TypeDescription.createBinary())
+        .addField("b2", TypeDescription.createBoolean())
+        .addField("b3", TypeDescription.createByte())
+        .addField("c", TypeDescription.createChar().withMaxLength(10))
+        .addField("d1", TypeDescription.createDate())
+        .addField("d2",
+            TypeDescription.createDecimal().withScale(5).withPrecision(20))
+        .addField("d3", TypeDescription.createDouble())
+        .addField("fff", TypeDescription.createFloat())
+        .addField("int", TypeDescription.createInt())
+        .addField("l",
+            TypeDescription.createList(TypeDescription.createLong()))
+        .addField("map",
+            TypeDescription.createMap(TypeDescription.createShort(),
+                TypeDescription.createString()))
+        .addField("str", TypeDescription.createStruct()
+           .addField("u", TypeDescription.createUnion()
+               .addUnionChild(TypeDescription.createTimestamp())
+               .addUnionChild(TypeDescription.createVarchar()
+                   .withMaxLength(100))));
+    String expectedStr =
+        "struct<b1:binary,b2:boolean,b3:tinyint,c:char(10),d1:date," +
+            "d2:decimal(20,5),d3:double,fff:float,int:int,l:array<bigint>," +
+            "map:map<smallint,string>,str:struct<u:uniontype<timestamp," +
+            "varchar(100)>>>";
+    assertEquals(expectedStr, expected.toString());
+    TypeDescription actual = TypeDescription.fromString(expectedStr);
+    assertEquals(expected, actual);
+    assertEquals(expectedStr, actual.toString());
+  }
+
+  /**
+   * Checks that upper-case category names are accepted and that the case of
+   * a field name is kept as written.
+   */
+  @Test
+  public void testParserUpper() {
+    TypeDescription type = TypeDescription.fromString("BIGINT");
+    assertEquals(TypeDescription.Category.LONG, type.getCategory());
+    type = TypeDescription.fromString("STRUCT<MY_FIELD:INT>");
+    assertEquals(TypeDescription.Category.STRUCT, type.getCategory());
+    assertEquals("MY_FIELD", type.getFieldNames().get(0));
+    assertEquals(TypeDescription.Category.INT,
+        type.getChildren().get(0).getCategory());
+    type = TypeDescription.fromString("UNIONTYPE<STRING>");
+    assertEquals(TypeDescription.Category.UNION, type.getCategory());
+    assertEquals(TypeDescription.Category.STRING,
+        type.getChildren().get(0).getCategory());
+  }
+
+  /** An unrecognised category name must be rejected. */
+  @Test
+  public void testParserUnknownCategory() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Can't parse category at 'FOOBAR^'");
+    TypeDescription.fromString("FOOBAR");
+  }
+
+  /** Input that starts with no category name at all must be rejected. */
+  @Test
+  public void testParserEmptyCategory() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Can't parse category at '^<int>'");
+    TypeDescription.fromString("<int>");
+  }
+
+  /** A char type with empty parentheses must be rejected. */
+  @Test
+  public void testParserMissingInt() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Missing integer at 'char(^)'");
+    TypeDescription.fromString("char()");
+  }
+
+  /** A char type with no parenthesised length must be rejected. */
+  @Test
+  public void testParserMissingSize() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Missing required char '(' at 'struct<c:char^>'");
+    TypeDescription.fromString("struct<c:char>");
+  }
+
+  /** Trailing characters after a complete type must be rejected. */
+  @Test
+  public void testParserExtraStuff() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Extra characters at 'struct<i:int>^,'");
+    TypeDescription.fromString("struct<i:int>,");
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/core/src/test/org/apache/orc/TestUnrolledBitPack.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestUnrolledBitPack.java b/java/core/src/test/org/apache/orc/TestUnrolledBitPack.java
new file mode 100644
index 0000000..ef8fcd0
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/TestUnrolledBitPack.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import com.google.common.collect.Lists;
+import com.google.common.primitives.Longs;
+
+@RunWith(value = Parameterized.class)
+public class TestUnrolledBitPack {
+
+  private long val;
+
+  public TestUnrolledBitPack(long val) {
+    this.val = val;
+  }
+
+  @Parameters
+  public static Collection<Object[]> data() {
+    Object[][] data = new Object[][] { { -1 }, { 1 }, { 7 }, { -128 }, { 32000 
}, { 8300000 },
+        { Integer.MAX_VALUE }, { 540000000000L }, { 140000000000000L }, { 
36000000000000000L },
+        { Long.MAX_VALUE } };
+    return Arrays.asList(data);
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + 
File.separator + "test"
+      + File.separator + "tmp"));
+
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." + 
testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testBitPacking() throws Exception {
+    TypeDescription schema = TypeDescription.createLong();
+
+    long[] inp = new long[] { val, 0, val, val, 0, val, 0, val, val, 0, val, 
0, val, val, 0, 0,
+        val, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, 
val, 0, 0, val, 0, val,
+        0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, 
val, 0, val, 0, val, 0,
+        0, val, 0, val, 0, val, 0, 0, val, 0, val, 0, val, 0, 0, val, 0, val, 
0, val, 0, 0, val, 0,
+        val, 0, val, 0, 0, val, 0, val, 0, 0, val, val };
+    List<Long> input = Lists.newArrayList(Longs.asList(inp));
+
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .compress(CompressionKind.NONE).bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    for (Long l : input) {
+      int row = batch.size++;
+      ((LongColumnVector) batch.cols[0]).vector[row] = l;
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, 
OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    int idx = 0;
+    while (rows.nextBatch(batch)) {
+      for(int r=0; r < batch.size; ++r) {
+        assertEquals(input.get(idx++).longValue(),
+            ((LongColumnVector) batch.cols[0]).vector[r]);
+      }
+    }
+  }
+
+}

[11/23] orc git commit: ORC-1 Import of ORC code from Hive. (omalley reviewed by prasanthj)

Reply via email to