guiyanakuang commented on a change in pull request #992:
URL: https://github.com/apache/orc/pull/992#discussion_r779527800



##########
File path: java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
##########
@@ -2449,4 +2449,70 @@ public void testSargApplier() throws Exception {
     f2.setAccessible(true);
     assertFalse((boolean)f2.get(applier1));
   }
+
+  @Test
+  public void testCompatibleSpecificationUnofficialFile() throws IOException {
+    // unofficial.orc is a file compatible with the ORC specification,
+    // but its column statistics only implements the ColumnStatistics interface
+    // without providing other information such as min and max
+    Path path = new Path(ClassLoader.getSystemResource("unofficial.orc").getPath());
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.getLocal(conf);
+    TypeDescription readSchema =
+        TypeDescription.fromString("struct<INT:int>");
+    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
+    RecordReader rowIterator = reader.rows(
+        reader.options()
+            .schema(readSchema)
+            .searchArgument(SearchArgumentFactory.newBuilder()
+                .equals("INT", PredicateLeaf.Type.LONG, 2L)
+                .build(), new String[]{"INT"}) //predict push down
+    );
+
+    VectorizedRowBatch batch = readSchema.createRowBatch();
+    LongColumnVector x = (LongColumnVector) batch.cols[0];
+
+    assertTrue(rowIterator.nextBatch(batch));
+    assertEquals(10, batch.size);
+    assertFalse(x.noNulls);
+    for (int row = 0; row < batch.size; ++row) {
+      int xRow = x.isRepeating ? 0 : row;
+      if (xRow % 2 == 0) {
+        assertFalse(x.isNull[xRow]);
+        assertEquals(xRow, x.vector[xRow]);
+      } else {
+        assertTrue(x.isNull[xRow]);
+      }
+    }
+    rowIterator.close();
+  }
+
+  @Test
+  public void testCDUFUnofficialFile() throws IOException {

Review comment:
       Looks better, I'll change it later.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to