This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 09693fee2 ORC-1959: Add test String statistics with Presto writer
09693fee2 is described below

commit 09693fee2d65a49a6a65b5774cf4eb2001bedf3c
Author: sychen <syc...@ctrip.com>
AuthorDate: Wed Jul 16 21:51:08 2025 -0700

    ORC-1959: Add test String statistics with Presto writer
    
    ### What changes were proposed in this pull request?
    This PR aims to add a string statistics test for ORC-1075 fix.
    
    ### Why are the changes needed?
    https://github.com/apache/orc/issues/1061
    
    ### How was this patch tested?
    Add UT
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #2332 from cxzl25/ORC-1959.
    
    Authored-by: sychen <syc...@ctrip.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/orc/impl/TestRecordReaderImpl.java  |  33 +++++++++++++++++++++
 .../src/test/resources/orc-file-presto-string.orc  | Bin 0 -> 521 bytes
 2 files changed, 33 insertions(+)

diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index f785e6e58..3c70b7284 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -2518,6 +2518,39 @@ public class TestRecordReaderImpl implements TestConf {
     }
   }
 
+  @Test
+  public void testStringStatisticsWithPrestoWriter() throws Exception {
+    // struct<id:int,name:string,score:int>
+    Path testFilePath = new Path(ClassLoader.
+        getSystemResource("orc-file-presto-string.orc").getPath());
+    FileSystem fs = FileSystem.get(conf);
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    try (RecordReader rr = reader.rows()) {
+      RecordReaderImpl rri = (RecordReaderImpl) rr;
+      // x.z id is 2, We just need to read this column
+      OrcIndex orcIndex = rri.readRowIndex(0,
+          new boolean[] { false, false, true, false },
+          new boolean[] { false, false, true, false });
+      OrcProto.RowIndex[] rowGroupIndex = orcIndex.getRowGroupIndex();
+      OrcProto.ColumnStatistics statistics = rowGroupIndex[2].getEntry(0).getStatistics();
+      OrcProto.ColumnEncoding encoding = OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2)
+          .build();
+      PredicateLeaf pred = createPredicateLeaf(
+          PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, "name", null, null);
+
+      TruthValue truthValue = RecordReaderImpl.evaluatePredicateProto(
+          statistics,
+          pred, null, encoding, null,
+          CURRENT_WRITER, TypeDescription.createString());
+
+      assertEquals(TruthValue.YES_NO_NULL, truthValue);
+    }
+  }
+
   @Test
   public void testDoubleColumnWithoutDoubleStatistics() throws Exception {
     // orc-file-no-double-statistic.orc is an orc file created by cudf with a schema of
diff --git a/java/core/src/test/resources/orc-file-presto-string.orc b/java/core/src/test/resources/orc-file-presto-string.orc
new file mode 100644
index 000000000..8884485d1
Binary files /dev/null and b/java/core/src/test/resources/orc-file-presto-string.orc differ

Reply via email to