This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
     new 09693fee2 ORC-1959: Add test String statistics with Presto writer
09693fee2 is described below

commit 09693fee2d65a49a6a65b5774cf4eb2001bedf3c
Author: sychen <syc...@ctrip.com>
AuthorDate: Wed Jul 16 21:51:08 2025 -0700

    ORC-1959: Add test String statistics with Presto writer

    ### What changes were proposed in this pull request?
    This PR aims to add a string statistics test for ORC-1075 fix.

    ### Why are the changes needed?
    https://github.com/apache/orc/issues/1061

    ### How was this patch tested?
    Add UT

    ### Was this patch authored or co-authored using generative AI tooling?
    No

    Closes #2332 from cxzl25/ORC-1959.

    Authored-by: sychen <syc...@ctrip.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/orc/impl/TestRecordReaderImpl.java  |  33 +++++++++++++++++++++
 .../src/test/resources/orc-file-presto-string.orc  | Bin 0 -> 521 bytes
 2 files changed, 33 insertions(+)

diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index f785e6e58..3c70b7284 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -2518,6 +2518,39 @@ public class TestRecordReaderImpl implements TestConf {
     }
   }
 
+  @Test
+  public void testStringStatisticsWithPrestoWriter() throws Exception {
+    // struct<id:int,name:string,score:int>
+    Path testFilePath = new Path(ClassLoader.
+        getSystemResource("orc-file-presto-string.orc").getPath());
+    FileSystem fs = FileSystem.get(conf);
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    try (RecordReader rr = reader.rows()) {
+      RecordReaderImpl rri = (RecordReaderImpl) rr;
+      // x.z id is 2, We just need to read this column
+      OrcIndex orcIndex = rri.readRowIndex(0,
+          new boolean[] { false, false, true, false },
+          new boolean[] { false, false, true, false });
+      OrcProto.RowIndex[] rowGroupIndex = orcIndex.getRowGroupIndex();
+      OrcProto.ColumnStatistics statistics = rowGroupIndex[2].getEntry(0).getStatistics();
+      OrcProto.ColumnEncoding encoding = OrcProto.ColumnEncoding.newBuilder()
+          .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2)
+          .build();
+      PredicateLeaf pred = createPredicateLeaf(
+          PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, "name", null, null);
+
+      TruthValue truthValue = RecordReaderImpl.evaluatePredicateProto(
+          statistics,
+          pred, null, encoding, null,
+          CURRENT_WRITER, TypeDescription.createString());
+
+      assertEquals(TruthValue.YES_NO_NULL, truthValue);
+    }
+  }
+
   @Test
   public void testDoubleColumnWithoutDoubleStatistics() throws Exception {
     // orc-file-no-double-statistic.orc is an orc file created by cudf with a schema of
diff --git a/java/core/src/test/resources/orc-file-presto-string.orc b/java/core/src/test/resources/orc-file-presto-string.orc
new file mode 100644
index 000000000..8884485d1
Binary files /dev/null and b/java/core/src/test/resources/orc-file-presto-string.orc differ