This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-1.7 in repository https://gitbox.apache.org/repos/asf/orc.git
commit b3016d779e79d71ab70a283fb292390a5ae37100 Author: Yiqun Zhang <[email protected]> AuthorDate: Thu Aug 24 20:11:54 2023 -0700 ORC-1482: Adaptation to read ORC files created by CUDF This PR adapts the reader to handle ORC files created by CUDF, which may be missing statistics for their DOUBLE/FLOAT columns. Official ORC readers can't read such CUDF-created ORC files properly. Added a unit test (UT). Closes #1598 from guiyanakuang/ORC-1482-to-1.8. Authored-by: Yiqun Zhang <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]> --- .../src/java/org/apache/orc/impl/RecordReaderImpl.java | 12 ++++++++++-- .../test/org/apache/orc/impl/TestRecordReaderImpl.java | 1 + .../src/test/resources/orc-file-no-double-statistic.orc | Bin 0 -> 161 bytes 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 333cf5b9e..48ba95260 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -650,8 +650,8 @@ public class RecordReaderImpl implements RecordReader { " include ORC-517. Writer version: {}", predicate.getColumnName(), writerVersion); return TruthValue.YES_NO_NULL; - } else if (category == TypeDescription.Category.DOUBLE - || category == TypeDescription.Category.FLOAT) { + } else if ((category == TypeDescription.Category.DOUBLE || + category == TypeDescription.Category.FLOAT) && cs instanceof DoubleColumnStatistics) { DoubleColumnStatistics dstas = (DoubleColumnStatistics) cs; if (Double.isNaN(dstas.getSum())) { LOG.debug("Not using predication pushdown on {} because stats contain NaN values", @@ -1654,4 +1654,12 @@ public class RecordReaderImpl implements RecordReader { public int getMaxDiskRangeChunkLimit() { return maxDiskRangeChunkLimit; } + + /** + * Get sargApplier for testing. + * @return sargApplier in record reader. 
+ */ + SargApplier getSargApp() { + return sargApp; + } } diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index 85e502afb..555954b08 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -41,6 +41,7 @@ import org.apache.orc.ColumnStatistics; import org.apache.orc.CompressionCodec; import org.apache.orc.CompressionKind; import org.apache.orc.DataReader; +import org.apache.orc.DoubleColumnStatistics; import org.apache.orc.OrcConf; import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; diff --git a/java/core/src/test/resources/orc-file-no-double-statistic.orc b/java/core/src/test/resources/orc-file-no-double-statistic.orc new file mode 100644 index 000000000..9da6e42e3 Binary files /dev/null and b/java/core/src/test/resources/orc-file-no-double-statistic.orc differ
