This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git

commit b3016d779e79d71ab70a283fb292390a5ae37100
Author: Yiqun Zhang <[email protected]>
AuthorDate: Thu Aug 24 20:11:54 2023 -0700

    ORC-1482: Adaptation to read ORC files created by CUDF
    
    This PR adapts the reader so that it can read ORC files created by CUDF, which may have missing statistics in their DOUBLE/FLOAT columns.
    
    Official ORC readers can't read CUDF-created ORC files properly.
    
    Added a unit test.
    
    Closes #1598 from guiyanakuang/ORC-1482-to-1.8.
    
    Authored-by: Yiqun Zhang <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../src/java/org/apache/orc/impl/RecordReaderImpl.java    |  12 ++++++++++--
 .../test/org/apache/orc/impl/TestRecordReaderImpl.java    |   1 +
 .../src/test/resources/orc-file-no-double-statistic.orc   | Bin 0 -> 161 bytes
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java 
b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 333cf5b9e..48ba95260 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -650,8 +650,8 @@ public class RecordReaderImpl implements RecordReader {
                    " include ORC-517. Writer version: {}",
           predicate.getColumnName(), writerVersion);
       return TruthValue.YES_NO_NULL;
-    } else if (category == TypeDescription.Category.DOUBLE
-        || category == TypeDescription.Category.FLOAT) {
+    } else if ((category == TypeDescription.Category.DOUBLE ||
+        category == TypeDescription.Category.FLOAT) && cs instanceof 
DoubleColumnStatistics) {
       DoubleColumnStatistics dstas = (DoubleColumnStatistics) cs;
       if (Double.isNaN(dstas.getSum())) {
         LOG.debug("Not using predication pushdown on {} because stats contain 
NaN values",
@@ -1654,4 +1654,12 @@ public class RecordReaderImpl implements RecordReader {
   public int getMaxDiskRangeChunkLimit() {
     return maxDiskRangeChunkLimit;
   }
+
+  /**
+   * Get sargApplier for testing.
+   * @return sargApplier in record reader.
+   */
+  SargApplier getSargApp() {
+    return sargApp;
+  }
 }
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java 
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 85e502afb..555954b08 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -41,6 +41,7 @@ import org.apache.orc.ColumnStatistics;
 import org.apache.orc.CompressionCodec;
 import org.apache.orc.CompressionKind;
 import org.apache.orc.DataReader;
+import org.apache.orc.DoubleColumnStatistics;
 import org.apache.orc.OrcConf;
 import org.apache.orc.OrcFile;
 import org.apache.orc.OrcProto;
diff --git a/java/core/src/test/resources/orc-file-no-double-statistic.orc 
b/java/core/src/test/resources/orc-file-no-double-statistic.orc
new file mode 100644
index 000000000..9da6e42e3
Binary files /dev/null and 
b/java/core/src/test/resources/orc-file-no-double-statistic.orc differ

Reply via email to