This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 60b03ef  ORC-705: Predicate evaluation should take into account writer 
calendar (#588)
60b03ef is described below

commit 60b03ef5c8ed175be715b90763f32aa6ca21bc97
Author: Panagiotis Garefalakis <[email protected]>
AuthorDate: Wed Dec 30 08:18:12 2020 +0200

    ORC-705: Predicate evaluation should take into account writer calendar 
(#588)
    
    ### What changes were proposed in this pull request?
    RecordReaderImpl should pass down the writer calendar info 
(writerUsedProlepticGregorian) when evaluating predicates to make sure column 
stats are properly deserialized (affects TimestampStatistics).
    
    
    ### Why are the changes needed?
    Correct evaluation of predicates with Timestamps
    
    
    ### How was this patch tested?
    TestRecordReaderImpl.testPredEvalTimestampStatsDiffWriter
---
 .../java/org/apache/orc/impl/RecordReaderImpl.java | 10 ++--
 .../org/apache/orc/impl/TestRecordReaderImpl.java  | 54 +++++++++++++++++++++-
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java 
b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 781e13c..c05a4e7 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -480,19 +480,20 @@ public class RecordReaderImpl implements RecordReader {
                                            OrcFile.WriterVersion writerVersion,
                                            TypeDescription type) {
     return evaluatePredicateProto(statsProto, predicate, kind, encoding, 
bloomFilter,
-        writerVersion, type, false);
+        writerVersion, type, true, false);
   }
 
   /**
    * Evaluate a predicate with respect to the statistics from the column
    * that is referenced in the predicate.
    * Includes option to specify if timestamp column stats values
-   * should be in UTC.
+   * should be in UTC and if the file writer used proleptic Gregorian calendar.
    * @param statsProto the statistics for the column mentioned in the predicate
    * @param predicate the leaf predicate we need to evaluation
    * @param bloomFilter the bloom filter
    * @param writerVersion the version of software that wrote the file
    * @param type what is the kind of this column
+   * @param writerUsedProlepticGregorian file written using the proleptic 
Gregorian calendar
    * @param useUTCTimestamp
    * @return the set of truth values that may be returned for the given
    *   predicate.
@@ -505,8 +506,9 @@ public class RecordReaderImpl implements RecordReader {
                                            OrcProto.BloomFilter bloomFilter,
                                            OrcFile.WriterVersion writerVersion,
                                            TypeDescription type,
+                                           boolean 
writerUsedProlepticGregorian,
                                            boolean useUTCTimestamp) {
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, statsProto);
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, statsProto, 
writerUsedProlepticGregorian, true);
     ValueRange range = getValueRange(cs, predicate, useUTCTimestamp);
 
     // files written before ORC-135 stores timestamp wrt to local timezone 
causing issues with PPD.
@@ -1031,7 +1033,7 @@ public class RecordReaderImpl implements RecordReader {
                     predicate, bfk, encodings.get(columnIx), bf,
                     writerVersion, evolution.getFileSchema().
                     findSubtype(columnIx),
-                    useUTCTimestamp);
+                    writerUsedProlepticGregorian, useUTCTimestamp);
               } catch (Exception e) {
                 exceptionCount[pred] += 1;
                 if (e instanceof SargCastException) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java 
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 5350efb..4375d3a 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -593,7 +593,22 @@ public class TestRecordReaderImpl {
     return RecordReaderImpl.evaluatePredicateProto(stats, predicate, null,
         encoding, null,
         include135 ? OrcFile.WriterVersion.ORC_135: 
OrcFile.WriterVersion.ORC_101,
-        TypeDescription.createTimestamp(), useUTCTimestamp);
+        TypeDescription.createTimestamp(), true, useUTCTimestamp);
+  }
+
+  static TruthValue 
evaluateTimestampWithWriterCalendar(OrcProto.ColumnStatistics stats,
+                                                        PredicateLeaf 
predicate,
+                                                        boolean include135,
+                                                        boolean 
writerUsedProlepticGregorian,
+                                                        boolean 
useUTCTimestamp) {
+    OrcProto.ColumnEncoding encoding =
+        OrcProto.ColumnEncoding.newBuilder()
+            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT)
+            .build();
+    return RecordReaderImpl.evaluatePredicateProto(stats, predicate, null,
+        encoding, null,
+        include135 ? OrcFile.WriterVersion.ORC_135: 
OrcFile.WriterVersion.ORC_101,
+        TypeDescription.createTimestamp(), writerUsedProlepticGregorian, 
useUTCTimestamp);
   }
 
   static TruthValue evaluateTimestampBloomfilter(OrcProto.ColumnStatistics 
stats,
@@ -616,7 +631,7 @@ public class TestRecordReaderImpl {
     BloomFilterIO.serialize(builder, bloom);
     return RecordReaderImpl.evaluatePredicateProto(stats, predicate, kind,
         encoding.build(), builder.build(), version,
-        TypeDescription.createTimestamp(), useUTCTimestamp);
+        TypeDescription.createTimestamp(), true, useUTCTimestamp);
   }
 
   @Test
@@ -896,6 +911,41 @@ public class TestRecordReaderImpl {
   }
 
   @Test
+  public void testPredEvalTimestampStatsDiffWriter() {
+    // Proleptic - NoUTC
+    PredicateLeaf pred = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, 
"x",
+        Timestamp.valueOf("1017-01-01 00:00:00"), null);
+    assertEquals(TruthValue.YES_NO,
+        evaluateTimestampWithWriterCalendar(createTimestampStats("1017-01-01 
00:00:00", "1017-01-01 00:00:00"),
+            pred, true, true, false));
+
+    // NoProleptic - NoUTC -> 1016-12-26 00:00:00.0
+    long predTime = 
DateUtils.convertTimeToProleptic(Timestamp.valueOf("1017-01-01 
00:00:00").getTime(), false);
+    PredicateLeaf pred2 = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, 
"x", new Timestamp(predTime), null);
+    assertEquals(TruthValue.YES_NO,
+        evaluateTimestampWithWriterCalendar(createTimestampStats("1017-01-01 
00:00:00", "1017-01-01 00:00:00"),
+            pred2, true, false, false));
+
+    // NoProleptic - UTC -> 1016-12-25 16:00:00.0
+    predTime = DateUtils.convertTimeToProleptic(getUtcTimestamp("1017-01-01 
00:00:00"), true);
+    PredicateLeaf pred3 = createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP, 
"x", new Timestamp(predTime), null);
+    assertEquals(TruthValue.YES_NO,
+        evaluateTimestampWithWriterCalendar(createTimestampStats("1017-01-01 
00:00:00", "1017-01-01 00:00:00"),
+            pred3, true, false, true));
+
+    // Proleptic - UTC -> 1016-12-31 16:00:00.0
+    predTime = getUtcTimestamp("1017-01-01 00:00:00");
+    PredicateLeaf pred4 = 
createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(predTime), null);
+    assertEquals(TruthValue.YES_NO,
+        evaluateTimestampWithWriterCalendar(createTimestampStats("1017-01-01 
00:00:00", "1017-01-01 00:00:00"),
+            pred4, true, true, true));
+  }
+
+  @Test
   public void testPredEvalWithTimestampStats() throws Exception {
     PredicateLeaf pred = createPredicateLeaf(
         PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.TIMESTAMP,

Reply via email to