This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.6 by this push:
     new b52b379  ORC-673: PPD LTE Point equality comparison is wrong when RG 
MIN==MAX (#552)
b52b379 is described below

commit b52b379d4eb5a490fde08a4436406dd78b5bc321
Author: Panagiotis Garefalakis <pga...@cloudera.com>
AuthorDate: Sat Oct 17 01:25:26 2020 +0100

    ORC-673: PPD LTE Point equality comparison is wrong when RG MIN==MAX (#552)
    
    ### What changes were proposed in this pull request?
    
    FIx PPD Less than Equals comparison for min==max corner case
    
    ### Why are the changes needed?
    
    Better PPD evaluation
    
    ### How was this patch tested?
    
    Extra JUnit test
---
 .../java/org/apache/orc/impl/RecordReaderImpl.java |  3 +-
 .../test/org/apache/orc/TestOrcTimestampPPD.java   |  2 +-
 .../orc/impl/TestPredicatePushDownBounds.java      | 49 ++++++++++++++++++++++
 .../org/apache/orc/impl/TestRecordReaderImpl.java  |  2 +-
 4 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java 
b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 6d83a6a..72b90fc 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -636,7 +636,8 @@ public class RecordReaderImpl implements RecordReader {
         }
       case LESS_THAN_EQUALS:
         loc = range.compare(predObj);
-        if (loc == Location.AFTER || loc == Location.MAX) {
+        if (loc == Location.AFTER || loc == Location.MAX ||
+            (loc == Location.MIN && range.isSingleton())) {
           return range.addNull(TruthValue.YES);
         } else if (loc == Location.BEFORE) {
           return range.addNull(TruthValue.NO);
diff --git a/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java 
b/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java
index dfe7f7c..e39ff5a 100644
--- a/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java
+++ b/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java
@@ -129,7 +129,7 @@ public class TestOrcTimestampPPD {
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, 
PredicateLeaf.Type.TIMESTAMP, "c",
         Timestamp.valueOf("1970-01-01 00:00:00.0005"), null);
-    Assert.assertEquals(SearchArgument.TruthValue.YES_NO, 
RecordReaderImpl.evaluatePredicate(colStats[0], pred, null));
+    Assert.assertEquals(SearchArgument.TruthValue.YES, 
RecordReaderImpl.evaluatePredicate(colStats[0], pred, null));
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN, 
PredicateLeaf.Type.TIMESTAMP, "c",
         Timestamp.valueOf("1970-01-01 00:00:00.0005"), null);
diff --git 
a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java 
b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
index 36d589a..56f4194 100644
--- a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
+++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
@@ -20,8 +20,11 @@ package org.apache.orc.impl;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
+import org.apache.orc.IntegerColumnStatistics;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.util.BloomFilter;
+import org.junit.Assert;
 import org.junit.Test;
 
 import java.nio.charset.StandardCharsets;
@@ -322,4 +325,50 @@ public class TestPredicatePushDownBounds {
 
   }
 
+  /**
+   * Test for LESS_THAN_EQUALS search arg when upper and lower bounds are the 
same.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testLessThanEquals() {
+    final TypeDescription schema = TypeDescription.createInt();
+    final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+    stat.increment();
+    stat.updateInteger(1, 100);
+    stat.updateInteger(3, 100);
+
+    IntegerColumnStatistics typed = (IntegerColumnStatistics) stat;
+    Assert.assertEquals(1, typed.getMinimum());
+    Assert.assertEquals(3, typed.getMaximum());
+
+    SearchArgument sArg = SearchArgumentFactory.newBuilder()
+        .startAnd()
+        .lessThanEquals("c", PredicateLeaf.Type.LONG, 3L)
+        .end()
+        .build();
+
+    assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl
+        .evaluatePredicate(stat, sArg.getLeaves().get(0), null));
+
+    // Corner case where MIN == MAX == 3
+    final ColumnStatisticsImpl newStat = ColumnStatisticsImpl.create(schema);
+
+    newStat.increment();
+    newStat.updateInteger(3, 100);
+
+    typed = (IntegerColumnStatistics) newStat;
+    Assert.assertEquals(3, typed.getMinimum());
+    Assert.assertEquals(3, typed.getMaximum());
+
+    sArg = SearchArgumentFactory.newBuilder()
+        .startAnd()
+        .lessThanEquals("c", PredicateLeaf.Type.LONG, 3L)
+        .end()
+        .build();
+
+    assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl
+        .evaluatePredicate(newStat, sArg.getLeaves().get(0), null));
+  }
 }
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java 
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index b11036e..9e597a8 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -1150,7 +1150,7 @@ public class TestRecordReaderImpl {
         evaluateInteger(createStringStats("c", "d", true), pred)); // min
     assertEquals(TruthValue.YES_NO_NULL,
         evaluateInteger(createStringStats("b", "d", true), pred)); // middle
-    assertEquals(TruthValue.YES_NO_NULL,
+    assertEquals(TruthValue.YES_NULL,
         evaluateInteger(createStringStats("c", "c", true), pred)); // same
   }
 

Reply via email to