This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-1.6 in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.6 by this push: new b52b379 ORC-673: PPD LTE Point equality comparison is wrong when RG MIN==MAX (#552) b52b379 is described below commit b52b379d4eb5a490fde08a4436406dd78b5bc321 Author: Panagiotis Garefalakis <pga...@cloudera.com> AuthorDate: Sat Oct 17 01:25:26 2020 +0100 ORC-673: PPD LTE Point equality comparison is wrong when RG MIN==MAX (#552) ### What changes were proposed in this pull request? FIx PPD Less than Equals comparison for min==max corner case ### Why are the changes needed? Better PPD evaluation ### How was this patch tested? Extra JUnit test --- .../java/org/apache/orc/impl/RecordReaderImpl.java | 3 +- .../test/org/apache/orc/TestOrcTimestampPPD.java | 2 +- .../orc/impl/TestPredicatePushDownBounds.java | 49 ++++++++++++++++++++++ .../org/apache/orc/impl/TestRecordReaderImpl.java | 2 +- 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 6d83a6a..72b90fc 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -636,7 +636,8 @@ public class RecordReaderImpl implements RecordReader { } case LESS_THAN_EQUALS: loc = range.compare(predObj); - if (loc == Location.AFTER || loc == Location.MAX) { + if (loc == Location.AFTER || loc == Location.MAX || + (loc == Location.MIN && range.isSingleton())) { return range.addNull(TruthValue.YES); } else if (loc == Location.BEFORE) { return range.addNull(TruthValue.NO); diff --git a/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java b/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java index dfe7f7c..e39ff5a 100644 --- a/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java +++ b/java/core/src/test/org/apache/orc/TestOrcTimestampPPD.java @@ -129,7 +129,7 @@ public class TestOrcTimestampPPD { pred = createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.TIMESTAMP, "c", Timestamp.valueOf("1970-01-01 00:00:00.0005"), null); - Assert.assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(colStats[0], pred, null)); + Assert.assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl.evaluatePredicate(colStats[0], pred, null)); pred = createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.TIMESTAMP, "c", Timestamp.valueOf("1970-01-01 00:00:00.0005"), null); diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java index 36d589a..56f4194 100644 --- a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java +++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java @@ -20,8 +20,11 @@ package org.apache.orc.impl; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.orc.IntegerColumnStatistics; import org.apache.orc.TypeDescription; import org.apache.orc.util.BloomFilter; +import org.junit.Assert; import org.junit.Test; import java.nio.charset.StandardCharsets; @@ -322,4 +325,50 @@ public class TestPredicatePushDownBounds { } + /** + * Test for LESS_THAN_EQUALS search arg when upper and lower bounds are the same. + * + * @throws Exception + */ + @Test + public void testLessThanEquals() { + final TypeDescription schema = TypeDescription.createInt(); + final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema); + + stat.increment(); + stat.updateInteger(1, 100); + stat.updateInteger(3, 100); + + IntegerColumnStatistics typed = (IntegerColumnStatistics) stat; + Assert.assertEquals(1, typed.getMinimum()); + Assert.assertEquals(3, typed.getMaximum()); + + SearchArgument sArg = SearchArgumentFactory.newBuilder() + .startAnd() + .lessThanEquals("c", PredicateLeaf.Type.LONG, 3L) + .end() + .build(); + + assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl + .evaluatePredicate(stat, sArg.getLeaves().get(0), null)); + + // Corner case where MIN == MAX == 3 + final ColumnStatisticsImpl newStat = ColumnStatisticsImpl.create(schema); + + newStat.increment(); + newStat.updateInteger(3, 100); + + typed = (IntegerColumnStatistics) newStat; + Assert.assertEquals(3, typed.getMinimum()); + Assert.assertEquals(3, typed.getMaximum()); + + sArg = SearchArgumentFactory.newBuilder() + .startAnd() + .lessThanEquals("c", PredicateLeaf.Type.LONG, 3L) + .end() + .build(); + + assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl + .evaluatePredicate(newStat, sArg.getLeaves().get(0), null)); + } } diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index b11036e..9e597a8 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -1150,7 +1150,7 @@ public class TestRecordReaderImpl { evaluateInteger(createStringStats("c", "d", true), pred)); // min assertEquals(TruthValue.YES_NO_NULL, evaluateInteger(createStringStats("b", "d", true), pred)); // middle - assertEquals(TruthValue.YES_NO_NULL, + assertEquals(TruthValue.YES_NULL, evaluateInteger(createStringStats("c", "c", true), pred)); // same }