This is an automated email from the ASF dual-hosted git repository.

kishoreg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new af5a901  Optimize RealtimeDictionaryBasedRangePredicateEvaluator by 
not scanning the dictionary when cardinality is high (#5331)
af5a901 is described below

commit af5a90171039873c708e8517dc0d3242af41e475
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Fri May 8 23:44:43 2020 -0700

    Optimize RealtimeDictionaryBasedRangePredicateEvaluator by not scanning the 
dictionary when cardinality is high (#5331)
    
    For real-time range predicate, because the dictionary is not sorted, in 
order to get the matching dictionary ids, we have to scan the whole dictionary.
    This will cause performance issue when the cardinality is high for the 
column.
    Optimize it by adding a cardinality threshold (1000 for now) to decide 
whether to pre-calculate all the matching dictionary ids.
---
 .../BaseDictionaryBasedPredicateEvaluator.java     |   3 -
 .../BaseRawValueBasedPredicateEvaluator.java       |   2 -
 .../predicate/PredicateEvaluatorProvider.java      |   8 +-
 .../predicate/RangePredicateEvaluatorFactory.java  | 109 +++++++++++++++------
 ...ngeOfflineDictionaryPredicateEvaluatorTest.java |  31 ++++--
 5 files changed, 107 insertions(+), 46 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
index 74b114f..0fbd9d9 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
@@ -18,9 +18,6 @@
  */
 package org.apache.pinot.core.operator.filter.predicate;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-
-
 public abstract class BaseDictionaryBasedPredicateEvaluator extends 
BasePredicateEvaluator {
   protected boolean _alwaysTrue;
   protected boolean _alwaysFalse;
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
index e1b33db..a9e269b 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
@@ -18,8 +18,6 @@
  */
 package org.apache.pinot.core.operator.filter.predicate;
 
-import org.apache.commons.lang3.mutable.MutableInt;
-
 public abstract class BaseRawValueBasedPredicateEvaluator extends 
BasePredicateEvaluator {
 
   @Override
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
index 6208b25..c92396e 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.core.operator.filter.predicate;
 
-import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.core.common.Predicate;
 import org.apache.pinot.core.common.predicate.EqPredicate;
 import org.apache.pinot.core.common.predicate.InPredicate;
@@ -29,13 +28,15 @@ import 
org.apache.pinot.core.common.predicate.RegexpLikePredicate;
 import org.apache.pinot.core.common.predicate.TextMatchPredicate;
 import org.apache.pinot.core.query.exception.BadQueryRequestException;
 import org.apache.pinot.core.segment.index.readers.Dictionary;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
 
 
 public class PredicateEvaluatorProvider {
   private PredicateEvaluatorProvider() {
   }
 
-  public static PredicateEvaluator getPredicateEvaluator(Predicate predicate, 
Dictionary dictionary, DataType dataType) {
+  public static PredicateEvaluator getPredicateEvaluator(Predicate predicate, 
Dictionary dictionary,
+      DataType dataType) {
     try {
       if (dictionary != null) {
         // dictionary based predicate evaluators
@@ -49,7 +50,8 @@ public class PredicateEvaluatorProvider {
           case NOT_IN:
             return 
NotInPredicateEvaluatorFactory.newDictionaryBasedEvaluator((NotInPredicate) 
predicate, dictionary);
           case RANGE:
-            return 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator((RangePredicate) 
predicate, dictionary);
+            return RangePredicateEvaluatorFactory
+                .newDictionaryBasedEvaluator((RangePredicate) predicate, 
dictionary, dataType);
           case REGEXP_LIKE:
             return RegexpLikePredicateEvaluatorFactory
                 .newDictionaryBasedEvaluator((RegexpLikePredicate) predicate, 
dictionary);
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
index 08c13d1..af6354c 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
@@ -19,14 +19,14 @@
 package org.apache.pinot.core.operator.filter.predicate;
 
 import it.unimi.dsi.fastutil.ints.IntSet;
-import org.apache.pinot.spi.data.FieldSpec;
-import org.apache.pinot.spi.utils.BytesUtils;
-import org.apache.pinot.spi.utils.ByteArray;
 import org.apache.pinot.core.common.Predicate;
 import org.apache.pinot.core.common.predicate.RangePredicate;
 import org.apache.pinot.core.realtime.impl.dictionary.BaseMutableDictionary;
 import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
 import org.apache.pinot.core.segment.index.readers.Dictionary;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.BytesUtils;
 
 
 /**
@@ -41,14 +41,16 @@ public class RangePredicateEvaluatorFactory {
    *
    * @param rangePredicate RANGE predicate to evaluate
    * @param dictionary Dictionary for the column
+   * @param dataType Data type for the column
    * @return Dictionary based RANGE predicate evaluator
    */
   public static BaseDictionaryBasedPredicateEvaluator 
newDictionaryBasedEvaluator(RangePredicate rangePredicate,
-      Dictionary dictionary) {
+      Dictionary dictionary, DataType dataType) {
     if (dictionary instanceof BaseImmutableDictionary) {
       return new OfflineDictionaryBasedRangePredicateEvaluator(rangePredicate, 
(BaseImmutableDictionary) dictionary);
     } else {
-      return new 
RealtimeDictionaryBasedRangePredicateEvaluator(rangePredicate, 
(BaseMutableDictionary) dictionary);
+      return new 
RealtimeDictionaryBasedRangePredicateEvaluator(rangePredicate, 
(BaseMutableDictionary) dictionary,
+          dataType);
     }
   }
 
@@ -60,7 +62,7 @@ public class RangePredicateEvaluatorFactory {
    * @return Raw value based RANGE predicate evaluator
    */
   public static BaseRawValueBasedPredicateEvaluator 
newRawValueBasedEvaluator(RangePredicate rangePredicate,
-      FieldSpec.DataType dataType) {
+      DataType dataType) {
     switch (dataType) {
       case INT:
         return new IntRawValueBasedRangePredicateEvaluator(rangePredicate);
@@ -169,19 +171,59 @@ public class RangePredicateEvaluatorFactory {
   }
 
   private static final class RealtimeDictionaryBasedRangePredicateEvaluator 
extends BaseDictionaryBasedPredicateEvaluator {
+    // When the cardinality of the column is lower than this threshold, 
pre-calculate the matching dictionary ids;
+    // otherwise, fetch the value when evaluating each dictionary id.
+    // TODO: Tune this threshold
+    private static final int DICT_ID_SET_BASED_CARDINALITY_THRESHOLD = 1000;
+
+    final BaseMutableDictionary _dictionary;
+    final DataType _dataType;
+    final boolean _dictIdSetBased;
     final IntSet _matchingDictIdSet;
-    final int _numMatchingDictIds;
-    int[] _matchingDictIds;
-
-    RealtimeDictionaryBasedRangePredicateEvaluator(RangePredicate 
rangePredicate, BaseMutableDictionary dictionary) {
-      _matchingDictIdSet = dictionary
-          .getDictIdsInRange(rangePredicate.getLowerBoundary(), 
rangePredicate.getUpperBoundary(),
-              rangePredicate.includeLowerBoundary(), 
rangePredicate.includeUpperBoundary());
-      _numMatchingDictIds = _matchingDictIdSet.size();
-      if (_numMatchingDictIds == 0) {
-        _alwaysFalse = true;
-      } else if (_numMatchingDictIds == dictionary.length()) {
-        _alwaysTrue = true;
+    final BaseRawValueBasedPredicateEvaluator _rawValueBasedEvaluator;
+
+    RealtimeDictionaryBasedRangePredicateEvaluator(RangePredicate 
rangePredicate, BaseMutableDictionary dictionary,
+        DataType dataType) {
+      _dictionary = dictionary;
+      _dataType = dataType;
+      int cardinality = dictionary.length();
+      if (cardinality < DICT_ID_SET_BASED_CARDINALITY_THRESHOLD) {
+        _dictIdSetBased = true;
+        _rawValueBasedEvaluator = null;
+        _matchingDictIdSet = dictionary
+            .getDictIdsInRange(rangePredicate.getLowerBoundary(), 
rangePredicate.getUpperBoundary(),
+                rangePredicate.includeLowerBoundary(), 
rangePredicate.includeUpperBoundary());
+        int numMatchingDictIds = _matchingDictIdSet.size();
+        if (numMatchingDictIds == 0) {
+          _alwaysFalse = true;
+        } else if (numMatchingDictIds == cardinality) {
+          _alwaysTrue = true;
+        }
+      } else {
+        _dictIdSetBased = false;
+        _matchingDictIdSet = null;
+        switch (dataType) {
+          case INT:
+            _rawValueBasedEvaluator = new 
IntRawValueBasedRangePredicateEvaluator(rangePredicate);
+            break;
+          case LONG:
+            _rawValueBasedEvaluator = new 
LongRawValueBasedRangePredicateEvaluator(rangePredicate);
+            break;
+          case FLOAT:
+            _rawValueBasedEvaluator = new 
FloatRawValueBasedRangePredicateEvaluator(rangePredicate);
+            break;
+          case DOUBLE:
+            _rawValueBasedEvaluator = new 
DoubleRawValueBasedRangePredicateEvaluator(rangePredicate);
+            break;
+          case STRING:
+            _rawValueBasedEvaluator = new 
StringRawValueBasedRangePredicateEvaluator(rangePredicate);
+            break;
+          case BYTES:
+            _rawValueBasedEvaluator = new 
BytesRawValueBasedRangePredicateEvaluator(rangePredicate);
+            break;
+          default:
+            throw new IllegalStateException();
+        }
       }
     }
 
@@ -192,20 +234,31 @@ public class RangePredicateEvaluatorFactory {
 
     @Override
     public boolean applySV(int dictId) {
-      return _matchingDictIdSet.contains(dictId);
-    }
-
-    @Override
-    public int getNumMatchingDictIds() {
-      return _numMatchingDictIds;
+      if (_dictIdSetBased) {
+        return _matchingDictIdSet.contains(dictId);
+      } else {
+        switch (_dataType) {
+          case INT:
+            return 
_rawValueBasedEvaluator.applySV(_dictionary.getIntValue(dictId));
+          case LONG:
+            return 
_rawValueBasedEvaluator.applySV(_dictionary.getLongValue(dictId));
+          case FLOAT:
+            return 
_rawValueBasedEvaluator.applySV(_dictionary.getFloatValue(dictId));
+          case DOUBLE:
+            return 
_rawValueBasedEvaluator.applySV(_dictionary.getDoubleValue(dictId));
+          case STRING:
+            return 
_rawValueBasedEvaluator.applySV(_dictionary.getStringValue(dictId));
+          case BYTES:
+            return 
_rawValueBasedEvaluator.applySV(_dictionary.getBytesValue(dictId));
+          default:
+            throw new IllegalStateException();
+        }
+      }
     }
 
     @Override
     public int[] getMatchingDictIds() {
-      if (_matchingDictIds == null) {
-        _matchingDictIds = _matchingDictIdSet.toIntArray();
-      }
-      return _matchingDictIds;
+      throw new UnsupportedOperationException();
     }
   }
 
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
index 65fba47..18cf762 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
@@ -22,6 +22,7 @@ import org.apache.pinot.core.common.predicate.RangePredicate;
 import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
 import 
org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory;
 import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
@@ -41,7 +42,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd, 
true);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -62,7 +64,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd, 
true);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertFalse(evaluator.applySV(rangeStart));
@@ -83,7 +86,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd, 
false);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -104,7 +108,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd, 
false);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertFalse(evaluator.applySV(rangeStart));
@@ -137,7 +142,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd, 
false);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -157,7 +163,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd, 
true);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -171,7 +178,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = DICT_LEN - 1;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd, 
true);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -191,7 +199,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = DICT_LEN - 1;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd, 
true);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertFalse(evaluator.applySV(rangeStart));
@@ -205,7 +214,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = DICT_LEN - 1;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd, 
true);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertFalse(evaluator.isAlwaysFalse());
       Assert.assertTrue(evaluator.isAlwaysTrue());
       Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -224,7 +234,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
       rangeEnd = 5;
       BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
       RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd, 
false);
-      PredicateEvaluator evaluator = 
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+      PredicateEvaluator evaluator =
+          
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader, 
DataType.INT);
       Assert.assertTrue(evaluator.isAlwaysFalse());
       Assert.assertFalse(evaluator.isAlwaysTrue());
       Assert.assertFalse(evaluator.applySV(rangeStart));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to