This is an automated email from the ASF dual-hosted git repository.
kishoreg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new af5a901 Optimize RealtimeDictionaryBasedRangePredicateEvaluator by
not scanning the dictionary when cardinality is high (#5331)
af5a901 is described below
commit af5a90171039873c708e8517dc0d3242af41e475
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Fri May 8 23:44:43 2020 -0700
Optimize RealtimeDictionaryBasedRangePredicateEvaluator by not scanning the
dictionary when cardinality is high (#5331)
For real-time range predicate, because the dictionary is not sorted, in
order to get the matching dictionary ids, we have to scan the whole dictionary.
This will cause performance issue when the cardinality is high for the
column.
Optimize it by adding a cardinality threshold (1000 for now) to decide
whether to pre-calculate all the matching dictionary ids.
---
.../BaseDictionaryBasedPredicateEvaluator.java | 3 -
.../BaseRawValueBasedPredicateEvaluator.java | 2 -
.../predicate/PredicateEvaluatorProvider.java | 8 +-
.../predicate/RangePredicateEvaluatorFactory.java | 109 +++++++++++++++------
...ngeOfflineDictionaryPredicateEvaluatorTest.java | 31 ++++--
5 files changed, 107 insertions(+), 46 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
index 74b114f..0fbd9d9 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java
@@ -18,9 +18,6 @@
*/
package org.apache.pinot.core.operator.filter.predicate;
-import org.apache.commons.lang3.mutable.MutableInt;
-
-
public abstract class BaseDictionaryBasedPredicateEvaluator extends
BasePredicateEvaluator {
protected boolean _alwaysTrue;
protected boolean _alwaysFalse;
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
index e1b33db..a9e269b 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java
@@ -18,8 +18,6 @@
*/
package org.apache.pinot.core.operator.filter.predicate;
-import org.apache.commons.lang3.mutable.MutableInt;
-
public abstract class BaseRawValueBasedPredicateEvaluator extends
BasePredicateEvaluator {
@Override
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
index 6208b25..c92396e 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluatorProvider.java
@@ -18,7 +18,6 @@
*/
package org.apache.pinot.core.operator.filter.predicate;
-import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.core.common.Predicate;
import org.apache.pinot.core.common.predicate.EqPredicate;
import org.apache.pinot.core.common.predicate.InPredicate;
@@ -29,13 +28,15 @@ import
org.apache.pinot.core.common.predicate.RegexpLikePredicate;
import org.apache.pinot.core.common.predicate.TextMatchPredicate;
import org.apache.pinot.core.query.exception.BadQueryRequestException;
import org.apache.pinot.core.segment.index.readers.Dictionary;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
public class PredicateEvaluatorProvider {
private PredicateEvaluatorProvider() {
}
- public static PredicateEvaluator getPredicateEvaluator(Predicate predicate,
Dictionary dictionary, DataType dataType) {
+ public static PredicateEvaluator getPredicateEvaluator(Predicate predicate,
Dictionary dictionary,
+ DataType dataType) {
try {
if (dictionary != null) {
// dictionary based predicate evaluators
@@ -49,7 +50,8 @@ public class PredicateEvaluatorProvider {
case NOT_IN:
return
NotInPredicateEvaluatorFactory.newDictionaryBasedEvaluator((NotInPredicate)
predicate, dictionary);
case RANGE:
- return
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator((RangePredicate)
predicate, dictionary);
+ return RangePredicateEvaluatorFactory
+ .newDictionaryBasedEvaluator((RangePredicate) predicate,
dictionary, dataType);
case REGEXP_LIKE:
return RegexpLikePredicateEvaluatorFactory
.newDictionaryBasedEvaluator((RegexpLikePredicate) predicate,
dictionary);
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
index 08c13d1..af6354c 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java
@@ -19,14 +19,14 @@
package org.apache.pinot.core.operator.filter.predicate;
import it.unimi.dsi.fastutil.ints.IntSet;
-import org.apache.pinot.spi.data.FieldSpec;
-import org.apache.pinot.spi.utils.BytesUtils;
-import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.core.common.Predicate;
import org.apache.pinot.core.common.predicate.RangePredicate;
import org.apache.pinot.core.realtime.impl.dictionary.BaseMutableDictionary;
import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
import org.apache.pinot.core.segment.index.readers.Dictionary;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.ByteArray;
+import org.apache.pinot.spi.utils.BytesUtils;
/**
@@ -41,14 +41,16 @@ public class RangePredicateEvaluatorFactory {
*
* @param rangePredicate RANGE predicate to evaluate
* @param dictionary Dictionary for the column
+ * @param dataType Data type for the column
* @return Dictionary based RANGE predicate evaluator
*/
public static BaseDictionaryBasedPredicateEvaluator
newDictionaryBasedEvaluator(RangePredicate rangePredicate,
- Dictionary dictionary) {
+ Dictionary dictionary, DataType dataType) {
if (dictionary instanceof BaseImmutableDictionary) {
return new OfflineDictionaryBasedRangePredicateEvaluator(rangePredicate,
(BaseImmutableDictionary) dictionary);
} else {
- return new
RealtimeDictionaryBasedRangePredicateEvaluator(rangePredicate,
(BaseMutableDictionary) dictionary);
+ return new
RealtimeDictionaryBasedRangePredicateEvaluator(rangePredicate,
(BaseMutableDictionary) dictionary,
+ dataType);
}
}
@@ -60,7 +62,7 @@ public class RangePredicateEvaluatorFactory {
* @return Raw value based RANGE predicate evaluator
*/
public static BaseRawValueBasedPredicateEvaluator
newRawValueBasedEvaluator(RangePredicate rangePredicate,
- FieldSpec.DataType dataType) {
+ DataType dataType) {
switch (dataType) {
case INT:
return new IntRawValueBasedRangePredicateEvaluator(rangePredicate);
@@ -169,19 +171,59 @@ public class RangePredicateEvaluatorFactory {
}
private static final class RealtimeDictionaryBasedRangePredicateEvaluator
extends BaseDictionaryBasedPredicateEvaluator {
+ // When the cardinality of the column is lower than this threshold,
pre-calculate the matching dictionary ids;
+ // otherwise, fetch the value when evaluating each dictionary id.
+ // TODO: Tune this threshold
+ private static final int DICT_ID_SET_BASED_CARDINALITY_THRESHOLD = 1000;
+
+ final BaseMutableDictionary _dictionary;
+ final DataType _dataType;
+ final boolean _dictIdSetBased;
final IntSet _matchingDictIdSet;
- final int _numMatchingDictIds;
- int[] _matchingDictIds;
-
- RealtimeDictionaryBasedRangePredicateEvaluator(RangePredicate
rangePredicate, BaseMutableDictionary dictionary) {
- _matchingDictIdSet = dictionary
- .getDictIdsInRange(rangePredicate.getLowerBoundary(),
rangePredicate.getUpperBoundary(),
- rangePredicate.includeLowerBoundary(),
rangePredicate.includeUpperBoundary());
- _numMatchingDictIds = _matchingDictIdSet.size();
- if (_numMatchingDictIds == 0) {
- _alwaysFalse = true;
- } else if (_numMatchingDictIds == dictionary.length()) {
- _alwaysTrue = true;
+ final BaseRawValueBasedPredicateEvaluator _rawValueBasedEvaluator;
+
+ RealtimeDictionaryBasedRangePredicateEvaluator(RangePredicate
rangePredicate, BaseMutableDictionary dictionary,
+ DataType dataType) {
+ _dictionary = dictionary;
+ _dataType = dataType;
+ int cardinality = dictionary.length();
+ if (cardinality < DICT_ID_SET_BASED_CARDINALITY_THRESHOLD) {
+ _dictIdSetBased = true;
+ _rawValueBasedEvaluator = null;
+ _matchingDictIdSet = dictionary
+ .getDictIdsInRange(rangePredicate.getLowerBoundary(),
rangePredicate.getUpperBoundary(),
+ rangePredicate.includeLowerBoundary(),
rangePredicate.includeUpperBoundary());
+ int numMatchingDictIds = _matchingDictIdSet.size();
+ if (numMatchingDictIds == 0) {
+ _alwaysFalse = true;
+ } else if (numMatchingDictIds == cardinality) {
+ _alwaysTrue = true;
+ }
+ } else {
+ _dictIdSetBased = false;
+ _matchingDictIdSet = null;
+ switch (dataType) {
+ case INT:
+ _rawValueBasedEvaluator = new
IntRawValueBasedRangePredicateEvaluator(rangePredicate);
+ break;
+ case LONG:
+ _rawValueBasedEvaluator = new
LongRawValueBasedRangePredicateEvaluator(rangePredicate);
+ break;
+ case FLOAT:
+ _rawValueBasedEvaluator = new
FloatRawValueBasedRangePredicateEvaluator(rangePredicate);
+ break;
+ case DOUBLE:
+ _rawValueBasedEvaluator = new
DoubleRawValueBasedRangePredicateEvaluator(rangePredicate);
+ break;
+ case STRING:
+ _rawValueBasedEvaluator = new
StringRawValueBasedRangePredicateEvaluator(rangePredicate);
+ break;
+ case BYTES:
+ _rawValueBasedEvaluator = new
BytesRawValueBasedRangePredicateEvaluator(rangePredicate);
+ break;
+ default:
+ throw new IllegalStateException();
+ }
}
}
@@ -192,20 +234,31 @@ public class RangePredicateEvaluatorFactory {
@Override
public boolean applySV(int dictId) {
- return _matchingDictIdSet.contains(dictId);
- }
-
- @Override
- public int getNumMatchingDictIds() {
- return _numMatchingDictIds;
+ if (_dictIdSetBased) {
+ return _matchingDictIdSet.contains(dictId);
+ } else {
+ switch (_dataType) {
+ case INT:
+ return
_rawValueBasedEvaluator.applySV(_dictionary.getIntValue(dictId));
+ case LONG:
+ return
_rawValueBasedEvaluator.applySV(_dictionary.getLongValue(dictId));
+ case FLOAT:
+ return
_rawValueBasedEvaluator.applySV(_dictionary.getFloatValue(dictId));
+ case DOUBLE:
+ return
_rawValueBasedEvaluator.applySV(_dictionary.getDoubleValue(dictId));
+ case STRING:
+ return
_rawValueBasedEvaluator.applySV(_dictionary.getStringValue(dictId));
+ case BYTES:
+ return
_rawValueBasedEvaluator.applySV(_dictionary.getBytesValue(dictId));
+ default:
+ throw new IllegalStateException();
+ }
+ }
}
@Override
public int[] getMatchingDictIds() {
- if (_matchingDictIds == null) {
- _matchingDictIds = _matchingDictIdSet.toIntArray();
- }
- return _matchingDictIds;
+ throw new UnsupportedOperationException();
}
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
index 65fba47..18cf762 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/predicate/RangeOfflineDictionaryPredicateEvaluatorTest.java
@@ -22,6 +22,7 @@ import org.apache.pinot.core.common.predicate.RangePredicate;
import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
import
org.apache.pinot.core.operator.filter.predicate.RangePredicateEvaluatorFactory;
import org.apache.pinot.core.segment.index.readers.BaseImmutableDictionary;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -41,7 +42,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd,
true);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -62,7 +64,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd,
true);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertFalse(evaluator.applySV(rangeStart));
@@ -83,7 +86,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd,
false);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -104,7 +108,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd,
false);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertFalse(evaluator.applySV(rangeStart));
@@ -137,7 +142,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd,
false);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -157,7 +163,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd,
true);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -171,7 +178,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = DICT_LEN - 1;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd,
true);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -191,7 +199,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = DICT_LEN - 1;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd,
true);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertFalse(evaluator.applySV(rangeStart));
@@ -205,7 +214,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = DICT_LEN - 1;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, true, rangeEnd,
true);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertFalse(evaluator.isAlwaysFalse());
Assert.assertTrue(evaluator.isAlwaysTrue());
Assert.assertTrue(evaluator.applySV(rangeStart));
@@ -224,7 +234,8 @@ public class RangeOfflineDictionaryPredicateEvaluatorTest {
rangeEnd = 5;
BaseImmutableDictionary reader = createReader(rangeStart, rangeEnd);
RangePredicate predicate = createPredicate(rangeStart, false, rangeEnd,
false);
- PredicateEvaluator evaluator =
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader);
+ PredicateEvaluator evaluator =
+
RangePredicateEvaluatorFactory.newDictionaryBasedEvaluator(predicate, reader,
DataType.INT);
Assert.assertTrue(evaluator.isAlwaysFalse());
Assert.assertFalse(evaluator.isAlwaysTrue());
Assert.assertFalse(evaluator.applySV(rangeStart));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]