This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch fixing_raw_bytes_comparison in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit e8c4e08190f39aa461db2ff92229447eed36fcb0 Author: Xiang Fu <fx19880...@gmail.com> AuthorDate: Wed Sep 4 14:42:52 2019 -0700 Support predicates on raw bytes column without dictionary --- .../BaseDictionaryBasedPredicateEvaluator.java | 10 +++++ .../BaseRawValueBasedPredicateEvaluator.java | 26 +++++++++++++ .../predicate/EqualsPredicateEvaluatorFactory.java | 22 +++++++++++ .../predicate/InPredicateEvaluatorFactory.java | 23 ++++++++++++ .../NotEqualsPredicateEvaluatorFactory.java | 22 +++++++++++ .../predicate/NotInPredicateEvaluatorFactory.java | 24 ++++++++++++ .../filter/predicate/PredicateEvaluator.java | 17 +++++++++ .../predicate/RangePredicateEvaluatorFactory.java | 43 ++++++++++++++++++++++ 8 files changed, 187 insertions(+) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java index 0d5c6c7..0f55f7b 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java @@ -80,6 +80,16 @@ public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicat throw new UnsupportedOperationException(); } + @Override + public final boolean applySV(byte[] value) { + throw new UnsupportedOperationException(); + } + + @Override + public final boolean applyMV(byte[][] values, int length) { + throw new UnsupportedOperationException(); + } + // NOTE: override it for exclusive predicate @Override public int[] getNonMatchingDictIds() { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java index 
13dfc64..41be977 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseRawValueBasedPredicateEvaluator.java @@ -188,4 +188,30 @@ public abstract class BaseRawValueBasedPredicateEvaluator extends BasePredicateE return false; } } + + @Override + public boolean applySV(byte[] value) { + throw new UnsupportedOperationException(); + } + + @SuppressWarnings("Duplicates") + @Override + public boolean applyMV(byte[][] values, int length) { + if (isExclusive()) { + for (int i = 0; i < length; i++) { + if (!applySV(values[i])) { + return false; + } + } + return true; + } else { + for (int i = 0; i < length; i++) { + if (applySV(values[i])) { + return true; + } + } + return false; + } + } + } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java index c4c35fd..7cee99c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java @@ -19,6 +19,8 @@ package org.apache.pinot.core.operator.filter.predicate; import org.apache.pinot.common.data.FieldSpec; +import org.apache.pinot.common.utils.BytesUtils; +import org.apache.pinot.common.utils.primitive.ByteArray; import org.apache.pinot.core.common.Predicate; import org.apache.pinot.core.common.predicate.EqPredicate; import org.apache.pinot.core.segment.index.readers.Dictionary; @@ -63,6 +65,8 @@ public class EqualsPredicateEvaluatorFactory { return new DoubleRawValueBasedEqPredicateEvaluator(eqPredicate); case STRING: return new StringRawValueBasedEqPredicateEvaluator(eqPredicate); + case BYTES: + return new 
BytesRawValueBasedEqPredicateEvaluator(eqPredicate); default: throw new UnsupportedOperationException("Unsupported data type: " + dataType); } @@ -190,4 +194,22 @@ public class EqualsPredicateEvaluatorFactory { return _matchingValue.equals(value); } } + + private static final class BytesRawValueBasedEqPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { + final byte[] _matchingValue; + + BytesRawValueBasedEqPredicateEvaluator(EqPredicate eqPredicate) { + _matchingValue = BytesUtils.toBytes(eqPredicate.getEqualsValue()); + } + + @Override + public Predicate.Type getPredicateType() { + return Predicate.Type.EQ; + } + + @Override + public boolean applySV(byte[] value) { + return ByteArray.compare(_matchingValue, value) == 0; + } + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java index 02a449d..e0d77df 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java @@ -30,6 +30,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.Set; import org.apache.pinot.common.data.FieldSpec; +import org.apache.pinot.common.utils.BytesUtils; import org.apache.pinot.common.utils.HashUtil; import org.apache.pinot.core.common.Predicate; import org.apache.pinot.core.common.predicate.InPredicate; @@ -75,6 +76,8 @@ public class InPredicateEvaluatorFactory { return new DoubleRawValueBasedInPredicateEvaluator(inPredicate); case STRING: return new StringRawValueBasedInPredicateEvaluator(inPredicate); + case BYTES: + return new BytesRawValueBasedInPredicateEvaluator(inPredicate); default: throw new UnsupportedOperationException("Unsupported data type: " + dataType); } @@ -233,4 +236,24 @@ public class 
InPredicateEvaluatorFactory { return _matchingValues.contains(value); } } + + private static final class BytesRawValueBasedInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { + final Set<String> _matchingValues; /* IN-list literals in the text form produced by BytesUtils.toHexString */ + + BytesRawValueBasedInPredicateEvaluator(InPredicate inPredicate) { + String[] values = inPredicate.getValues(); + _matchingValues = new HashSet<>(HashUtil.getMinHashSetSize(values.length)); + for (String value : values) { /* decode then re-encode, so each literal is stored in exactly the form applySV looks up */ + _matchingValues.add(BytesUtils.toHexString(BytesUtils.toBytes(value))); + } + } + + @Override + public Predicate.Type getPredicateType() { + return Predicate.Type.IN; + } + + @Override + public boolean applySV(byte[] value) { + return _matchingValues.contains(BytesUtils.toHexString(value)); + } + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java index 514e2e2..4a92569 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java @@ -19,6 +19,8 @@ package org.apache.pinot.core.operator.filter.predicate; import org.apache.pinot.common.data.FieldSpec; +import org.apache.pinot.common.utils.BytesUtils; +import org.apache.pinot.common.utils.primitive.ByteArray; import org.apache.pinot.core.common.Predicate; import org.apache.pinot.core.common.predicate.NEqPredicate; import org.apache.pinot.core.segment.index.readers.Dictionary; @@ -63,6 +65,8 @@ public class NotEqualsPredicateEvaluatorFactory { return new DoubleRawValueBasedNeqPredicateEvaluator(nEqPredicate); case STRING: return new StringRawValueBasedNeqPredicateEvaluator(nEqPredicate); + case BYTES: + return new BytesRawValueBasedNeqPredicateEvaluator(nEqPredicate); default: throw new UnsupportedOperationException("Unsupported data type: " + dataType); } @@ -215,4
+219,22 @@ public class NotEqualsPredicateEvaluatorFactory { return !_nonMatchingValue.equals(value); } } + + private static final class BytesRawValueBasedNeqPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { + final byte[] _nonMatchingValue; + + BytesRawValueBasedNeqPredicateEvaluator(NEqPredicate nEqPredicate) { + _nonMatchingValue = BytesUtils.toBytes(nEqPredicate.getNotEqualsValue()); + } + + @Override + public Predicate.Type getPredicateType() { + return Predicate.Type.NEQ; + } + + @Override + public boolean applySV(byte[] value) { + return ByteArray.compare(_nonMatchingValue, value) != 0; + } + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java index 33d31c1..678cd11 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java @@ -30,6 +30,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.Set; import org.apache.pinot.common.data.FieldSpec; +import org.apache.pinot.common.utils.BytesUtils; import org.apache.pinot.common.utils.HashUtil; import org.apache.pinot.core.common.Predicate; import org.apache.pinot.core.common.predicate.NotInPredicate; @@ -75,6 +76,8 @@ public class NotInPredicateEvaluatorFactory { return new DoubleRawValueBasedNotInPredicateEvaluator(notInPredicate); case STRING: return new StringRawValueBasedNotInPredicateEvaluator(notInPredicate); + case BYTES: + return new BytesRawValueBasedNotInPredicateEvaluator(notInPredicate); default: throw new UnsupportedOperationException("Unsupported data type: " + dataType); } @@ -251,4 +254,25 @@ public class NotInPredicateEvaluatorFactory { return !_nonMatchingValues.contains(value); } } + + private static 
final class BytesRawValueBasedNotInPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { + final Set<String> _nonMatchingValues; /* NOT IN-list literals in the text form produced by BytesUtils.toHexString */ + + BytesRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate) { + String[] values = notInPredicate.getValues(); + _nonMatchingValues = new HashSet<>(HashUtil.getMinHashSetSize(values.length)); + for (String value : values) { /* decode then re-encode, so each literal is stored in exactly the form applySV looks up */ + _nonMatchingValues.add(BytesUtils.toHexString(BytesUtils.toBytes(value))); + } + } + + @Override + public Predicate.Type getPredicateType() { + return Predicate.Type.NOT_IN; + } + + @Override + public boolean applySV(byte[] value) { + return !_nonMatchingValues.contains(BytesUtils.toHexString(value)); + } + } + } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java index 2255a14..9cb2894 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java @@ -164,4 +164,21 @@ public interface PredicateEvaluator { * @return Whether the entry matches the predicate */ boolean applyMV(String[] values, int length); + + /** + * Apply a single-value entry to the predicate. + * + * @param value Raw value + * @return Whether the entry matches the predicate + */ + boolean applySV(byte[] value); + + /** + * Apply a multi-value entry to the predicate.
+ * + * @param values Array of raw values + * @param length Number of values in the entry + * @return Whether the entry matches the predicate + */ + boolean applyMV(byte[][] values, int length); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java index 4431fdc..f2b7ad3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java @@ -21,6 +21,8 @@ package org.apache.pinot.core.operator.filter.predicate; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.pinot.common.data.FieldSpec; +import org.apache.pinot.common.utils.BytesUtils; +import org.apache.pinot.common.utils.primitive.ByteArray; import org.apache.pinot.core.common.Predicate; import org.apache.pinot.core.common.predicate.RangePredicate; import org.apache.pinot.core.realtime.impl.dictionary.MutableDictionary; @@ -71,6 +73,8 @@ public class RangePredicateEvaluatorFactory { return new DoubleRawValueBasedRangePredicateEvaluator(rangePredicate); case STRING: return new StringRawValueBasedRangePredicateEvaluator(rangePredicate); + case BYTES: + return new BytesRawValueBasedRangePredicateEvaluator(rangePredicate); default: throw new UnsupportedOperationException("Unsupported data type: " + dataType); } @@ -408,4 +412,43 @@ public class RangePredicateEvaluatorFactory { return result; } } + + private static final class BytesRawValueBasedRangePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { + final byte[] _lowerBoundary; + final byte[] _upperBoundary; + final boolean _includeLowerBoundary; + final boolean _includeUpperBoundary; + + BytesRawValueBasedRangePredicateEvaluator(RangePredicate 
rangePredicate) { + _lowerBoundary = "*".equals(rangePredicate.getLowerBoundary()) ? null : BytesUtils.toBytes(rangePredicate.getLowerBoundary()); /* "*" is the unbounded marker, not hex data: keep that side null instead of decoding it */ + _upperBoundary = "*".equals(rangePredicate.getUpperBoundary()) ? null : BytesUtils.toBytes(rangePredicate.getUpperBoundary()); + _includeLowerBoundary = rangePredicate.includeLowerBoundary(); + _includeUpperBoundary = rangePredicate.includeUpperBoundary(); + } + + @Override + public Predicate.Type getPredicateType() { + return Predicate.Type.RANGE; + } + + @Override + public boolean applySV(byte[] value) { + boolean result = true; + if (_lowerBoundary != null) { /* null means no lower bound; a byte[] never equals the String "*", so that test could not detect it */ + if (_includeLowerBoundary) { + result = ByteArray.compare(_lowerBoundary, value) <= 0; + } else { + result = ByteArray.compare(_lowerBoundary, value) < 0; + } + } + if (_upperBoundary != null) { /* null means no upper bound */ + if (_includeUpperBoundary) { + result &= ByteArray.compare(_upperBoundary, value) >= 0; + } else { + result &= ByteArray.compare(_upperBoundary, value) > 0; + } + } + return result; + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org