Jackie-Jiang commented on code in PR #8891:
URL: https://github.com/apache/pinot/pull/8891#discussion_r898206930
##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java:
##########
@@ -70,67 +68,80 @@ public static BaseDictionaryBasedPredicateEvaluator
newDictionaryBasedEvaluator(
*/
public static BaseRawValueBasedPredicateEvaluator
newRawValueBasedEvaluator(NotInPredicate notInPredicate,
DataType dataType) {
- List<String> values = notInPredicate.getValues();
- int hashSetSize = HashUtil.getMinHashSetSize(values.size());
switch (dataType) {
case INT: {
- IntSet nonMatchingValues = new IntOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Integer.parseInt(value));
+ int[] intValues = notInPredicate.getIntValues();
+ IntSet nonMatchingValues = new
IntOpenHashSet(HashUtil.getMinHashSetSize(intValues.length));
+ for (int value : intValues) {
+ nonMatchingValues.add(value);
}
return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case LONG: {
- LongSet nonMatchingValues = new LongOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Long.parseLong(value));
+ long[] longValues = notInPredicate.getLongValues();
+ LongSet nonMatchingValues = new
LongOpenHashSet(HashUtil.getMinHashSetSize(longValues.length));
+ for (long value : longValues) {
+ nonMatchingValues.add(value);
}
return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case FLOAT: {
- FloatSet nonMatchingValues = new FloatOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Float.parseFloat(value));
+ float[] floatValues = notInPredicate.getFloatValues();
+ FloatSet nonMatchingValues = new
FloatOpenHashSet(HashUtil.getMinHashSetSize(floatValues.length));
+ for (float value : floatValues) {
+ nonMatchingValues.add(value);
}
return new FloatRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case DOUBLE: {
- DoubleSet nonMatchingValues = new DoubleOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Double.parseDouble(value));
+ double[] doubleValues = notInPredicate.getDoubleValues();
+ DoubleSet nonMatchingValues = new
DoubleOpenHashSet(HashUtil.getMinHashSetSize(doubleValues.length));
+ for (double value : doubleValues) {
+ nonMatchingValues.add(value);
}
return new DoubleRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BIG_DECIMAL: {
- TreeSet<BigDecimal> nonMatchingValues = new TreeSet<>();
- for (String value : values) {
- nonMatchingValues.add(new BigDecimal(value));
- }
+ BigDecimal[] bigDecimalValues = notInPredicate.getBigDecimalValues();
+ // NOTE: Use TreeSet because BigDecimal's compareTo() is not
consistent with equals()
+ // E.g. compareTo(3.0, 3) returns 0 but equals(3.0, 3) returns
false
+ TreeSet<BigDecimal> nonMatchingValues = new
TreeSet<>(Arrays.asList(bigDecimalValues));
return new
BigDecimalRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BOOLEAN: {
- IntSet nonMatchingValues = new IntOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(BooleanUtils.toInt(value));
+ int[] booleanValues = notInPredicate.getBooleanValues();
+ IntSet nonMatchingValues = new
IntOpenHashSet(HashUtil.getMinHashSetSize(booleanValues.length));
+ for (int value : booleanValues) {
+ nonMatchingValues.add(value);
}
return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case TIMESTAMP: {
- LongSet nonMatchingValues = new LongOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(TimestampUtils.toMillisSinceEpoch(value));
+ long[] timestampValues = notInPredicate.getTimestampValues();
+ LongSet nonMatchingValues = new
LongOpenHashSet(HashUtil.getMinHashSetSize(timestampValues.length));
+ for (long value : timestampValues) {
+ nonMatchingValues.add(value);
}
return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case STRING: {
- Set<String> nonMatchingValues = new ObjectOpenHashSet<>(hashSetSize);
- nonMatchingValues.addAll(values);
+ List<String> stringValues = notInPredicate.getValues();
+ Set<String> nonMatchingValues = new
ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(stringValues.size()));
+ // NOTE: Add value-by-value to avoid overhead
+ for (String value : stringValues) {
+ //noinspection UseBulkOperation
+ nonMatchingValues.add(value);
+ }
return new StringRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BYTES: {
- Set<ByteArray> nonMatchingValues = new
ObjectOpenHashSet<>(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(BytesUtils.toByteArray(value));
+ ByteArray[] bytesValues = notInPredicate.getBytesValues();
+ Set<ByteArray> nonMatchingValues = new
ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(bytesValues.length));
+ // NOTE: Add value-by-value to avoid overhead
+ //noinspection ManualArrayToCollectionCopy
Review Comment:
Directly construct the set from a list won't honor the min hash set size
(not sure how much it helps, but don't want to couple that change into this
change).
I decide to keep the value-by-value add to skip the redundant capacity check
in the `ObjectOpenHashSet.addAll()` because we already set the proper capacity
up-front. Also want to keep the behavior the same for all data types so that it
is easier to track
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]