richardstartin commented on code in PR #8891:
URL: https://github.com/apache/pinot/pull/8891#discussion_r899399714
##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java:
##########
@@ -70,67 +68,80 @@ public static BaseDictionaryBasedPredicateEvaluator
newDictionaryBasedEvaluator(
*/
public static BaseRawValueBasedPredicateEvaluator
newRawValueBasedEvaluator(NotInPredicate notInPredicate,
DataType dataType) {
- List<String> values = notInPredicate.getValues();
- int hashSetSize = HashUtil.getMinHashSetSize(values.size());
switch (dataType) {
case INT: {
- IntSet nonMatchingValues = new IntOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Integer.parseInt(value));
+ int[] intValues = notInPredicate.getIntValues();
+ IntSet nonMatchingValues = new
IntOpenHashSet(HashUtil.getMinHashSetSize(intValues.length));
+ for (int value : intValues) {
+ nonMatchingValues.add(value);
}
return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case LONG: {
- LongSet nonMatchingValues = new LongOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Long.parseLong(value));
+ long[] longValues = notInPredicate.getLongValues();
+ LongSet nonMatchingValues = new
LongOpenHashSet(HashUtil.getMinHashSetSize(longValues.length));
+ for (long value : longValues) {
+ nonMatchingValues.add(value);
}
return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case FLOAT: {
- FloatSet nonMatchingValues = new FloatOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Float.parseFloat(value));
+ float[] floatValues = notInPredicate.getFloatValues();
+ FloatSet nonMatchingValues = new
FloatOpenHashSet(HashUtil.getMinHashSetSize(floatValues.length));
+ for (float value : floatValues) {
+ nonMatchingValues.add(value);
}
return new FloatRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case DOUBLE: {
- DoubleSet nonMatchingValues = new DoubleOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Double.parseDouble(value));
+ double[] doubleValues = notInPredicate.getDoubleValues();
+ DoubleSet nonMatchingValues = new
DoubleOpenHashSet(HashUtil.getMinHashSetSize(doubleValues.length));
+ for (double value : doubleValues) {
+ nonMatchingValues.add(value);
}
return new DoubleRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BIG_DECIMAL: {
- TreeSet<BigDecimal> nonMatchingValues = new TreeSet<>();
- for (String value : values) {
- nonMatchingValues.add(new BigDecimal(value));
- }
+ BigDecimal[] bigDecimalValues = notInPredicate.getBigDecimalValues();
+ // NOTE: Use TreeSet because BigDecimal's compareTo() is not
consistent with equals()
+ // E.g. compareTo(3.0, 3) returns 0 but equals(3.0, 3) returns
false
+ TreeSet<BigDecimal> nonMatchingValues = new
TreeSet<>(Arrays.asList(bigDecimalValues));
return new
BigDecimalRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BOOLEAN: {
- IntSet nonMatchingValues = new IntOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(BooleanUtils.toInt(value));
+ int[] booleanValues = notInPredicate.getBooleanValues();
+ IntSet nonMatchingValues = new
IntOpenHashSet(HashUtil.getMinHashSetSize(booleanValues.length));
+ for (int value : booleanValues) {
+ nonMatchingValues.add(value);
}
return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case TIMESTAMP: {
- LongSet nonMatchingValues = new LongOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(TimestampUtils.toMillisSinceEpoch(value));
+ long[] timestampValues = notInPredicate.getTimestampValues();
+ LongSet nonMatchingValues = new
LongOpenHashSet(HashUtil.getMinHashSetSize(timestampValues.length));
+ for (long value : timestampValues) {
+ nonMatchingValues.add(value);
}
return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case STRING: {
- Set<String> nonMatchingValues = new ObjectOpenHashSet<>(hashSetSize);
- nonMatchingValues.addAll(values);
+ List<String> stringValues = notInPredicate.getValues();
+ Set<String> nonMatchingValues = new
ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(stringValues.size()));
+ // NOTE: Add value-by-value to avoid overhead
+ for (String value : stringValues) {
+ //noinspection UseBulkOperation
+ nonMatchingValues.add(value);
+ }
return new StringRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BYTES: {
- Set<ByteArray> nonMatchingValues = new
ObjectOpenHashSet<>(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(BytesUtils.toByteArray(value));
+ ByteArray[] bytesValues = notInPredicate.getBytesValues();
+ Set<ByteArray> nonMatchingValues = new
ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(bytesValues.length));
+ // NOTE: Add value-by-value to avoid overhead
+ //noinspection ManualArrayToCollectionCopy
Review Comment:
I’m sure there hash sets were too small by default but this could have been
resolved via the load factor, and would probably have made this even faster (it
would have been nice if profiles before and after were captured for posterity).
##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java:
##########
@@ -70,67 +68,80 @@ public static BaseDictionaryBasedPredicateEvaluator
newDictionaryBasedEvaluator(
*/
public static BaseRawValueBasedPredicateEvaluator
newRawValueBasedEvaluator(NotInPredicate notInPredicate,
DataType dataType) {
- List<String> values = notInPredicate.getValues();
- int hashSetSize = HashUtil.getMinHashSetSize(values.size());
switch (dataType) {
case INT: {
- IntSet nonMatchingValues = new IntOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Integer.parseInt(value));
+ int[] intValues = notInPredicate.getIntValues();
+ IntSet nonMatchingValues = new
IntOpenHashSet(HashUtil.getMinHashSetSize(intValues.length));
+ for (int value : intValues) {
+ nonMatchingValues.add(value);
}
return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case LONG: {
- LongSet nonMatchingValues = new LongOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Long.parseLong(value));
+ long[] longValues = notInPredicate.getLongValues();
+ LongSet nonMatchingValues = new
LongOpenHashSet(HashUtil.getMinHashSetSize(longValues.length));
+ for (long value : longValues) {
+ nonMatchingValues.add(value);
}
return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case FLOAT: {
- FloatSet nonMatchingValues = new FloatOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Float.parseFloat(value));
+ float[] floatValues = notInPredicate.getFloatValues();
+ FloatSet nonMatchingValues = new
FloatOpenHashSet(HashUtil.getMinHashSetSize(floatValues.length));
+ for (float value : floatValues) {
+ nonMatchingValues.add(value);
}
return new FloatRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case DOUBLE: {
- DoubleSet nonMatchingValues = new DoubleOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(Double.parseDouble(value));
+ double[] doubleValues = notInPredicate.getDoubleValues();
+ DoubleSet nonMatchingValues = new
DoubleOpenHashSet(HashUtil.getMinHashSetSize(doubleValues.length));
+ for (double value : doubleValues) {
+ nonMatchingValues.add(value);
}
return new DoubleRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BIG_DECIMAL: {
- TreeSet<BigDecimal> nonMatchingValues = new TreeSet<>();
- for (String value : values) {
- nonMatchingValues.add(new BigDecimal(value));
- }
+ BigDecimal[] bigDecimalValues = notInPredicate.getBigDecimalValues();
+ // NOTE: Use TreeSet because BigDecimal's compareTo() is not
consistent with equals()
+ // E.g. compareTo(3.0, 3) returns 0 but equals(3.0, 3) returns
false
+ TreeSet<BigDecimal> nonMatchingValues = new
TreeSet<>(Arrays.asList(bigDecimalValues));
return new
BigDecimalRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BOOLEAN: {
- IntSet nonMatchingValues = new IntOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(BooleanUtils.toInt(value));
+ int[] booleanValues = notInPredicate.getBooleanValues();
+ IntSet nonMatchingValues = new
IntOpenHashSet(HashUtil.getMinHashSetSize(booleanValues.length));
+ for (int value : booleanValues) {
+ nonMatchingValues.add(value);
}
return new IntRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case TIMESTAMP: {
- LongSet nonMatchingValues = new LongOpenHashSet(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(TimestampUtils.toMillisSinceEpoch(value));
+ long[] timestampValues = notInPredicate.getTimestampValues();
+ LongSet nonMatchingValues = new
LongOpenHashSet(HashUtil.getMinHashSetSize(timestampValues.length));
+ for (long value : timestampValues) {
+ nonMatchingValues.add(value);
}
return new LongRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case STRING: {
- Set<String> nonMatchingValues = new ObjectOpenHashSet<>(hashSetSize);
- nonMatchingValues.addAll(values);
+ List<String> stringValues = notInPredicate.getValues();
+ Set<String> nonMatchingValues = new
ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(stringValues.size()));
+ // NOTE: Add value-by-value to avoid overhead
+ for (String value : stringValues) {
+ //noinspection UseBulkOperation
+ nonMatchingValues.add(value);
+ }
return new StringRawValueBasedNotInPredicateEvaluator(notInPredicate,
nonMatchingValues);
}
case BYTES: {
- Set<ByteArray> nonMatchingValues = new
ObjectOpenHashSet<>(hashSetSize);
- for (String value : values) {
- nonMatchingValues.add(BytesUtils.toByteArray(value));
+ ByteArray[] bytesValues = notInPredicate.getBytesValues();
+ Set<ByteArray> nonMatchingValues = new
ObjectOpenHashSet<>(HashUtil.getMinHashSetSize(bytesValues.length));
+ // NOTE: Add value-by-value to avoid overhead
+ //noinspection ManualArrayToCollectionCopy
Review Comment:
I’m sure the hash sets were too small by default but this could have been
resolved via the load factor, and would probably have made this even faster (it
would have been nice if profiles before and after were captured for posterity).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]