This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 9b96068988 Optimize MergeEqInFilterOptimizer by reducing the hash
computation of Expression (#14732)
9b96068988 is described below
commit 9b9606898872105341b5a218c81a61f8d60d76e4
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Dec 30 23:02:46 2024 -0800
Optimize MergeEqInFilterOptimizer by reducing the hash computation of
Expression (#14732)
---
.../optimizer/filter/MergeEqInFilterOptimizer.java | 124 ++++++++++++---------
1 file changed, 74 insertions(+), 50 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
index 6836f80226..5104587322 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
@@ -18,16 +18,17 @@
*/
package org.apache.pinot.core.query.optimizer.filter;
+import com.google.common.collect.Maps;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import javax.annotation.Nullable;
import org.apache.pinot.common.request.Expression;
import org.apache.pinot.common.request.ExpressionType;
import org.apache.pinot.common.request.Function;
+import org.apache.pinot.common.request.context.RequestContextUtils;
import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.sql.FilterKind;
@@ -61,9 +62,10 @@ public class MergeEqInFilterOptimizer implements
FilterOptimizer {
String operator = function.getOperator();
if (operator.equals(FilterKind.OR.name())) {
List<Expression> children = function.getOperands();
- Map<Expression, Set<Expression>> valuesMap = new HashMap<>();
- List<Expression> newChildren = new ArrayList<>();
- boolean recreateFilter = false;
+ // Key is the lhs of the EQ/IN predicate, value is the map from string
representation of the value to the value
+ Map<Expression, Map<String, Expression>> valuesMap = new HashMap<>();
+ List<Expression> newChildren = new ArrayList<>(children.size());
+ boolean[] recreateFilter = new boolean[1];
// Iterate over all the child filters to merge EQ and IN predicates
for (Expression child : children) {
@@ -80,52 +82,62 @@ public class MergeEqInFilterOptimizer implements
FilterOptimizer {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Expression value = operands.get(1);
- Set<Expression> values = valuesMap.get(lhs);
- if (values == null) {
- values = new HashSet<>();
- values.add(value);
- valuesMap.put(lhs, values);
- } else {
- values.add(value);
- // Recreate filter when multiple predicates can be merged
- recreateFilter = true;
- }
+ // Use string value to de-duplicate the values to prevent the
overhead of Expression.hashCode(). This is
+ // consistent with how server handles predicates.
+ String stringValue = RequestContextUtils.getStringValue(value);
+ valuesMap.compute(lhs, (k, v) -> {
+ if (v == null) {
+ Map<String, Expression> values = new HashMap<>();
+ values.put(stringValue, value);
+ return values;
+ } else {
+ v.put(stringValue, value);
+ // Recreate filter when multiple predicates can be merged
+ recreateFilter[0] = true;
+ return v;
+ }
+ });
} else if (childOperator.equals(FilterKind.IN.name())) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
- Set<Expression> inPredicateValuesSet = new HashSet<>();
- int numOperands = operands.size();
- for (int i = 1; i < numOperands; i++) {
- inPredicateValuesSet.add(operands.get(i));
- }
- int numUniqueValues = inPredicateValuesSet.size();
- if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
- // Recreate filter when the IN predicate contains only 1 value
(can be rewritten to EQ predicate),
- // or values can be de-duplicated
- recreateFilter = true;
- }
- Set<Expression> values = valuesMap.get(lhs);
- if (values == null) {
- valuesMap.put(lhs, inPredicateValuesSet);
- } else {
- values.addAll(inPredicateValuesSet);
- // Recreate filter when multiple predicates can be merged
- recreateFilter = true;
- }
+ valuesMap.compute(lhs, (k, v) -> {
+ if (v == null) {
+ Map<String, Expression> values = getInValues(operands);
+ int numUniqueValues = values.size();
+ if (numUniqueValues == 1 || numUniqueValues != operands.size()
- 1) {
+ // Recreate filter when the IN predicate contains only 1
value (can be rewritten to EQ predicate), or
+ // values can be de-duplicated
+ recreateFilter[0] = true;
+ }
+ return values;
+ } else {
+ int numOperands = operands.size();
+ for (int i = 1; i < numOperands; i++) {
+ Expression value = operands.get(i);
+ // Use string value to de-duplicate the values to prevent
the overhead of Expression.hashCode(). This
+ // is consistent with how server handles predicates.
+ String stringValue =
RequestContextUtils.getStringValue(value);
+ v.put(stringValue, value);
+ }
+ // Recreate filter when multiple predicates can be merged
+ recreateFilter[0] = true;
+ return v;
+ }
+ });
} else {
newChildren.add(child);
}
}
}
- if (recreateFilter) {
+ if (recreateFilter[0]) {
if (newChildren.isEmpty() && valuesMap.size() == 1) {
// Single range without other filters
- Map.Entry<Expression, Set<Expression>> entry =
valuesMap.entrySet().iterator().next();
- return getFilterExpression(entry.getKey(), entry.getValue());
+ Map.Entry<Expression, Map<String, Expression>> entry =
valuesMap.entrySet().iterator().next();
+ return getFilterExpression(entry.getKey(),
entry.getValue().values());
} else {
- for (Map.Entry<Expression, Set<Expression>> entry :
valuesMap.entrySet()) {
- newChildren.add(getFilterExpression(entry.getKey(),
entry.getValue()));
+ for (Map.Entry<Expression, Map<String, Expression>> entry :
valuesMap.entrySet()) {
+ newChildren.add(getFilterExpression(entry.getKey(),
entry.getValue().values()));
}
function.setOperands(newChildren);
return filterExpression;
@@ -138,17 +150,12 @@ public class MergeEqInFilterOptimizer implements
FilterOptimizer {
return filterExpression;
} else if (operator.equals(FilterKind.IN.name())) {
List<Expression> operands = function.getOperands();
- Expression lhs = operands.get(0);
- Set<Expression> values = new HashSet<>();
- int numOperands = operands.size();
- for (int i = 1; i < numOperands; i++) {
- values.add(operands.get(i));
- }
+ Map<String, Expression> values = getInValues(operands);
int numUniqueValues = values.size();
- if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
- // Recreate filter when the IN predicate contains only 1 value (can be
rewritten to EQ predicate), or values
- // can be de-duplicated
- return getFilterExpression(lhs, values);
+ if (numUniqueValues == 1 || numUniqueValues != operands.size() - 1) {
+ // Recreate filter when the IN predicate contains only 1 value (can be
rewritten to EQ predicate), or values can
+ // be de-duplicated
+ return getFilterExpression(operands.get(0), values.values());
} else {
return filterExpression;
}
@@ -157,10 +164,27 @@ public class MergeEqInFilterOptimizer implements
FilterOptimizer {
}
}
+ /**
+ * Helper method to get the values from the IN predicate. Returns a map from
string representation of the value to the
+ * value.
+ */
+ private Map<String, Expression> getInValues(List<Expression> operands) {
+ int numOperands = operands.size();
+ Map<String, Expression> values =
Maps.newHashMapWithExpectedSize(numOperands - 1);
+ for (int i = 1; i < numOperands; i++) {
+ Expression value = operands.get(i);
+ // Use string value to de-duplicate the values to prevent the overhead
of Expression.hashCode(). This is
+ // consistent with how server handles predicates.
+ String stringValue = RequestContextUtils.getStringValue(value);
+ values.put(stringValue, value);
+ }
+ return values;
+ }
+
/**
* Helper method to construct a EQ or IN predicate filter Expression from
the given lhs and values.
*/
- private static Expression getFilterExpression(Expression lhs,
Set<Expression> values) {
+ private static Expression getFilterExpression(Expression lhs,
Collection<Expression> values) {
int numValues = values.size();
if (numValues == 1) {
return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), lhs,
values.iterator().next());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]