This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 9b96068988 Optimize MergeEqInFilterOptimizer by reducing the hash 
computation of Expression (#14732)
9b96068988 is described below

commit 9b9606898872105341b5a218c81a61f8d60d76e4
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Dec 30 23:02:46 2024 -0800

    Optimize MergeEqInFilterOptimizer by reducing the hash computation of 
Expression (#14732)
---
 .../optimizer/filter/MergeEqInFilterOptimizer.java | 124 ++++++++++++---------
 1 file changed, 74 insertions(+), 50 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
index 6836f80226..5104587322 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java
@@ -18,16 +18,17 @@
  */
 package org.apache.pinot.core.query.optimizer.filter;
 
+import com.google.common.collect.Maps;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import javax.annotation.Nullable;
 import org.apache.pinot.common.request.Expression;
 import org.apache.pinot.common.request.ExpressionType;
 import org.apache.pinot.common.request.Function;
+import org.apache.pinot.common.request.context.RequestContextUtils;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.sql.FilterKind;
@@ -61,9 +62,10 @@ public class MergeEqInFilterOptimizer implements 
FilterOptimizer {
     String operator = function.getOperator();
     if (operator.equals(FilterKind.OR.name())) {
       List<Expression> children = function.getOperands();
-      Map<Expression, Set<Expression>> valuesMap = new HashMap<>();
-      List<Expression> newChildren = new ArrayList<>();
-      boolean recreateFilter = false;
+      // Key is the lhs of the EQ/IN predicate, value is the map from string 
representation of the value to the value
+      Map<Expression, Map<String, Expression>> valuesMap = new HashMap<>();
+      List<Expression> newChildren = new ArrayList<>(children.size());
+      boolean[] recreateFilter = new boolean[1];
 
       // Iterate over all the child filters to merge EQ and IN predicates
       for (Expression child : children) {
@@ -80,52 +82,62 @@ public class MergeEqInFilterOptimizer implements 
FilterOptimizer {
             List<Expression> operands = childFunction.getOperands();
             Expression lhs = operands.get(0);
             Expression value = operands.get(1);
-            Set<Expression> values = valuesMap.get(lhs);
-            if (values == null) {
-              values = new HashSet<>();
-              values.add(value);
-              valuesMap.put(lhs, values);
-            } else {
-              values.add(value);
-              // Recreate filter when multiple predicates can be merged
-              recreateFilter = true;
-            }
+            // Use string value to de-duplicate the values to prevent the 
overhead of Expression.hashCode(). This is
+            // consistent with how server handles predicates.
+            String stringValue = RequestContextUtils.getStringValue(value);
+            valuesMap.compute(lhs, (k, v) -> {
+              if (v == null) {
+                Map<String, Expression> values = new HashMap<>();
+                values.put(stringValue, value);
+                return values;
+              } else {
+                v.put(stringValue, value);
+                // Recreate filter when multiple predicates can be merged
+                recreateFilter[0] = true;
+                return v;
+              }
+            });
           } else if (childOperator.equals(FilterKind.IN.name())) {
             List<Expression> operands = childFunction.getOperands();
             Expression lhs = operands.get(0);
-            Set<Expression> inPredicateValuesSet = new HashSet<>();
-            int numOperands = operands.size();
-            for (int i = 1; i < numOperands; i++) {
-              inPredicateValuesSet.add(operands.get(i));
-            }
-            int numUniqueValues = inPredicateValuesSet.size();
-            if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
-              // Recreate filter when the IN predicate contains only 1 value 
(can be rewritten to EQ predicate),
-              // or values can be de-duplicated
-              recreateFilter = true;
-            }
-            Set<Expression> values = valuesMap.get(lhs);
-            if (values == null) {
-              valuesMap.put(lhs, inPredicateValuesSet);
-            } else {
-              values.addAll(inPredicateValuesSet);
-              // Recreate filter when multiple predicates can be merged
-              recreateFilter = true;
-            }
+            valuesMap.compute(lhs, (k, v) -> {
+              if (v == null) {
+                Map<String, Expression> values = getInValues(operands);
+                int numUniqueValues = values.size();
+                if (numUniqueValues == 1 || numUniqueValues != operands.size() 
- 1) {
+                  // Recreate filter when the IN predicate contains only 1 
value (can be rewritten to EQ predicate), or
+                  // values can be de-duplicated
+                  recreateFilter[0] = true;
+                }
+                return values;
+              } else {
+                int numOperands = operands.size();
+                for (int i = 1; i < numOperands; i++) {
+                  Expression value = operands.get(i);
+                  // Use string value to de-duplicate the values to prevent 
the overhead of Expression.hashCode(). This
+                  // is consistent with how server handles predicates.
+                  String stringValue = 
RequestContextUtils.getStringValue(value);
+                  v.put(stringValue, value);
+                }
+                // Recreate filter when multiple predicates can be merged
+                recreateFilter[0] = true;
+                return v;
+              }
+            });
           } else {
             newChildren.add(child);
           }
         }
       }
 
-      if (recreateFilter) {
+      if (recreateFilter[0]) {
         if (newChildren.isEmpty() && valuesMap.size() == 1) {
           // Single range without other filters
-          Map.Entry<Expression, Set<Expression>> entry = 
valuesMap.entrySet().iterator().next();
-          return getFilterExpression(entry.getKey(), entry.getValue());
+          Map.Entry<Expression, Map<String, Expression>> entry = 
valuesMap.entrySet().iterator().next();
+          return getFilterExpression(entry.getKey(), 
entry.getValue().values());
         } else {
-          for (Map.Entry<Expression, Set<Expression>> entry : 
valuesMap.entrySet()) {
-            newChildren.add(getFilterExpression(entry.getKey(), 
entry.getValue()));
+          for (Map.Entry<Expression, Map<String, Expression>> entry : 
valuesMap.entrySet()) {
+            newChildren.add(getFilterExpression(entry.getKey(), 
entry.getValue().values()));
           }
           function.setOperands(newChildren);
           return filterExpression;
@@ -138,17 +150,12 @@ public class MergeEqInFilterOptimizer implements 
FilterOptimizer {
       return filterExpression;
     } else if (operator.equals(FilterKind.IN.name())) {
       List<Expression> operands = function.getOperands();
-      Expression lhs = operands.get(0);
-      Set<Expression> values = new HashSet<>();
-      int numOperands = operands.size();
-      for (int i = 1; i < numOperands; i++) {
-        values.add(operands.get(i));
-      }
+      Map<String, Expression> values = getInValues(operands);
       int numUniqueValues = values.size();
-      if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
-        // Recreate filter when the IN predicate contains only 1 value (can be 
rewritten to EQ predicate), or values
-        // can be de-duplicated
-        return getFilterExpression(lhs, values);
+      if (numUniqueValues == 1 || numUniqueValues != operands.size() - 1) {
+        // Recreate filter when the IN predicate contains only 1 value (can be 
rewritten to EQ predicate), or values can
+        // be de-duplicated
+        return getFilterExpression(operands.get(0), values.values());
       } else {
         return filterExpression;
       }
@@ -157,10 +164,27 @@ public class MergeEqInFilterOptimizer implements 
FilterOptimizer {
     }
   }
 
+  /**
+   * Helper method to get the values from the IN predicate. Returns a map from 
string representation of the value to the
+   * value.
+   */
+  private Map<String, Expression> getInValues(List<Expression> operands) {
+    int numOperands = operands.size();
+    Map<String, Expression> values = 
Maps.newHashMapWithExpectedSize(numOperands - 1);
+    for (int i = 1; i < numOperands; i++) {
+      Expression value = operands.get(i);
+      // Use string value to de-duplicate the values to prevent the overhead 
of Expression.hashCode(). This is
+      // consistent with how server handles predicates.
+      String stringValue = RequestContextUtils.getStringValue(value);
+      values.put(stringValue, value);
+    }
+    return values;
+  }
+
   /**
    * Helper method to construct a EQ or IN predicate filter Expression from 
the given lhs and values.
    */
-  private static Expression getFilterExpression(Expression lhs, 
Set<Expression> values) {
+  private static Expression getFilterExpression(Expression lhs, 
Collection<Expression> values) {
     int numValues = values.size();
     if (numValues == 1) {
       return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), lhs, 
values.iterator().next());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to