>From Peeyush Gupta <[email protected]>:

Peeyush Gupta has submitted this change. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20987?usp=email )

Change subject: [NO ISSUE][COMP] Improve compile time
......................................................................

[NO ISSUE][COMP] Improve compile time

- user model changes: no
- storage format changes: no
- interface changes: no

Ext-ref: MB-68548

Change-Id: Iff383be11f351df3b74657b7128b914b9cb569a4
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20987
Tested-by: Peeyush Gupta <[email protected]>
Reviewed-by: Ali Alsuliman <[email protected]>
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
M 
hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
M 
hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
M 
hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
M 
hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
15 files changed, 137 insertions(+), 46 deletions(-)

Approvals:
  Peeyush Gupta: Verified
  Ali Alsuliman: Looks good to me, approved




diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
index 2dded4a..f823c4b 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java
@@ -55,7 +55,7 @@

         this.context = context;

-        boolean changed = transform(condRef);
+        boolean changed = transform(condRef, context);
         if (changed) {
             context.computeAndSetTypeEnvironmentForOperator(op);
         }
@@ -82,5 +82,5 @@
      * @return {@code <code>true</code>} condition has been modified
      * {@code <code>false</code>} otherwise.
      */
-    protected abstract boolean transform(Mutable<ILogicalExpression> condRef);
+    protected abstract boolean transform(Mutable<ILogicalExpression> condRef, 
IOptimizationContext context);
 }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
index dfd1781..8dd6fce 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java
@@ -23,6 +23,7 @@
 import org.apache.asterix.lang.common.util.FunctionUtil;
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
@@ -40,7 +41,12 @@
 public class InlineAndRemoveRedundantBooleanExpressionsRule extends 
AbstractConditionExpressionRule {

     @Override
-    protected boolean transform(Mutable<ILogicalExpression> condRef) {
+    protected boolean transform(Mutable<ILogicalExpression> condRef, 
IOptimizationContext context) {
+        int maxExprTreeSize = 
context.getPhysicalOptimizationConfig().getMaxExpressionTreeSize();
+        return transform(condRef, maxExprTreeSize);
+    }
+
+    private boolean transform(Mutable<ILogicalExpression> condRef, int 
maxExprTreeSize) {
         AbstractFunctionCallExpression function = 
getFunctionExpression(condRef.getValue());
         if (function == null) {
             return false;
@@ -48,18 +54,20 @@

         boolean changed = false;
         for (Mutable<ILogicalExpression> argRef : function.getArguments()) {
-            changed |= transform(argRef);
+            changed |= transform(argRef, maxExprTreeSize);
         }

         final FunctionIdentifier fid = function.getFunctionIdentifier();
         if (AlgebricksBuiltinFunctions.AND.equals(fid) || 
AlgebricksBuiltinFunctions.OR.equals(fid)) {
-            changed |= inlineCondition(function);
-            changed |= removeRedundantExpressions(function.getArguments());
+            if (function.getArguments().size() <= maxExprTreeSize) {
+                changed |= inlineCondition(function);
+                changed |= removeRedundantExpressions(function.getArguments(), 
maxExprTreeSize);

-            //Special case: disjuncts/conjuncts have been factored out into a 
single (non-disjunct/conjunct) expression
-            if (function.getArguments().size() == 1) {
-                final ILogicalExpression newCond = 
function.getArguments().get(0).getValue();
-                condRef.setValue(newCond);
+                //Special case: disjuncts/conjuncts have been factored out 
into a single (non-disjunct/conjunct) expression
+                if (function.getArguments().size() == 1) {
+                    final ILogicalExpression newCond = 
function.getArguments().get(0).getValue();
+                    condRef.setValue(newCond);
+                }
             }
         }

@@ -86,7 +94,10 @@
         return changed;
     }

-    public static boolean 
removeRedundantExpressions(List<Mutable<ILogicalExpression>> exprs) {
+    public static boolean 
removeRedundantExpressions(List<Mutable<ILogicalExpression>> exprs, int 
maxExprTreeSize) {
+        if (exprs.size() > maxExprTreeSize) {
+            return false;
+        }
         final int originalSize = exprs.size();
         int i = 0;
         while (i < exprs.size()) {
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java
index 378c758..a78944c 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java
@@ -57,13 +57,16 @@
     @Override
     public boolean rewritePre(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
             throws AlgebricksException {
+        if (context.checkIfInDontApplySet(this, opRef.get())) {
+            return false;
+        }
         Map<LogicalVariable, Integer> nspAggVars = new HashMap<>();
         Map<LogicalVariable, Integer> nspAggVarToPlanIndex = new HashMap<>();
         Map<LogicalVariable, AbstractOperatorWithNestedPlans> nspWithAgg = new 
HashMap<>();
         Map<ILogicalExpression, ILogicalExpression> aggExprToVarExpr = new 
HashMap<>();
         // first collect vars. referring to listified sequences
-        boolean changed =
-                collectVarsBottomUp(opRef, context, nspAggVars, nspWithAgg, 
nspAggVarToPlanIndex, aggExprToVarExpr);
+        boolean changed = collectVarsBottomUp(opRef, context, nspAggVars, 
nspWithAgg, nspAggVarToPlanIndex,
+                aggExprToVarExpr, true);
         if (changed) {
             removeRedundantListifies(nspAggVars, nspWithAgg, 
nspAggVarToPlanIndex);
         }
@@ -116,13 +119,16 @@
             Map<LogicalVariable, Integer> nspListifyVarsCount,
             Map<LogicalVariable, AbstractOperatorWithNestedPlans> nspWithAgg,
             Map<LogicalVariable, Integer> nspAggVarToPlanIndex,
-            Map<ILogicalExpression, ILogicalExpression> 
aggregateExprToVarExpr) throws AlgebricksException {
+            Map<ILogicalExpression, ILogicalExpression> 
aggregateExprToVarExpr, boolean first)
+            throws AlgebricksException {
         AbstractLogicalOperator op1 = (AbstractLogicalOperator) 
opRef.getValue();
-        context.addToDontApplySet(this, op1);
+        if (!first) {
+            context.addToDontApplySet(this, op1);
+        }
         boolean change = false;
         for (Mutable<ILogicalOperator> child : op1.getInputs()) {
             if (collectVarsBottomUp(child, context, nspListifyVarsCount, 
nspWithAgg, nspAggVarToPlanIndex,
-                    aggregateExprToVarExpr)) {
+                    aggregateExprToVarExpr, false)) {
                 change = true;
             }
         }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java
index f77fc2c..a806836 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java
@@ -88,7 +88,7 @@
         Mutable<ILogicalExpression> joinCondRef = joinOp.getCondition();
         Mutable<ILogicalExpression> clonedCondition = new 
MutableObject<>(joinCondRef.getValue().cloneExpression());

-        if (normalizeVariables(leftEqMap, rightEqMap, clonedCondition) && 
transform(clonedCondition)) {
+        if (normalizeVariables(leftEqMap, rightEqMap, clonedCondition) && 
transform(clonedCondition, context)) {
             // replace the join condition iff the normalization led to a 
minimized circuit
             joinCondRef.setValue(clonedCondition.getValue());
             return true;
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java
index b4292ff..b663c5e 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java
@@ -96,7 +96,8 @@

         // Remove redundant expressions from AND/OR
         if (isAnd(funcExpr) || isOr(funcExpr)) {
-            
InlineAndRemoveRedundantBooleanExpressionsRule.removeRedundantExpressions(args);
+            
InlineAndRemoveRedundantBooleanExpressionsRule.removeRedundantExpressions(args,
+                    
context.getPhysicalOptimizationConfig().getMaxExpressionTreeSize());
             if (args.size() == 1) {
                 // InlineAndRemoveRedundantBooleanExpressionsRule produced a 
single argument return it
                 return args.get(0).getValue();
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java
index 478b81d..8ae209b 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java
@@ -138,6 +138,7 @@
     };

     private static final IOperatorSchema[] _emptySchemas = new 
IOperatorSchema[] {};
+
     private static final Map<FunctionIdentifier, IAObject> FUNC_ID_TO_CONSTANT 
= ImmutableMap
             .of(BuiltinFunctions.NUMERIC_E, new ADouble(Math.E), 
BuiltinFunctions.NUMERIC_PI, new ADouble(Math.PI));
     private final JobGenContext jobGenCtx;
@@ -190,10 +191,18 @@
     @Override
     public Pair<Boolean, ILogicalExpression> 
visitScalarFunctionCallExpression(ScalarFunctionCallExpression expr,
             Void arg) throws AlgebricksException {
+        FunctionIdentifier fid = expr.getFunctionIdentifier();
+
+        // Guard against O(N^2) behavior for large OR/AND trees (e.g. IN with 
a long list expands
+        // into O(N) OR nodes; visiting each one and iterating all siblings is 
O(N^2) total).
+        if ((BuiltinFunctions.OR.equals(fid) || 
BuiltinFunctions.AND.equals(fid))
+                && expr.getArguments().size() > 
optContext.getPhysicalOptimizationConfig().getMaxExpressionTreeSize()) {
+            return new Pair<>(false, expr);
+        }
+
         boolean changed = constantFoldArgs(expr, arg);
         List<Mutable<ILogicalExpression>> argList = expr.getArguments();
         int argConstantCount = countConstantArgs(argList);
-        FunctionIdentifier fid = expr.getFunctionIdentifier();
         if (argConstantCount != argList.size()) {
             if (argConstantCount > 0 && (BuiltinFunctions.OR.equals(fid) || 
BuiltinFunctions.AND.equals(fid))) {
                 if (foldOrAndArgs(expr)) {
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index 11ba44f..6d7deed 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -52,6 +52,7 @@
     "compiler\.indexonly" : true,
     "compiler\.internal\.sanitycheck" : true,
     "compiler\.joinmemory" : 262144,
+    "compiler\.max\.expression\.tree\.size" : 10000,
     "compiler\.max\.variable\.occurrences\.inlining" : 128,
     "compiler.min.groupmemory" : 524288,
     "compiler.min.joinmemory" : 524288,
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index b72753c..08922ef 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -52,6 +52,7 @@
     "compiler\.indexonly" : true,
     "compiler\.internal\.sanitycheck" : false,
     "compiler\.joinmemory" : 262144,
+    "compiler\.max\.expression\.tree\.size" : 10000,
     "compiler\.max\.variable\.occurrences\.inlining" : 128,
     "compiler.min.groupmemory" : 524288,
     "compiler.min.joinmemory" : 524288,
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 61f0f3a..ecc29f0 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -52,6 +52,7 @@
     "compiler\.indexonly" : true,
     "compiler\.internal\.sanitycheck" : false,
     "compiler\.joinmemory" : 262144,
+    "compiler\.max\.expression\.tree\.size" : 10000,
     "compiler\.max\.variable\.occurrences\.inlining" : 128,
     "compiler.min.groupmemory" : 524288,
     "compiler.min.joinmemory" : 524288,
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
index 418e589..af2b450 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
@@ -157,6 +157,11 @@
                 getRangedIntegerType(0, Integer.MAX_VALUE),
                 128,
                 "Maximum occurrences of a variable allowed in an expression 
for inlining"),
+        COMPILER_MAX_EXPRESSION_TREE_SIZE(
+                getRangedIntegerType(1, Integer.MAX_VALUE),
+                AlgebricksConfig.MAX_EXPRESSION_TREE_SIZE_DEFAULT,
+                "Maximum number of OR/AND arguments before skipping costly 
O(N^2) optimization passes "
+                        + "(e.g. constant folding, CSE) on large IN-list 
queries"),
         COMPILER_ORDERED_FIELDS(BOOLEAN, AlgebricksConfig.ORDERED_FIELDS, 
"Enable/disable select order list"),
         COMPILER_DELTALAKE_FILESPLITS(BOOLEAN, false, "Enable/disable delta 
lake file splits"),
         COMPILER_REWRITE_DISJUNCTION(
@@ -248,6 +253,8 @@
     public static final String COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY =
             Option.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING.ini();

+    public static final String COMPILER_MAX_EXPRESSION_TREE_SIZE_KEY = 
Option.COMPILER_MAX_EXPRESSION_TREE_SIZE.ini();
+
     public static final String COMPILER_ORDERED_FIELDS_KEY = 
Option.COMPILER_ORDERED_FIELDS.ini();

     public static final int COMPILER_PARALLELISM_AS_STORAGE = 0;
@@ -399,6 +406,10 @@
         return 
accessor.getInt(Option.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING);
     }

+    public int getMaxExpressionTreeSize() {
+        return accessor.getInt(Option.COMPILER_MAX_EXPRESSION_TREE_SIZE);
+    }
+
     public boolean isDeltaLakeFileSplitsEnabled() {
         return accessor.getBoolean(Option.COMPILER_DELTALAKE_FILESPLITS);
     }
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
index 3979542..9f2ac87 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
@@ -99,6 +99,7 @@
                 compilerProperties.isColumnFilter());
         int maxVariableOccurrencesForInlining =
                 getMaxVariableOccurrencesForInlining(compilerProperties, 
querySpecificConfig, sourceLoc);
+        int maxExpressionTreeSize = 
getMaxExpressionTreeSize(compilerProperties, querySpecificConfig, sourceLoc);
         boolean orderFields = getBoolean(querySpecificConfig, 
CompilerProperties.COMPILER_ORDERED_FIELDS_KEY,
                 compilerProperties.isOrderedFields());

@@ -131,6 +132,7 @@
         
physOptConf.setMinGroupFrames(compilerProperties.getMinGroupMemoryFrames());
         
physOptConf.setMinWindowFrames(compilerProperties.getMinWindowMemoryFrames());
         
physOptConf.setMaxVariableOccurrencesForInlining(maxVariableOccurrencesForInlining);
+        physOptConf.setMaxExpressionTreeSize(maxExpressionTreeSize);
         physOptConf.setOrderFields(orderFields);

         // We should have already validated the parameter names at this 
point...
@@ -240,4 +242,16 @@
             throw AsterixException.create(ErrorCode.COMPILATION_ERROR, 
sourceLoc, e.getMessage());
         }
     }
+
+    private static int getMaxExpressionTreeSize(CompilerProperties 
compilerProperties,
+            Map<String, Object> querySpecificConfig, SourceLocation sourceLoc) 
throws AsterixException {
+        String valueInQuery =
+                (String) 
querySpecificConfig.get(CompilerProperties.COMPILER_MAX_EXPRESSION_TREE_SIZE_KEY);
+        try {
+            return valueInQuery == null ? 
compilerProperties.getMaxExpressionTreeSize()
+                    : OptionTypes.POSITIVE_INTEGER.parse(valueInQuery);
+        } catch (IllegalArgumentException e) {
+            throw AsterixException.create(ErrorCode.COMPILATION_ERROR, 
sourceLoc, e.getMessage());
+        }
+    }
 }
diff --git 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index 5aab406..b3506f4 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -51,4 +51,5 @@
     public static final int MAX_VARIABLE_OCCURRENCES_INLINING_DEFAULT = 128;
     public static final String HASH_BASED_OR_OPTION = "hash_based_or";
     public static final boolean HASH_BASED_OR_OPTION_DEFAULT = false;
+    public static final int MAX_EXPRESSION_TREE_SIZE_DEFAULT = 10000;
 }
diff --git 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
index 38c1a54..340f626 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
@@ -64,6 +64,7 @@
     private static final String MIN_GROUP_FRAMES = "MIN_GROUP_FRAMES";
     private static final String MIN_WINDOW_FRAMES = "MIN_WINDOW_FRAMES";
     private static final String MAX_VARIABLE_OCCURRENCES_INLINING = 
"MAX_VARIABLE_OCCURRENCES_INLINING";
+    private static final String MAX_EXPRESSION_TREE_SIZE = 
"MAX_EXPRESSION_TREE_SIZE";

     private static final String ORDER_FIELDS = "ORDERED_FIELDS";

@@ -411,6 +412,14 @@
         setInt(MAX_VARIABLE_OCCURRENCES_INLINING, 
maxVariableOccurrencesForInlining);
     }

+    public int getMaxExpressionTreeSize() {
+        return getInt(MAX_EXPRESSION_TREE_SIZE, 
AlgebricksConfig.MAX_EXPRESSION_TREE_SIZE_DEFAULT);
+    }
+
+    public void setMaxExpressionTreeSize(int maxExpressionTreeSize) {
+        setInt(MAX_EXPRESSION_TREE_SIZE, maxExpressionTreeSize);
+    }
+
     private void setInt(String property, int value) {
         properties.setProperty(property, Integer.toString(value));
     }
diff --git 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
index 2c27589..764e8d0 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
@@ -272,33 +272,59 @@
         private IOptimizationContext context;
         private ILogicalOperator op;

+        // Cached live/used variables for the current operator. Avoids 
recomputing them for every
+        // expression node visited within the same operator (which is 
O(plan-size) each time).
+        private Set<LogicalVariable> cachedLiveVars;
+        private List<LogicalVariable> cachedUsedVars;
+
         public void setContext(IOptimizationContext context) {
             this.context = context;
         }

         public void setOperator(ILogicalOperator op) throws 
AlgebricksException {
             this.op = op;
+            // Invalidate the live/used variable caches whenever the target 
operator changes.
+            cachedLiveVars = null;
+            cachedUsedVars = null;
+        }
+
+        /**
+         * Lazily computes and caches the live and used variables for the 
current operator.
+         * Call this before any code path that needs liveVars / usedVars.
+         */
+        private void ensureLiveVarsCached() throws AlgebricksException {
+            if (cachedLiveVars == null) {
+                cachedLiveVars = new HashSet<>();
+                cachedUsedVars = new ArrayList<>();
+                VariableUtilities.getLiveVariables(op, cachedLiveVars);
+                VariableUtilities.getUsedVariables(op, cachedUsedVars);
+            }
         }

         @Override
         public boolean transform(Mutable<ILogicalExpression> exprRef) throws 
AlgebricksException {
             AbstractLogicalExpression expr = (AbstractLogicalExpression) 
exprRef.getValue();
+
+            // Skip variable references and constants early — no CSE benefit, 
and avoids map lookup cost.
+            LogicalExpressionTag tag = expr.getExpressionTag();
+            if (tag == LogicalExpressionTag.VARIABLE || tag == 
LogicalExpressionTag.CONSTANT) {
+                return false;
+            }
+
             boolean modified = false;
             ExprEquivalenceClass exprEqClass = exprEqClassMap.get(expr);
             if (exprEqClass != null) {
                 // Replace common subexpression with existing variable.
                 if (exprEqClass.variableIsSet()) {
                     if (expr.isFunctional()) {
-                        Set<LogicalVariable> liveVars = new HashSet<>();
-                        List<LogicalVariable> usedVars = new ArrayList<>();
-                        VariableUtilities.getLiveVariables(op, liveVars);
-                        VariableUtilities.getUsedVariables(op, usedVars);
+                        ensureLiveVarsCached();
                         // Check if the replacing variable is live at this op.
                         // However, if the op is already using variables that 
are not live,
                         // then a replacement may enable fixing the plan.
                         // This behavior is necessary to, e.g., properly deal 
with distinct by.
                         // Also just replace the expr if we are replacing 
common exprs from within the same operator.
-                        if (liveVars.contains(exprEqClass.getVariable()) || 
!liveVars.containsAll(usedVars)
+                        if (cachedLiveVars.contains(exprEqClass.getVariable())
+                                || !cachedLiveVars.containsAll(cachedUsedVars)
                                 || op == exprEqClass.getFirstOperator()) {
                             VariableReferenceExpression varRef =
                                     new 
VariableReferenceExpression(exprEqClass.getVariable());
@@ -311,11 +337,13 @@
                 } else {
                     if (expr.isFunctional() && 
assignCommonExpression(exprEqClass, expr)) {
                         modified = true;
-                        //re-obtain the live vars after rewriting in the 
method called in the if condition
-                        Set<LogicalVariable> liveVars = new 
HashSet<LogicalVariable>();
-                        VariableUtilities.getLiveVariables(op, liveVars);
-                        //rewrite only when the variable is live
-                        if (liveVars.contains(exprEqClass.getVariable())) {
+                        // assignCommonExpression inserts a new AssignOperator 
into the plan, so the
+                        // previously cached live variables are stale — 
recompute them now.
+                        cachedLiveVars = null;
+                        cachedUsedVars = null;
+                        ensureLiveVarsCached();
+                        // Rewrite only when the variable is live.
+                        if 
(cachedLiveVars.contains(exprEqClass.getVariable())) {
                             VariableReferenceExpression varRef =
                                     new 
VariableReferenceExpression(exprEqClass.getVariable());
                             varRef.setSourceLocation(expr.getSourceLocation());
@@ -326,15 +354,17 @@
                     }
                 }
             } else {
-                if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE
-                        && expr.getExpressionTag() != 
LogicalExpressionTag.CONSTANT) {
+                // Guard against unbounded map growth for very large 
expression trees (e.g. IN with a
+                // long list expands into O(N) OR nodes). Beyond the limit the 
map lookups themselves
+                // become O(N) due to equals() chain traversal, making the 
whole pass O(N^2).
+                if (exprEqClassMap.size() < 
context.getPhysicalOptimizationConfig().getMaxExpressionTreeSize()) {
                     exprEqClass = new ExprEquivalenceClass(op, exprRef);
                     exprEqClassMap.put(expr, exprEqClass);
                 }
             }

             // Descend into function arguments.
-            if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) 
{
+            if (tag == LogicalExpressionTag.FUNCTION_CALL) {
                 AbstractFunctionCallExpression funcExpr = 
(AbstractFunctionCallExpression) expr;
                 for (Mutable<ILogicalExpression> arg : 
funcExpr.getArguments()) {
                     if (transform(arg)) {
diff --git 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
index ab6d91a..629ac92 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
@@ -103,16 +103,11 @@
      * variables if they are used afterwards.
      */
     private Set<LogicalVariable> 
removeAssignVarFromConsideration(Mutable<ILogicalOperator> opRef) {
+        ILogicalOperator op = opRef.getValue();
         Set<LogicalVariable> assignVarsSetForThisOp = null;
-        Set<LogicalVariable> usedVarsSetForThisOp = null;
+        Set<LogicalVariable> usedVarsSetForThisOp = 
accumulatedUsedVarFromRootMap.get(opRef);

-        if (accumulatedUsedVarFromRootMap.containsKey(opRef)) {
-            usedVarsSetForThisOp = accumulatedUsedVarFromRootMap.get(opRef);
-        }
-
-        if (assignedVarMap.containsKey(opRef)) {
-            assignVarsSetForThisOp = assignedVarMap.get(opRef);
-        }
+        assignVarsSetForThisOp = assignedVarMap.get(opRef);

         if (assignVarsSetForThisOp != null && 
!assignVarsSetForThisOp.isEmpty()) {
             Iterator<LogicalVariable> varIter = 
assignVarsSetForThisOp.iterator();
@@ -128,9 +123,9 @@
         // The source variables of the UNIONALL operator should be survived
         // since we are sure that the output of UNIONALL operator is used
         // afterwards.
-        if (opRef.getValue().getOperatorTag() == LogicalOperatorTag.UNIONALL) {
+        if (op.getOperatorTag() == LogicalOperatorTag.UNIONALL) {
             Iterator<Triple<LogicalVariable, LogicalVariable, 
LogicalVariable>> iter =
-                    ((UnionAllOperator) 
opRef.getValue()).getVariableMappings().iterator();
+                    ((UnionAllOperator) op).getVariableMappings().iterator();
             while (iter.hasNext()) {
                 Triple<LogicalVariable, LogicalVariable, LogicalVariable> 
varMapping = iter.next();
                 survivedUnionSourceVarSet.add(varMapping.first);
@@ -408,8 +403,7 @@
                         // definition, which is for rebinding the referred 
variable.
                         VariableReferenceExpression varExpr =
                                 (VariableReferenceExpression) 
decorMapping.second.getValue();
-                        LogicalVariable reboundDecorVar = 
varExpr.getVariableReference();
-                        assignVarsSetInThisOp.add(reboundDecorVar);
+                        
assignVarsSetInThisOp.add(varExpr.getVariableReference());
                     }
                 }
                 break;
@@ -418,6 +412,8 @@
                 assignVarsSetInThisOp.addAll(winOp.getVariables());
                 targetOpFound = true;
                 break;
+            default:
+                break;
         }

         if (targetOpFound) {
@@ -433,10 +429,10 @@
             if (accumulatedUsedVarFromRootMap.containsKey(opRef)) {
                 
accumulatedUsedVarFromRootMap.get(opRef).addAll(accumulatedUsedVarFromRootSet);
             } else {
-                accumulatedUsedVarFromRootMap.put(opRef, new 
HashSet<LogicalVariable>(accumulatedUsedVarFromRootSet));
+                accumulatedUsedVarFromRootMap.put(opRef, 
accumulatedUsedVarFromRootSet);
             }
         } else {
-            accumulatedUsedVarFromRootMap.put(opRef, new 
HashSet<LogicalVariable>(accumulatedUsedVarFromRootSet));
+            accumulatedUsedVarFromRootMap.put(opRef, 
accumulatedUsedVarFromRootSet);
         }

         for (Mutable<ILogicalOperator> c : op.getInputs()) {

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20987?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: Iff383be11f351df3b74657b7128b914b9cb569a4
Gerrit-Change-Number: 20987
Gerrit-PatchSet: 14
Gerrit-Owner: Peeyush Gupta <[email protected]>
Gerrit-Reviewer: Ali Alsuliman <[email protected]>
Gerrit-Reviewer: Anon. E. Moose #1000171
Gerrit-Reviewer: Jenkins <[email protected]>
Gerrit-Reviewer: Murtadha Hubail <[email protected]>
Gerrit-Reviewer: Peeyush Gupta <[email protected]>

Reply via email to