>From Peeyush Gupta <[email protected]>: Peeyush Gupta has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20987?usp=email )
Change subject: [NO ISSUE][COMP] Improve compile time ...................................................................... [NO ISSUE][COMP] Improve compile time - user model changes: no - storage format changes: no - interface changes: no Ext-ref: MB-68548 Change-Id: Iff383be11f351df3b74657b7128b914b9cb569a4 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20987 Tested-by: Peeyush Gupta <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm M asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java M asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java M hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java M hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java M hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java M hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java 15 files changed, 137 insertions(+), 46 deletions(-) Approvals: Peeyush Gupta: Verified Ali Alsuliman: Looks good to me, approved diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java index 2dded4a..f823c4b 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/AbstractConditionExpressionRule.java @@ -55,7 +55,7 @@ this.context = context; - boolean changed = transform(condRef); + boolean changed = transform(condRef, context); if (changed) { context.computeAndSetTypeEnvironmentForOperator(op); } @@ -82,5 +82,5 @@ * @return {@code <code>true</code>} condition has been modified * {@code <code>false</code>} otherwise. */ - protected abstract boolean transform(Mutable<ILogicalExpression> condRef); + protected abstract boolean transform(Mutable<ILogicalExpression> condRef, IOptimizationContext context); } diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java index dfd1781..8dd6fce 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/InlineAndRemoveRedundantBooleanExpressionsRule.java @@ -23,6 +23,7 @@ import org.apache.asterix.lang.common.util.FunctionUtil; import org.apache.commons.lang3.mutable.Mutable; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; @@ -40,7 +41,12 @@ public class InlineAndRemoveRedundantBooleanExpressionsRule extends AbstractConditionExpressionRule { @Override - protected boolean transform(Mutable<ILogicalExpression> condRef) { + protected boolean transform(Mutable<ILogicalExpression> condRef, IOptimizationContext context) { + int maxExprTreeSize = context.getPhysicalOptimizationConfig().getMaxExpressionTreeSize(); + return transform(condRef, maxExprTreeSize); + } + + private boolean transform(Mutable<ILogicalExpression> condRef, int maxExprTreeSize) { AbstractFunctionCallExpression function = getFunctionExpression(condRef.getValue()); if (function == null) { return false; @@ -48,18 +54,20 @@ boolean changed = false; for (Mutable<ILogicalExpression> argRef : function.getArguments()) { - changed |= transform(argRef); + changed |= transform(argRef, maxExprTreeSize); } final FunctionIdentifier fid = function.getFunctionIdentifier(); if (AlgebricksBuiltinFunctions.AND.equals(fid) || AlgebricksBuiltinFunctions.OR.equals(fid)) { - changed |= inlineCondition(function); - changed |= removeRedundantExpressions(function.getArguments()); + if (function.getArguments().size() <= maxExprTreeSize) { + changed |= inlineCondition(function); + changed |= removeRedundantExpressions(function.getArguments(), maxExprTreeSize); - //Special case: disjuncts/conjuncts have been factored out into a single (non-disjunct/conjunct) expression - if (function.getArguments().size() == 1) { - final ILogicalExpression newCond = function.getArguments().get(0).getValue(); - condRef.setValue(newCond); + //Special case: disjuncts/conjuncts have been factored out into a single (non-disjunct/conjunct) expression + if (function.getArguments().size() == 1) { + final ILogicalExpression newCond = function.getArguments().get(0).getValue(); + condRef.setValue(newCond); + } } } @@ -86,7 +94,10 @@ return changed; } - public static boolean removeRedundantExpressions(List<Mutable<ILogicalExpression>> exprs) { + public static boolean removeRedundantExpressions(List<Mutable<ILogicalExpression>> exprs, int maxExprTreeSize) { + if (exprs.size() > maxExprTreeSize) { + return false; + } final int originalSize = exprs.size(); int i = 0; while (i < exprs.size()) { diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java index 378c758..a78944c 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushAggregateIntoNestedSubplanRule.java @@ -57,13 +57,16 @@ @Override public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { + if (context.checkIfInDontApplySet(this, opRef.get())) { + return false; + } Map<LogicalVariable, Integer> nspAggVars = new HashMap<>(); Map<LogicalVariable, Integer> nspAggVarToPlanIndex = new HashMap<>(); Map<LogicalVariable, AbstractOperatorWithNestedPlans> nspWithAgg = new HashMap<>(); Map<ILogicalExpression, ILogicalExpression> aggExprToVarExpr = new HashMap<>(); // first collect vars. referring to listified sequences - boolean changed = - collectVarsBottomUp(opRef, context, nspAggVars, nspWithAgg, nspAggVarToPlanIndex, aggExprToVarExpr); + boolean changed = collectVarsBottomUp(opRef, context, nspAggVars, nspWithAgg, nspAggVarToPlanIndex, + aggExprToVarExpr, true); if (changed) { removeRedundantListifies(nspAggVars, nspWithAgg, nspAggVarToPlanIndex); } @@ -116,13 +119,16 @@ Map<LogicalVariable, Integer> nspListifyVarsCount, Map<LogicalVariable, AbstractOperatorWithNestedPlans> nspWithAgg, Map<LogicalVariable, Integer> nspAggVarToPlanIndex, - Map<ILogicalExpression, ILogicalExpression> aggregateExprToVarExpr) throws AlgebricksException { + Map<ILogicalExpression, ILogicalExpression> aggregateExprToVarExpr, boolean first) + throws AlgebricksException { AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); - context.addToDontApplySet(this, op1); + if (!first) { + context.addToDontApplySet(this, op1); + } boolean change = false; for (Mutable<ILogicalOperator> child : op1.getInputs()) { if (collectVarsBottomUp(child, context, nspListifyVarsCount, nspWithAgg, nspAggVarToPlanIndex, - aggregateExprToVarExpr)) { + aggregateExprToVarExpr, false)) { change = true; } } diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java index f77fc2c..a806836 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/RemoveRedundantBooleanExpressionsInJoinRule.java @@ -88,7 +88,7 @@ Mutable<ILogicalExpression> joinCondRef = joinOp.getCondition(); Mutable<ILogicalExpression> clonedCondition = new MutableObject<>(joinCondRef.getValue().cloneExpression()); - if (normalizeVariables(leftEqMap, rightEqMap, clonedCondition) && transform(clonedCondition)) { + if (normalizeVariables(leftEqMap, rightEqMap, clonedCondition) && transform(clonedCondition, context)) { // replace the join condition iff the normalization led to a minimized circuit joinCondRef.setValue(clonedCondition.getValue()); return true; diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java index b4292ff..b663c5e 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/processor/InlineAndNormalizeFilterExpressionsProcessor.java @@ -96,7 +96,8 @@ // Remove redundant expressions from AND/OR if (isAnd(funcExpr) || isOr(funcExpr)) { - InlineAndRemoveRedundantBooleanExpressionsRule.removeRedundantExpressions(args); + InlineAndRemoveRedundantBooleanExpressionsRule.removeRedundantExpressions(args, + context.getPhysicalOptimizationConfig().getMaxExpressionTreeSize()); if (args.size() == 1) { // InlineAndRemoveRedundantBooleanExpressionsRule produced a single argument return it return args.get(0).getValue(); diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java index 478b81d..8ae209b 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/visitor/ConstantFoldingVisitor.java @@ -138,6 +138,7 @@ }; private static final IOperatorSchema[] _emptySchemas = new IOperatorSchema[] {}; + private static final Map<FunctionIdentifier, IAObject> FUNC_ID_TO_CONSTANT = ImmutableMap .of(BuiltinFunctions.NUMERIC_E, new ADouble(Math.E), BuiltinFunctions.NUMERIC_PI, new ADouble(Math.PI)); private final JobGenContext jobGenCtx; @@ -190,10 +191,18 @@ @Override public Pair<Boolean, ILogicalExpression> visitScalarFunctionCallExpression(ScalarFunctionCallExpression expr, Void arg) throws AlgebricksException { + FunctionIdentifier fid = expr.getFunctionIdentifier(); + + // Guard against O(N^2) behavior for large OR/AND trees (e.g. IN with a long list expands + // into O(N) OR nodes; visiting each one and iterating all siblings is O(N^2) total). + if ((BuiltinFunctions.OR.equals(fid) || BuiltinFunctions.AND.equals(fid)) + && expr.getArguments().size() > optContext.getPhysicalOptimizationConfig().getMaxExpressionTreeSize()) { + return new Pair<>(false, expr); + } + boolean changed = constantFoldArgs(expr, arg); List<Mutable<ILogicalExpression>> argList = expr.getArguments(); int argConstantCount = countConstantArgs(argList); - FunctionIdentifier fid = expr.getFunctionIdentifier(); if (argConstantCount != argList.size()) { if (argConstantCount > 0 && (BuiltinFunctions.OR.equals(fid) || BuiltinFunctions.AND.equals(fid))) { if (foldOrAndArgs(expr)) { diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm index 11ba44f..6d7deed 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm @@ -52,6 +52,7 @@ "compiler\.indexonly" : true, "compiler\.internal\.sanitycheck" : true, "compiler\.joinmemory" : 262144, + "compiler\.max\.expression\.tree\.size" : 10000, "compiler\.max\.variable\.occurrences\.inlining" : 128, "compiler.min.groupmemory" : 524288, "compiler.min.joinmemory" : 524288, diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm index b72753c..08922ef 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm @@ -52,6 +52,7 @@ "compiler\.indexonly" : true, "compiler\.internal\.sanitycheck" : false, "compiler\.joinmemory" : 262144, + "compiler\.max\.expression\.tree\.size" : 10000, "compiler\.max\.variable\.occurrences\.inlining" : 128, "compiler.min.groupmemory" : 524288, "compiler.min.joinmemory" : 524288, diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm index 61f0f3a..ecc29f0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm @@ -52,6 +52,7 @@ "compiler\.indexonly" : true, "compiler\.internal\.sanitycheck" : false, "compiler\.joinmemory" : 262144, + "compiler\.max\.expression\.tree\.size" : 10000, "compiler\.max\.variable\.occurrences\.inlining" : 128, "compiler.min.groupmemory" : 524288, "compiler.min.joinmemory" : 524288, diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java index 418e589..af2b450 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java @@ -157,6 +157,11 @@ getRangedIntegerType(0, Integer.MAX_VALUE), 128, "Maximum occurrences of a variable allowed in an expression for inlining"), + COMPILER_MAX_EXPRESSION_TREE_SIZE( + getRangedIntegerType(1, Integer.MAX_VALUE), + AlgebricksConfig.MAX_EXPRESSION_TREE_SIZE_DEFAULT, + "Maximum number of OR/AND arguments before skipping costly O(N^2) optimization passes " + + "(e.g. constant folding, CSE) on large IN-list queries"), COMPILER_ORDERED_FIELDS(BOOLEAN, AlgebricksConfig.ORDERED_FIELDS, "Enable/disable select order list"), COMPILER_DELTALAKE_FILESPLITS(BOOLEAN, false, "Enable/disable delta lake file splits"), COMPILER_REWRITE_DISJUNCTION( @@ -248,6 +253,8 @@ public static final String COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY = Option.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING.ini(); + public static final String COMPILER_MAX_EXPRESSION_TREE_SIZE_KEY = Option.COMPILER_MAX_EXPRESSION_TREE_SIZE.ini(); + public static final String COMPILER_ORDERED_FIELDS_KEY = Option.COMPILER_ORDERED_FIELDS.ini(); public static final int COMPILER_PARALLELISM_AS_STORAGE = 0; @@ -399,6 +406,10 @@ return accessor.getInt(Option.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING); } + public int getMaxExpressionTreeSize() { + return accessor.getInt(Option.COMPILER_MAX_EXPRESSION_TREE_SIZE); + } + public boolean isDeltaLakeFileSplitsEnabled() { return accessor.getBoolean(Option.COMPILER_DELTALAKE_FILESPLITS); } diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java index 3979542..9f2ac87 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java @@ -99,6 +99,7 @@ compilerProperties.isColumnFilter()); int maxVariableOccurrencesForInlining = getMaxVariableOccurrencesForInlining(compilerProperties, querySpecificConfig, sourceLoc); + int maxExpressionTreeSize = getMaxExpressionTreeSize(compilerProperties, querySpecificConfig, sourceLoc); boolean orderFields = getBoolean(querySpecificConfig, CompilerProperties.COMPILER_ORDERED_FIELDS_KEY, compilerProperties.isOrderedFields()); @@ -131,6 +132,7 @@ physOptConf.setMinGroupFrames(compilerProperties.getMinGroupMemoryFrames()); physOptConf.setMinWindowFrames(compilerProperties.getMinWindowMemoryFrames()); physOptConf.setMaxVariableOccurrencesForInlining(maxVariableOccurrencesForInlining); + physOptConf.setMaxExpressionTreeSize(maxExpressionTreeSize); physOptConf.setOrderFields(orderFields); // We should have already validated the parameter names at this point... @@ -240,4 +242,16 @@ throw AsterixException.create(ErrorCode.COMPILATION_ERROR, sourceLoc, e.getMessage()); } } + + private static int getMaxExpressionTreeSize(CompilerProperties compilerProperties, + Map<String, Object> querySpecificConfig, SourceLocation sourceLoc) throws AsterixException { + String valueInQuery = + (String) querySpecificConfig.get(CompilerProperties.COMPILER_MAX_EXPRESSION_TREE_SIZE_KEY); + try { + return valueInQuery == null ? compilerProperties.getMaxExpressionTreeSize() + : OptionTypes.POSITIVE_INTEGER.parse(valueInQuery); + } catch (IllegalArgumentException e) { + throw AsterixException.create(ErrorCode.COMPILATION_ERROR, sourceLoc, e.getMessage()); + } + } } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java index 5aab406..b3506f4 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java @@ -51,4 +51,5 @@ public static final int MAX_VARIABLE_OCCURRENCES_INLINING_DEFAULT = 128; public static final String HASH_BASED_OR_OPTION = "hash_based_or"; public static final boolean HASH_BASED_OR_OPTION_DEFAULT = false; + public static final int MAX_EXPRESSION_TREE_SIZE_DEFAULT = 10000; } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java index 38c1a54..340f626 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java @@ -64,6 +64,7 @@ private static final String MIN_GROUP_FRAMES = "MIN_GROUP_FRAMES"; private static final String MIN_WINDOW_FRAMES = "MIN_WINDOW_FRAMES"; private static final String MAX_VARIABLE_OCCURRENCES_INLINING = "MAX_VARIABLE_OCCURRENCES_INLINING"; + private static final String MAX_EXPRESSION_TREE_SIZE = "MAX_EXPRESSION_TREE_SIZE"; private static final String ORDER_FIELDS = "ORDERED_FIELDS"; @@ -411,6 +412,14 @@ setInt(MAX_VARIABLE_OCCURRENCES_INLINING, maxVariableOccurrencesForInlining); } + public int getMaxExpressionTreeSize() { + return getInt(MAX_EXPRESSION_TREE_SIZE, AlgebricksConfig.MAX_EXPRESSION_TREE_SIZE_DEFAULT); + } + + public void setMaxExpressionTreeSize(int maxExpressionTreeSize) { + setInt(MAX_EXPRESSION_TREE_SIZE, maxExpressionTreeSize); + } + private void setInt(String property, int value) { properties.setProperty(property, Integer.toString(value)); } diff --git a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java index 2c27589..764e8d0 100644 --- a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java +++ b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java @@ -272,33 +272,59 @@ private IOptimizationContext context; private ILogicalOperator op; + // Cached live/used variables for the current operator. Avoids recomputing them for every + // expression node visited within the same operator (which is O(plan-size) each time). + private Set<LogicalVariable> cachedLiveVars; + private List<LogicalVariable> cachedUsedVars; + public void setContext(IOptimizationContext context) { this.context = context; } public void setOperator(ILogicalOperator op) throws AlgebricksException { this.op = op; + // Invalidate the live/used variable caches whenever the target operator changes. + cachedLiveVars = null; + cachedUsedVars = null; + } + + /** + * Lazily computes and caches the live and used variables for the current operator. + * Call this before any code path that needs liveVars / usedVars. + */ + private void ensureLiveVarsCached() throws AlgebricksException { + if (cachedLiveVars == null) { + cachedLiveVars = new HashSet<>(); + cachedUsedVars = new ArrayList<>(); + VariableUtilities.getLiveVariables(op, cachedLiveVars); + VariableUtilities.getUsedVariables(op, cachedUsedVars); + } } @Override public boolean transform(Mutable<ILogicalExpression> exprRef) throws AlgebricksException { AbstractLogicalExpression expr = (AbstractLogicalExpression) exprRef.getValue(); + + // Skip variable references and constants early — no CSE benefit, and avoids map lookup cost. + LogicalExpressionTag tag = expr.getExpressionTag(); + if (tag == LogicalExpressionTag.VARIABLE || tag == LogicalExpressionTag.CONSTANT) { + return false; + } + boolean modified = false; ExprEquivalenceClass exprEqClass = exprEqClassMap.get(expr); if (exprEqClass != null) { // Replace common subexpression with existing variable. if (exprEqClass.variableIsSet()) { if (expr.isFunctional()) { - Set<LogicalVariable> liveVars = new HashSet<>(); - List<LogicalVariable> usedVars = new ArrayList<>(); - VariableUtilities.getLiveVariables(op, liveVars); - VariableUtilities.getUsedVariables(op, usedVars); + ensureLiveVarsCached(); // Check if the replacing variable is live at this op. // However, if the op is already using variables that are not live, // then a replacement may enable fixing the plan. // This behavior is necessary to, e.g., properly deal with distinct by. // Also just replace the expr if we are replacing common exprs from within the same operator. - if (liveVars.contains(exprEqClass.getVariable()) || !liveVars.containsAll(usedVars) + if (cachedLiveVars.contains(exprEqClass.getVariable()) + || !cachedLiveVars.containsAll(cachedUsedVars) || op == exprEqClass.getFirstOperator()) { VariableReferenceExpression varRef = new VariableReferenceExpression(exprEqClass.getVariable()); @@ -311,11 +337,13 @@ } else { if (expr.isFunctional() && assignCommonExpression(exprEqClass, expr)) { modified = true; - //re-obtain the live vars after rewriting in the method called in the if condition - Set<LogicalVariable> liveVars = new HashSet<LogicalVariable>(); - VariableUtilities.getLiveVariables(op, liveVars); - //rewrite only when the variable is live - if (liveVars.contains(exprEqClass.getVariable())) { + // assignCommonExpression inserts a new AssignOperator into the plan, so the + // previously cached live variables are stale — recompute them now. + cachedLiveVars = null; + cachedUsedVars = null; + ensureLiveVarsCached(); + // Rewrite only when the variable is live. + if (cachedLiveVars.contains(exprEqClass.getVariable())) { VariableReferenceExpression varRef = new VariableReferenceExpression(exprEqClass.getVariable()); varRef.setSourceLocation(expr.getSourceLocation()); @@ -326,15 +354,17 @@ } } } else { - if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE - && expr.getExpressionTag() != LogicalExpressionTag.CONSTANT) { + // Guard against unbounded map growth for very large expression trees (e.g. IN with a + // long list expands into O(N) OR nodes). Beyond the limit the map lookups themselves + // become O(N) due to equals() chain traversal, making the whole pass O(N^2). + if (exprEqClassMap.size() < context.getPhysicalOptimizationConfig().getMaxExpressionTreeSize()) { exprEqClass = new ExprEquivalenceClass(op, exprRef); exprEqClassMap.put(expr, exprEqClass); } } // Descend into function arguments. - if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) { + if (tag == LogicalExpressionTag.FUNCTION_CALL) { AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr; for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) { if (transform(arg)) { diff --git a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java index ab6d91a..629ac92 100644 --- a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java +++ b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java @@ -103,16 +103,11 @@ * variables if they are used afterwards. */ private Set<LogicalVariable> removeAssignVarFromConsideration(Mutable<ILogicalOperator> opRef) { + ILogicalOperator op = opRef.getValue(); Set<LogicalVariable> assignVarsSetForThisOp = null; - Set<LogicalVariable> usedVarsSetForThisOp = null; + Set<LogicalVariable> usedVarsSetForThisOp = accumulatedUsedVarFromRootMap.get(opRef); - if (accumulatedUsedVarFromRootMap.containsKey(opRef)) { - usedVarsSetForThisOp = accumulatedUsedVarFromRootMap.get(opRef); - } - - if (assignedVarMap.containsKey(opRef)) { - assignVarsSetForThisOp = assignedVarMap.get(opRef); - } + assignVarsSetForThisOp = assignedVarMap.get(opRef); if (assignVarsSetForThisOp != null && !assignVarsSetForThisOp.isEmpty()) { Iterator<LogicalVariable> varIter = assignVarsSetForThisOp.iterator(); @@ -128,9 +123,9 @@ // The source variables of the UNIONALL operator should be survived // since we are sure that the output of UNIONALL operator is used // afterwards. - if (opRef.getValue().getOperatorTag() == LogicalOperatorTag.UNIONALL) { + if (op.getOperatorTag() == LogicalOperatorTag.UNIONALL) { Iterator<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> iter = - ((UnionAllOperator) opRef.getValue()).getVariableMappings().iterator(); + ((UnionAllOperator) op).getVariableMappings().iterator(); while (iter.hasNext()) { Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping = iter.next(); survivedUnionSourceVarSet.add(varMapping.first); @@ -408,8 +403,7 @@ // definition, which is for rebinding the referred variable. VariableReferenceExpression varExpr = (VariableReferenceExpression) decorMapping.second.getValue(); - LogicalVariable reboundDecorVar = varExpr.getVariableReference(); - assignVarsSetInThisOp.add(reboundDecorVar); + assignVarsSetInThisOp.add(varExpr.getVariableReference()); } } break; @@ -418,6 +412,8 @@ assignVarsSetInThisOp.addAll(winOp.getVariables()); targetOpFound = true; break; + default: + break; } if (targetOpFound) { @@ -433,10 +429,10 @@ if (accumulatedUsedVarFromRootMap.containsKey(opRef)) { accumulatedUsedVarFromRootMap.get(opRef).addAll(accumulatedUsedVarFromRootSet); } else { - accumulatedUsedVarFromRootMap.put(opRef, new HashSet<LogicalVariable>(accumulatedUsedVarFromRootSet)); + accumulatedUsedVarFromRootMap.put(opRef, accumulatedUsedVarFromRootSet); } } else { - accumulatedUsedVarFromRootMap.put(opRef, new HashSet<LogicalVariable>(accumulatedUsedVarFromRootSet)); + accumulatedUsedVarFromRootMap.put(opRef, accumulatedUsedVarFromRootSet); } for (Mutable<ILogicalOperator> c : op.getInputs()) { -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20987?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: phoenix Gerrit-Change-Id: Iff383be11f351df3b74657b7128b914b9cb569a4 Gerrit-Change-Number: 20987 Gerrit-PatchSet: 14 Gerrit-Owner: Peeyush Gupta <[email protected]> Gerrit-Reviewer: Ali Alsuliman <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Peeyush Gupta <[email protected]>
