>From Vijay Sarathy <[email protected]>: Vijay Sarathy has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17483 )
Change subject: [ASTERIXDB-3163][COMP] CH2 query was not being optimized by CBO ...................................................................... [ASTERIXDB-3163][COMP] CH2 query was not being optimized by CBO Change-Id: If2aab5785091464be9a529aacdcda3a9bba70310 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17483 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: <[email protected]> Reviewed-by: Vijay Sarathy <[email protected]> --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java 2 files changed, 124 insertions(+), 100 deletions(-) Approvals: [email protected]: Looks good to me, but someone else must approve Vijay Sarathy: Looks good to me, approved Jenkins: Verified; Verified Anon. E. Moose #1000171: diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java index 0889856..b9d6050 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java @@ -46,7 +46,6 @@ import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression; import org.apache.hyracks.algebricks.core.algebra.expressions.HashJoinExpressionAnnotation; import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation; -import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator; @@ -95,8 +94,7 @@ // If cboMode is true, then all datasets need to have samples, otherwise the check in doAllDataSourcesHaveSamples() // further below will return false. ILogicalOperator op = opRef.getValue(); - if (!((op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) - || ((op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT)))) { + if (!(joinClause(op) || ((op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT)))) { return false; } @@ -106,17 +104,16 @@ } List<ILogicalOperator> joinOps = new ArrayList<>(); - List<ILogicalOperator> internalEdges = new ArrayList<>(); HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>(); // The data scan operators. Will be in the order of the from clause. // Important for position ordering when assigning bits to join expressions. List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>(); - HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>(); + List<AssignOperator> assignOps = new ArrayList<>(); IPlanPrettyPrinter pp = context.getPrettyPrinter(); printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1"); - boolean canTransform = getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, - dataSourceEmptyTupleHashMap, internalEdges, joinOps); + boolean canTransform = + getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, joinOps, assignOps); if (!canTransform) { return false; @@ -133,8 +130,7 @@ int numberOfFromTerms = emptyTupleAndDataSourceOps.size(); joinEnum.initEnum((AbstractLogicalOperator) op, cboMode, cboTestMode, numberOfFromTerms, - emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps, - context); + emptyTupleAndDataSourceOps, joinLeafInputsHashMap, joinOps, assignOps, context); if (cboMode) { if (!doAllDataSourcesHaveSamples(emptyTupleAndDataSourceOps, context)) { @@ -142,8 +138,18 @@ } } - if (internalEdges.size() > 0) { - pushAssignsIntoLeafInputs(joinLeafInputsHashMap, internalEdges); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("---------------------------- Printing Leaf Inputs1"); + printLeafPlans(pp, joinLeafInputsHashMap); + } + + if (assignOps.size() > 0) { + pushAssignsIntoLeafInputs(pp, joinLeafInputsHashMap, assignOps); + } + + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("---------------------------- Printing Leaf Inputs2"); + printLeafPlans(pp, joinLeafInputsHashMap); } int cheapestPlan = joinEnum.enumerateJoins(); // MAIN CALL INTO CBO @@ -156,7 +162,7 @@ if (numberOfFromTerms > 1) { buildNewTree(cheapestPlanNode, joinLeafInputsHashMap, joinOps, new MutableInt(0)); printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan after buildNewTree 1"); - ILogicalOperator root = addConstantInternalEdgesAtTheTop(joinOps.get(0), internalEdges); + ILogicalOperator root = addRemainingAssignsAtTheTop(joinOps.get(0), assignOps); printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan after buildNewTree 2"); printPlan(pp, (AbstractLogicalOperator) root, "New Whole Plan after buildNewTree"); // this will be the new root @@ -184,6 +190,14 @@ return true; } + private boolean joinClause(ILogicalOperator op) { + if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) + return true; + //if (op.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN) + //return true; + return false; + } + private boolean getCBOMode(IOptimizationContext context) { PhysicalOptimizationConfig physOptConfig = context.getPhysicalOptimizationConfig(); return physOptConfig.getCBOMode(); @@ -221,12 +235,51 @@ * Check to see if there is only one assign here and nothing below that other than a join. * have not seen cases where there is more than one assign in a leafinput. */ - private boolean onlyOneAssign(ILogicalOperator nextOp) { - if (nextOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) { - return false; + private boolean onlyOneAssign(ILogicalOperator op, List<AssignOperator> assignOps) { + if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) { + AssignOperator aOp = (AssignOperator) op; + assignOps.add(aOp); + op = op.getInputs().get(0).getValue(); } - List<Mutable<ILogicalOperator>> nextOpInputs = nextOp.getInputs(); - return nextOpInputs.get(0).getValue().getOperatorTag() == LogicalOperatorTag.INNERJOIN; + return (joinClause(op)); + } + + // An internal edge must contain only assigns followed by an inner join + private int numVarRefExprs(AssignOperator aOp) { + List<Mutable<ILogicalExpression>> exprs = aOp.getExpressions(); + int count = 0; + for (Mutable<ILogicalExpression> exp : exprs) { + if (exp.getValue().getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) { + AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) exp.getValue(); + for (Mutable<ILogicalExpression> arg : afcExpr.getArguments()) { + if (arg.getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) { + count++; + } + } + } + } + + return count; + } + + private boolean onlyAssigns(ILogicalOperator op, List<AssignOperator> assignOps) { + while (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) { + AssignOperator aOp = (AssignOperator) op; + int count = numVarRefExprs(aOp); + if (count > 1) { + return false; + } + assignOps.add(aOp); + op = op.getInputs().get(0).getValue(); + } + return (joinClause(op)); + } + + private ILogicalOperator skipPastAssigns(ILogicalOperator nextOp) { + while (nextOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) { + nextOp = nextOp.getInputs().get(0).getValue(); + } + return nextOp; } private ILogicalOperator findSelectOrDataScan(ILogicalOperator op) { @@ -254,20 +307,19 @@ */ private boolean getJoinOpsAndLeafInputs(ILogicalOperator op, List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps, - HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, - HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap, - List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps) { + HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps, + List<AssignOperator> assignOps) { if (op.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN) { return false; } - if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) { + if (joinClause(op)) { joinOps.add(op); for (int i = 0; i < 2; i++) { ILogicalOperator nextOp = op.getInputs().get(i).getValue(); boolean canTransform = getJoinOpsAndLeafInputs(nextOp, emptyTupleAndDataSourceOps, - joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps); + joinLeafInputsHashMap, joinOps, assignOps); if (!canTransform) { return false; } @@ -288,20 +340,18 @@ } else { joinLeafInputsHashMap.put(etsOp, op); } - dataSourceEmptyTupleHashMap.put(dataSourceOp, etsOp); } else { // This must be an internal edge - if (onlyOneAssign(op)) { + if (onlyAssigns(op, assignOps)) { + //if (onlyOneAssign(op, assignOps)) { // currently, will handle only assign statement and nothing else in an internal Edge. // we can lift this restriction later if the need arises. This just makes some code easier. - internalEdges.add(op); - boolean canTransform = - getJoinOpsAndLeafInputs(op.getInputs().get(0).getValue(), emptyTupleAndDataSourceOps, - joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps); + + ILogicalOperator skipAssisgnsOp = skipPastAssigns(op); + boolean canTransform = getJoinOpsAndLeafInputs(skipAssisgnsOp, emptyTupleAndDataSourceOps, + joinLeafInputsHashMap, joinOps, assignOps); if (!canTransform) { return false; } - - //internalEdges.add(op); // better to store the parent; do this soon. } else { return false; } @@ -370,16 +420,12 @@ } } - //Internal edges are assign statements. The RHS has a variable in it. - // We need to find the internal edge that has a variable coming from this leaf leafInput. - private int findInternalEdge(ILogicalOperator leafInput, List<ILogicalOperator> internalEdges) - throws AlgebricksException { + private int findAssignOp(ILogicalOperator leafInput, List<AssignOperator> assignOps) throws AlgebricksException { int i = -1; - for (ILogicalOperator ie : internalEdges) { + for (AssignOperator aOp : assignOps) { i++; // this will be an Assign, so no need to check - AssignOperator aOp = (AssignOperator) ie; List<LogicalVariable> vars = new ArrayList<>(); aOp.getExpressions().get(0).getValue().getUsedVariables(vars); HashSet<LogicalVariable> vars2 = new HashSet<>(); @@ -392,11 +438,9 @@ return -1; } - private ILogicalOperator addAssignToLeafInput(ILogicalOperator leafInput, ILogicalOperator internalEdge) { - ILogicalOperator root = leafInput; + private ILogicalOperator addAssignToLeafInput(ILogicalOperator leafInput, AssignOperator aOp) { // this will be an Assign, so no need to check - AssignOperator aOp = (AssignOperator) internalEdge; - aOp.getInputs().get(0).setValue(root); + aOp.getInputs().get(0).setValue(leafInput); return aOp; } @@ -530,15 +574,11 @@ // in some very rare cases, there is an internal edge that has an assign statement such as $$var = 20 but this variable // is not used anywhere in the current join graph but is used outside the current join graph. So we add this assign to the top of // our plan, so the rest of the code will be happy. Strange that this assign appears in the join graph. - private ILogicalOperator addConstantInternalEdgesAtTheTop(ILogicalOperator op, - List<ILogicalOperator> internalEdges) { - if (internalEdges.size() == 0) { - return op; - } + + private ILogicalOperator addRemainingAssignsAtTheTop(ILogicalOperator op, List<AssignOperator> assignOps) { + ILogicalOperator root = op; - for (ILogicalOperator ie : internalEdges) { - // this will be an Assign, so no need to check - AssignOperator aOp = (AssignOperator) ie; + for (AssignOperator aOp : assignOps) { aOp.getInputs().get(0).setValue(root); root = aOp; } @@ -569,45 +609,25 @@ // for every internal edge assign (again assuming only 1 for now), find the corresponding leafInput and place the assign // on top of that LeafInput. Modify the joinLeafInputsHashMap as well. - private void pushAssignsIntoLeafInputs(HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, - List<ILogicalOperator> internalEdges) throws AlgebricksException { + private void pushAssignsIntoLeafInputs(IPlanPrettyPrinter pp, + HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<AssignOperator> assignOps) + throws AlgebricksException { for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) { ILogicalOperator joinLeafInput = mapElement.getValue(); + printPlan(pp, (AbstractLogicalOperator) joinLeafInput, "Incoming leaf Input"); EmptyTupleSourceOperator ets = mapElement.getKey(); - int internalEdgeNumber = findInternalEdge(joinLeafInput, internalEdges); - if (internalEdgeNumber != -1) { - joinLeafInput = addAssignToLeafInput(joinLeafInput, internalEdges.get(internalEdgeNumber)); + int assignNumber = findAssignOp(joinLeafInput, assignOps); + if (assignNumber != -1) { + joinLeafInput = addAssignToLeafInput(joinLeafInput, assignOps.get(assignNumber)); + printPlan(pp, (AbstractLogicalOperator) joinLeafInput, "Modified leaf Input"); joinLeafInputsHashMap.put(ets, joinLeafInput); - internalEdges.remove(internalEdgeNumber); // no longer needed + assignOps.remove(assignNumber); } } } - private boolean substituteVarOnce(ILogicalExpression exp, LogicalVariable oldVar, LogicalVariable newVar) { - switch (exp.getExpressionTag()) { - case FUNCTION_CALL: - AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) exp; - for (int i = 0; i < fun.getArguments().size(); i++) { - ILogicalExpression arg = fun.getArguments().get(i).getValue(); - if (substituteVarOnce(arg, oldVar, newVar)) { - return true; - } - } - return false; - case VARIABLE: - VariableReferenceExpression varExpr = (VariableReferenceExpression) exp; - if (varExpr.getVariableReference().equals(oldVar)) { - varExpr.setVariable(newVar); - return true; - } - return false; - default: - return false; - } - } - // check to see if every dataset has a sample. If not, CBO code cannot run. A warning message must be issued as well. private boolean doAllDataSourcesHaveSamples( List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps, diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java index c8ac57e..9154bd9 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java @@ -81,9 +81,8 @@ protected int jnArraySize; protected List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps; protected Map<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap; - protected Map<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap; protected List<ILogicalExpression> singleDatasetPreds; - protected List<ILogicalOperator> internalEdges; + protected List<AssignOperator> assignOps; protected List<ILogicalOperator> joinOps; protected ILogicalOperator localJoinOp; // used in nestedLoopsApplicable code. protected IOptimizationContext optCtx; @@ -104,12 +103,11 @@ public void initEnum(AbstractLogicalOperator op, boolean cboMode, boolean cboTestMode, int numberOfFromTerms, List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps, - Map<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, - Map<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap, - List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, IOptimizationContext context) { + Map<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps, + List<AssignOperator> assignOps, IOptimizationContext context) { this.singleDatasetPreds = new ArrayList<>(); this.joinConditions = new ArrayList<>(); - this.internalEdges = new ArrayList<>(); + this.assignOps = new ArrayList<>(); this.allPlans = new ArrayList<>(); this.numberOfTerms = numberOfFromTerms; this.cboMode = cboMode; @@ -119,8 +117,7 @@ this.physOptConfig = context.getPhysicalOptimizationConfig(); this.emptyTupleAndDataSourceOps = emptyTupleAndDataSourceOps; this.joinLeafInputsHashMap = joinLeafInputsHashMap; - this.dataSourceEmptyTupleHashMap = dataSourceEmptyTupleHashMap; - this.internalEdges = internalEdges; + this.assignOps = assignOps; this.joinOps = joinOps; this.op = op; this.forceJoinOrderMode = getForceJoinOrderMode(context); @@ -168,10 +165,6 @@ return joinLeafInputsHashMap; } - public Map<DataSourceScanOperator, EmptyTupleSourceOperator> getDataSourceEmptyTupleHashMap() { - return dataSourceEmptyTupleHashMap; - } - public ILogicalOperator findLeafInput(List<LogicalVariable> logicalVars) throws AlgebricksException { Set<LogicalVariable> vars = new HashSet<>(); for (Pair<EmptyTupleSourceOperator, DataSourceScanOperator> emptyTupleAndDataSourceOp : emptyTupleAndDataSourceOps) { @@ -339,16 +332,14 @@ } // so this variable must be in an internal edge in an assign statement. Find the RHS variables there - for (ILogicalOperator op : this.internalEdges) { - if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) { - List<LogicalVariable> vars2 = new ArrayList<>(); - VariableUtilities.getUsedVariables(op, vars2); - int bits = 0; - for (LogicalVariable lv2 : vars2) { - bits |= findBits(lv2); - } - return bits; + for (AssignOperator op : this.assignOps) { + List<LogicalVariable> vars2 = new ArrayList<>(); + VariableUtilities.getUsedVariables(op, vars2); + int bits = 0; + for (LogicalVariable lv2 : vars2) { + bits |= findBits(lv2); } + return bits; } // should never reach this because every variable must exist in some leaf input. return JoinNode.NO_JN; @@ -387,8 +378,7 @@ usedVars.clear(); ILogicalExpression expr = jc.joinCondition; expr.getUsedVariables(usedVars); - for (ILogicalOperator ie : internalEdges) { - AssignOperator aOp = (AssignOperator) ie; + for (AssignOperator aOp : assignOps) { for (int i = 0; i < aOp.getVariables().size(); i++) { if (usedVars.contains(aOp.getVariables().get(i))) { OperatorManipulationUtil.replaceVarWithExpr((AbstractFunctionCallExpression) expr, -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17483 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: If2aab5785091464be9a529aacdcda3a9bba70310 Gerrit-Change-Number: 17483 Gerrit-PatchSet: 8 Gerrit-Owner: [email protected] Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Till Westmann <[email protected]> Gerrit-Reviewer: Vijay Sarathy <[email protected]> Gerrit-Reviewer: [email protected] Gerrit-MessageType: merged
