This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit dd4a7ce3695fab10a736ba3766d4a5ebc0ddb6f0 Author: Dmitry Lychagin <[email protected]> AuthorDate: Fri Oct 18 12:32:58 2019 -0700 [NO ISSUE][COMP] Fix wrong result with indexnl outer join - user model changes: no - storage format changes: no - interface changes: no Details: - Fix duplicate tuples in the result when indexnl hint is applied to a standalone left outer join Change-Id: I0e692d731caa9914438817b299c64569130075b0 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/3805 Tested-by: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- .../rules/am/AccessMethodAnalysisContext.java | 22 +-- .../optimizer/rules/am/AccessMethodUtils.java | 180 +++++++++++++++++---- .../optimizer/rules/am/BTreeAccessMethod.java | 9 +- .../asterix/optimizer/rules/am/IAccessMethod.java | 3 +- .../rules/am/IntroduceJoinAccessMethodRule.java | 87 +++++----- .../rules/am/InvertedIndexAccessMethod.java | 12 +- .../optimizer/rules/am/RTreeAccessMethod.java | 9 +- ...e-pidx-with-join-btree-sidx_03-index-only.sqlpp | 70 ++++++++ ...erjoin-probe-pidx-with-join-btree-sidx_03.sqlpp | 70 ++++++++ .../optimizerts/results/ASTERIXDB-2402.plan | 156 +++++++++--------- ...be-pidx-with-join-btree-sidx_03-index-only.plan | 49 ++++++ ...terjoin-probe-pidx-with-join-btree-sidx_03.plan | 33 ++++ .../probe-pidx-with-join-btree-sidx1.4.query.sqlpp | 33 ++++ .../probe-pidx-with-join-btree-sidx1.5.query.sqlpp | 33 ++++ ....adm => probe-pidx-with-join-btree-sidx1.3.adm} | 0 .../probe-pidx-with-join-btree-sidx1.4.adm | 11 ++ .../probe-pidx-with-join-btree-sidx1.5.adm | 11 ++ .../probe-pidx-with-join-btree-sidx1.4.ast | 68 ++++++++ .../probe-pidx-with-join-btree-sidx1.5.ast | 68 ++++++++ .../asterix/om/functions/BuiltinFunctions.java | 14 +- .../runtime/functions/FunctionCollection.java | 2 + ...MarkFirstMissingRunningAggregateDescriptor.java | 72 +++++++++ ...nMarkFirstMissingRunningAggregateEvaluator.java | 97 +++++++++++ .../algebra/util/OperatorManipulationUtil.java | 10 ++ 24 files changed, 934 insertions(+), 185 deletions(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java index c186687..924c679 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java @@ -52,8 +52,10 @@ public class AccessMethodAnalysisContext { private Map<Index, Integer> indexNumMatchedKeys = new TreeMap<Index, Integer>(); // variables for resetting null placeholder for left-outer-join - private Mutable<ILogicalOperator> lojGroupbyOpRef = null; - private ScalarFunctionCallExpression lojIsMissingFuncInGroupBy = null; + // See AccessMethodUtils#removeUnjoinedDuplicatesInLOJ() for a definition of a special GroupBy + // and extra output processing steps needed when it's not available. + private Mutable<ILogicalOperator> lojSpecialGroupByOpRef = null; + private ScalarFunctionCallExpression lojIsMissingFuncInSpecialGroupBy = null; // For a secondary index, if we use only PK and secondary key field in a plan, it is an index-only plan. // Contains information about index-only plan @@ -134,20 +136,20 @@ public class AccessMethodAnalysisContext { indexNumMatchedKeys.put(index, numMatchedKeys); } - public void setLOJGroupbyOpRef(Mutable<ILogicalOperator> opRef) { - lojGroupbyOpRef = opRef; + public void setLOJSpecialGroupByOpRef(Mutable<ILogicalOperator> opRef) { + lojSpecialGroupByOpRef = opRef; } - public Mutable<ILogicalOperator> getLOJGroupbyOpRef() { - return lojGroupbyOpRef; + public Mutable<ILogicalOperator> getLOJSpecialGroupByOpRef() { + return lojSpecialGroupByOpRef; } - public void setLOJIsMissingFuncInGroupBy(ScalarFunctionCallExpression isMissingFunc) { - lojIsMissingFuncInGroupBy = isMissingFunc; + public void setLOJIsMissingFuncInSpecialGroupBy(ScalarFunctionCallExpression isMissingFunc) { + lojIsMissingFuncInSpecialGroupBy = isMissingFunc; } - public ScalarFunctionCallExpression getLOJIsMissingFuncInGroupBy() { - return lojIsMissingFuncInGroupBy; + public ScalarFunctionCallExpression getLOJIsMissingFuncInSpecialGroupBy() { + return lojIsMissingFuncInSpecialGroupBy; } public Dataset getDatasetFromIndexDatasetMap(Index idx) { diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java index 10037f0..1357fd2 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java @@ -93,6 +93,7 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperat import org.apache.hyracks.algebricks.core.algebra.operators.logical.SplitOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.WindowOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; import org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl; import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; @@ -717,29 +718,63 @@ public class AccessMethodUtils { */ public static boolean finalizeJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree indexSubTree, - AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin, - boolean hasGroupBy, ILogicalOperator indexSearchOp, LogicalVariable newNullPlaceHolderVar, + OptimizableOperatorSubTree probeSubTree, AccessMethodAnalysisContext analysisCtx, + IOptimizationContext context, boolean isLeftOuterJoin, boolean isLeftOuterJoinWithSpecialGroupBy, + ILogicalOperator indexSearchOp, LogicalVariable newNullPlaceHolderVar, Mutable<ILogicalExpression> conditionRef, Dataset dataset) throws AlgebricksException { + boolean isIndexOnlyPlan = analysisCtx.getIndexOnlyPlanInfo().getFirst(); + List<LogicalVariable> probePKVars = null; ILogicalOperator finalIndexSearchOp = indexSearchOp; - if (isLeftOuterJoin && hasGroupBy) { - ScalarFunctionCallExpression lojFuncExprs = analysisCtx.getLOJIsMissingFuncInGroupBy(); - List<LogicalVariable> lojMissingVariables = new ArrayList<>(); - lojFuncExprs.getUsedVariables(lojMissingVariables); - boolean lojMissingVarExist = false; - if (!lojMissingVariables.isEmpty()) { - lojMissingVarExist = true; + if (isLeftOuterJoin) { + if (isLeftOuterJoinWithSpecialGroupBy) { + ScalarFunctionCallExpression lojFuncExprs = analysisCtx.getLOJIsMissingFuncInSpecialGroupBy(); + List<LogicalVariable> lojMissingVariables = new ArrayList<>(); + lojFuncExprs.getUsedVariables(lojMissingVariables); + boolean lojMissingVarExist = !lojMissingVariables.isEmpty(); + + // Resets the missing place holder variable. + AccessMethodUtils.resetLOJMissingPlaceholderVarInGroupByOp(analysisCtx, newNullPlaceHolderVar, context); + + // For the index-only plan, if newNullPlaceHolderVar is not in the variable map of the union operator + // or if the variable is removed during the above method, we need to refresh the variable mapping in UNION. + if (isIndexOnlyPlan) { + finalIndexSearchOp = AccessMethodUtils.resetVariableMappingInUnionOpInIndexOnlyPlan( + lojMissingVarExist, lojMissingVariables, indexSearchOp, afterJoinRefs, context); + } + } else { + // We'll need to remove unjoined duplicates after the left outer join if there is no special GroupBy, + // but in order to do that we need to know the keys of the probe branch. + // If probe keys are not available then we fail this transformation + // See AccessMethodUtils#removeUnjoinedDuplicatesInLOJ() for a definition of a special GroupBy + // and extra output processing steps needed when it's not available. + if (probeSubTree.hasDataSource()) { + probePKVars = new ArrayList<>(); + probeSubTree.getPrimaryKeyVars(null, probePKVars); + } + if (probePKVars == null || probePKVars.isEmpty()) { + return false; + } + if (isIndexOnlyPlan) { + // re-map probe branch keys after UnionAll introduced by the indexonly plan + if (indexSearchOp.getOperatorTag() != LogicalOperatorTag.UNIONALL) { + return false; + } + UnionAllOperator unionAllOp = (UnionAllOperator) indexSearchOp; + for (int i = 0, ln = probePKVars.size(); i < ln; i++) { + LogicalVariable unionAllOutputVar = findUnionAllOutputVariable(unionAllOp, probePKVars.get(i)); + if (unionAllOutputVar == null) { + return false; + } + probePKVars.set(i, unionAllOutputVar); + } + } } + } - // Resets the missing place holder variable. - AccessMethodUtils.resetLOJMissingPlaceholderVarInGroupByOp(analysisCtx, newNullPlaceHolderVar, context); + SourceLocation sourceLoc = joinRef.getValue().getSourceLocation(); - // For the index-only plan, if newNullPlaceHolderVar is not in the variable map of the union operator - // or if the variable is removed during the above method, we need to refresh the variable mapping in UNION. - finalIndexSearchOp = AccessMethodUtils.resetVariableMappingInUnionOpInIndexOnlyPlan(lojMissingVarExist, - lojMissingVariables, indexSearchOp, afterJoinRefs, context); - } + ILogicalOperator finalOp; - boolean isIndexOnlyPlan = analysisCtx.getIndexOnlyPlanInfo().getFirst(); // If there are any left conditions, add a new select operator on top. indexSubTree.getDataSourceRef().setValue(finalIndexSearchOp); if (conditionRef.getValue() != null) { @@ -754,27 +789,105 @@ public class AccessMethodUtils { indexSubTree.getDataSourceRef().setValue(dataSourceRefOp); } // Replaces the current operator with the newly created UNIONALL operator. - joinRef.setValue(finalIndexSearchOp); + finalOp = finalIndexSearchOp; } else { // Non-index only plan case indexSubTree.getDataSourceRef().setValue(finalIndexSearchOp); SelectOperator topSelectOp = new SelectOperator(conditionRef, isLeftOuterJoin, newNullPlaceHolderVar); - topSelectOp.setSourceLocation(finalIndexSearchOp.getSourceLocation()); + topSelectOp.setSourceLocation(sourceLoc); topSelectOp.getInputs().add(indexSubTree.getRootRef()); topSelectOp.setExecutionMode(ExecutionMode.LOCAL); context.computeAndSetTypeEnvironmentForOperator(topSelectOp); - joinRef.setValue(topSelectOp); + finalOp = topSelectOp; } } else { if (finalIndexSearchOp.getOperatorTag() == LogicalOperatorTag.UNIONALL) { - joinRef.setValue(finalIndexSearchOp); + finalOp = finalIndexSearchOp; } else { - joinRef.setValue(indexSubTree.getRootRef().getValue()); + finalOp = indexSubTree.getRootRef().getValue(); } } + + if (isLeftOuterJoin && !isLeftOuterJoinWithSpecialGroupBy) { + finalOp = removeUnjoinedDuplicatesInLOJ(finalOp, probePKVars, newNullPlaceHolderVar, context, sourceLoc); + } + + joinRef.setValue(finalOp); return true; } + /** + * In case of a left outer join we look for a special GroupBy above the join operator + * (see {@link IntroduceJoinAccessMethodRule#checkAndApplyJoinTransformation(Mutable, IOptimizationContext)}. + * A "Special GroupBy" is a GroupBy that eliminates unjoined duplicates that might be produced by the secondary + * index probe. We probe secondary indexes on each index partition and return a tuple with a right branch variable + * set to MISSING if there's no match on that partition. Therefore if there's more than one partition where + * nothing joined then the index probe plan will produce several such MISSING tuples, however the join result + * must have a single MISSING tuple for each unjoined left branch tuple. If the special GroupBy is available then + * it'll eliminate those MISSING duplicates, otherwise this method is called to produce additional operators to + * achieve that. The special GroupBy operators are introduced by the optimizer when rewriting FROM-LET or + * equivalent patterns into a left outer join with parent a group by. + * <p> + * The plan generated by this method to eliminate unjoined duplicates is as follows: + * <ul> + * <li> SelectOp $m</li> + * <li> WindowOp [$m=win-mark-first-missing-impl(right-branch-non-missing-value)] + * PARTITION BY left-branch-key + * ORDER BY right-branch-non-missing-value DESC</li> + * <li> input_subtree</li> + * </ul> + * <p> + * The {@link org.apache.asterix.runtime.runningaggregates.std.WinMarkFirstMissingRunningAggregateDescriptor + * win-mark-first-missing-impl} internal function assigns {@code TRUE} for each tuple that has a non-MISSING + * value that comes from the right branch or the first MISSING value in the window partition. The remaining + * tuples in each window partition are unjoined duplicate tuples and will be assigned {@code FALSE}. Then + * the Select operator eliminates those unjoined duplicate tuples. + */ + private static SelectOperator removeUnjoinedDuplicatesInLOJ(ILogicalOperator inputOp, + List<LogicalVariable> probePKVars, LogicalVariable newNullPlaceHolderVar, IOptimizationContext context, + SourceLocation sourceLoc) throws AlgebricksException { + if (probePKVars == null || probePKVars.isEmpty()) { + throw new IllegalArgumentException(); + } + List<Mutable<ILogicalExpression>> winPartitionByList = new ArrayList<>(probePKVars.size()); + for (LogicalVariable probePKeyVar : probePKVars) { + VariableReferenceExpression probePKeyVarRef = new VariableReferenceExpression(probePKeyVar); + probePKeyVarRef.setSourceLocation(sourceLoc); + winPartitionByList.add(new MutableObject<>(probePKeyVarRef)); + } + + VariableReferenceExpression winOrderByVarRef = new VariableReferenceExpression(newNullPlaceHolderVar); + winOrderByVarRef.setSourceLocation(sourceLoc); + /* Sort in DESC order, so all MISSING values are at the end */ + Pair<OrderOperator.IOrder, Mutable<ILogicalExpression>> winOrderByPair = + new Pair<>(OrderOperator.DESC_ORDER, new MutableObject<>(winOrderByVarRef)); + + LogicalVariable winVar = context.newVar(); + VariableReferenceExpression winOrderByVarRef2 = new VariableReferenceExpression(newNullPlaceHolderVar); + winOrderByVarRef2.setSourceLocation(sourceLoc); + AbstractFunctionCallExpression winExpr = + BuiltinFunctions.makeWindowFunctionExpression(BuiltinFunctions.WIN_MARK_FIRST_MISSING_IMPL, + Collections.singletonList(new MutableObject<>(winOrderByVarRef2))); + + WindowOperator winOp = new WindowOperator(winPartitionByList, Collections.singletonList(winOrderByPair)); + winOp.getVariables().add(winVar); + winOp.getExpressions().add(new MutableObject<>(winExpr)); + winOp.getInputs().add(new MutableObject<>(inputOp)); + winOp.setExecutionMode(ExecutionMode.PARTITIONED); + winOp.setSourceLocation(sourceLoc); + context.computeAndSetTypeEnvironmentForOperator(winOp); + + VariableReferenceExpression winVarRef = new VariableReferenceExpression(winVar); + winVarRef.setSourceLocation(sourceLoc); + SelectOperator selectOp = new SelectOperator(new MutableObject<>(winVarRef), false, null); + selectOp.getInputs().add(new MutableObject<>(winOp)); + selectOp.setExecutionMode(ExecutionMode.LOCAL); + selectOp.setSourceLocation(sourceLoc); + context.computeAndSetTypeEnvironmentForOperator(selectOp); + + return selectOp; + } + public static ILogicalOperator createSecondaryIndexUnnestMap(Dataset dataset, ARecordType recordType, ARecordType metaRecordType, Index index, ILogicalOperator inputOp, AccessMethodJobGenParams jobGenParams, IOptimizationContext context, boolean retainInput, boolean retainNull, @@ -1583,27 +1696,21 @@ public class AccessMethodUtils { ALogicalPlanImpl subPlan = (ALogicalPlanImpl) lojGroupbyOp.getNestedPlans().get(0); Mutable<ILogicalOperator> subPlanRootOpRef = subPlan.getRoots().get(0); AbstractLogicalOperator subPlanRootOp = (AbstractLogicalOperator) subPlanRootOpRef.getValue(); - ScalarFunctionCallExpression isMissingFuncExpr = findIsMissingInSubplan(subPlanRootOp, rightSubTree); - - if (isMissingFuncExpr == null) { - throw CompilationException.create(ErrorCode.CANNOT_FIND_NON_MISSING_SELECT_OPERATOR, - lojGroupbyOp.getSourceLocation()); - } - return isMissingFuncExpr; + return findIsMissingInSubplan(subPlanRootOp, rightSubTree); } public static void resetLOJMissingPlaceholderVarInGroupByOp(AccessMethodAnalysisContext analysisCtx, LogicalVariable newMissingPlaceholderVaraible, IOptimizationContext context) throws AlgebricksException { //reset the missing placeholder variable in groupby operator - ScalarFunctionCallExpression isMissingFuncExpr = analysisCtx.getLOJIsMissingFuncInGroupBy(); + ScalarFunctionCallExpression isMissingFuncExpr = analysisCtx.getLOJIsMissingFuncInSpecialGroupBy(); isMissingFuncExpr.getArguments().clear(); VariableReferenceExpression newMissingVarRef = new VariableReferenceExpression(newMissingPlaceholderVaraible); newMissingVarRef.setSourceLocation(isMissingFuncExpr.getSourceLocation()); isMissingFuncExpr.getArguments().add(new MutableObject<ILogicalExpression>(newMissingVarRef)); //recompute type environment. - OperatorPropertiesUtil.typeOpRec(analysisCtx.getLOJGroupbyOpRef(), context); + OperatorPropertiesUtil.typeOpRec(analysisCtx.getLOJSpecialGroupByOpRef(), context); } // New < For external datasets indexing> @@ -2558,4 +2665,15 @@ public class AccessMethodUtils { return AbstractIntroduceAccessMethodRule.NO_INDEX_ONLY_PLAN_OPTION_DEFAULT_VALUE; } + /** + * Finds an output variable for the given input variable of UnionAllOperator. + */ + static LogicalVariable findUnionAllOutputVariable(UnionAllOperator unionAllOp, LogicalVariable inputVar) { + for (Triple<LogicalVariable, LogicalVariable, LogicalVariable> t : unionAllOp.getVariableMappings()) { + if (t.first.equals(inputVar) || t.second.equals(inputVar)) { + return t.third; + } + } + return null; + } } diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java index 85e0c72..0bcc202 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java @@ -239,7 +239,8 @@ public class BTreeAccessMethod implements IAccessMethod { public boolean applyJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, - IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException { + IOptimizationContext context, boolean isLeftOuterJoin, boolean isLeftOuterJoinWithSpecialGroupBy) + throws AlgebricksException { AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) joinRef.getValue(); Mutable<ILogicalExpression> conditionRef = joinOp.getCondition(); @@ -268,6 +269,7 @@ public class BTreeAccessMethod implements IAccessMethod { if (isLeftOuterJoin) { // Gets a new null place holder variable that is the first field variable of the primary key // from the indexSubTree's datasourceScanOp. + // We need this for all left outer joins, even those that do not have a special GroupBy newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0); } @@ -285,8 +287,9 @@ public class BTreeAccessMethod implements IAccessMethod { return false; } - return AccessMethodUtils.finalizeJoinPlanTransformation(afterJoinRefs, joinRef, indexSubTree, analysisCtx, - context, isLeftOuterJoin, hasGroupBy, indexSearchOp, newNullPlaceHolderVar, conditionRef, dataset); + return AccessMethodUtils.finalizeJoinPlanTransformation(afterJoinRefs, joinRef, indexSubTree, probeSubTree, + analysisCtx, context, isLeftOuterJoin, isLeftOuterJoinWithSpecialGroupBy, indexSearchOp, + newNullPlaceHolderVar, conditionRef, dataset); } /** diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java index 559f336..94de169 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java @@ -104,7 +104,8 @@ public interface IAccessMethod extends Comparable<IAccessMethod> { public boolean applyJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, - IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException; + IOptimizationContext context, boolean isLeftOuterJoin, boolean isLeftOuterJoinWithSpecialGroupBy) + throws AlgebricksException; /** * Analyzes expr to see whether it is optimizable by the given concrete index. diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java index 577754a..199f878 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java @@ -43,6 +43,7 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogi import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator; +import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil; import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; /** @@ -87,7 +88,6 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod protected final OptimizableOperatorSubTree leftSubTree = new OptimizableOperatorSubTree(); protected final OptimizableOperatorSubTree rightSubTree = new OptimizableOperatorSubTree(); protected IVariableTypeEnvironment typeEnvironment = null; - protected boolean hasGroupBy = true; protected List<Mutable<ILogicalOperator>> afterJoinRefs = null; // Registers access methods. @@ -191,22 +191,10 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod } /** - * Checks whether the given operator is LEFTOUTERJOIN. - * If so, also checks that GROUPBY is placed after LEFTOUTERJOIN. + * Checks whether the given operator has a single child which is a LEFTOUTERJOIN. */ - // Check whether (Groupby)? <-- Leftouterjoin - private boolean isLeftOuterJoin(AbstractLogicalOperator op1) { - if (op1.getInputs().size() != 1) { - return false; - } - if (op1.getInputs().get(0).getValue().getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) { - return false; - } - if (op1.getOperatorTag() == LogicalOperatorTag.GROUP) { - return true; - } - hasGroupBy = false; - return true; + private int findLeftOuterJoinChild(AbstractLogicalOperator op) { + return OperatorManipulationUtil.findChild(op, LogicalOperatorTag.LEFTOUTERJOIN); } /** @@ -254,35 +242,37 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod // Now, we are sure that transformation attempts for earlier joins have been failed. // Checks the current operator pattern to see whether it is a JOIN or not. - boolean isThisOpInnerJoin = isInnerJoin(op); - boolean isThisOpLeftOuterJoin = isLeftOuterJoin(op); - boolean isParentOpGroupBy = hasGroupBy; + boolean isInnerJoin = false; + boolean isLeftOuterJoin = false; + int leftOuterJoinChildIdx; Mutable<ILogicalOperator> joinRefFromThisOp = null; AbstractBinaryJoinOperator joinOpFromThisOp = null; // operators that need to be removed from the afterJoinRefs list. Mutable<ILogicalOperator> opRefRemove = opRef; - if (isThisOpInnerJoin) { - // Sets the join operator. + if (isInnerJoin(op)) { + // Sets the inner join operator. + isInnerJoin = true; joinRef = opRef; joinOp = (InnerJoinOperator) op; joinRefFromThisOp = opRef; joinOpFromThisOp = (InnerJoinOperator) op; - } else if (isThisOpLeftOuterJoin) { + } else if ((leftOuterJoinChildIdx = findLeftOuterJoinChild(op)) >= 0) { // Sets the left-outer-join operator. - // The current operator is GROUP and the child of this operator is LEFTOUERJOIN. - joinRef = op.getInputs().get(0); + // A child of the current operator is LEFTOUTERJOIN. + isLeftOuterJoin = true; + joinRef = op.getInputs().get(leftOuterJoinChildIdx); joinOp = (LeftOuterJoinOperator) joinRef.getValue(); - joinRefFromThisOp = op.getInputs().get(0); + joinRefFromThisOp = op.getInputs().get(leftOuterJoinChildIdx); joinOpFromThisOp = (LeftOuterJoinOperator) joinRefFromThisOp.getValue(); - - // Group-by should not be removed at this point since the given left-outer-join can be transformed. - opRefRemove = op.getInputs().get(0); + // Left outer join's parent operator should not be removed at this point since the given left-outer-join + // can be transformed. + opRefRemove = op.getInputs().get(leftOuterJoinChildIdx); } afterJoinRefs.remove(opRefRemove); // For a JOIN case, tries to transform the given plan. - if (isThisOpInnerJoin || isThisOpLeftOuterJoin) { + if (isInnerJoin || isLeftOuterJoin) { // Restores the information from this operator since it might have been be set to null // if there are other join operators in the earlier path. @@ -375,13 +365,24 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod // Applies the plan transformation using chosen index. AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first); - // For LOJ with GroupBy, prepare objects to reset LOJ nullPlaceHolderVariable - // in GroupByOp. - if (isThisOpLeftOuterJoin && isParentOpGroupBy) { - analysisCtx.setLOJGroupbyOpRef(opRef); - ScalarFunctionCallExpression isNullFuncExpr = AccessMethodUtils - .findLOJIsMissingFuncInGroupBy((GroupByOperator) opRef.getValue(), rightSubTree); - analysisCtx.setLOJIsMissingFuncInGroupBy(isNullFuncExpr); + // For a left outer join with a special GroupBy, prepare objects to reset LOJ's + // nullPlaceHolderVariable in that GroupBy's nested plan. + // See AccessMethodUtils#removeUnjoinedDuplicatesInLOJ() for a definition of a special GroupBy + // and extra output processing steps needed when it's not available. + boolean isLeftOuterJoinWithSpecialGroupBy; + if (isLeftOuterJoin && op.getOperatorTag() == LogicalOperatorTag.GROUP) { + GroupByOperator groupByOp = (GroupByOperator) opRef.getValue(); + ScalarFunctionCallExpression isNullFuncExpr = + AccessMethodUtils.findLOJIsMissingFuncInGroupBy(groupByOp, rightSubTree); + // TODO:(dmitry) do we need additional checks to ensure that this is a special GroupBy, + // i.e. that this GroupBy will eliminate unjoined duplicates? + isLeftOuterJoinWithSpecialGroupBy = isNullFuncExpr != null; + if (isLeftOuterJoinWithSpecialGroupBy) { + analysisCtx.setLOJSpecialGroupByOpRef(opRef); + analysisCtx.setLOJIsMissingFuncInSpecialGroupBy(isNullFuncExpr); + } + } else { + isLeftOuterJoinWithSpecialGroupBy = false; } Dataset indexDataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex.second); @@ -396,8 +397,8 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod // Finally, tries to apply plan transformation using the chosen index. boolean res = chosenIndex.first.applyJoinPlanTransformation(afterJoinRefs, joinRef, leftSubTree, - rightSubTree, chosenIndex.second, analysisCtx, context, isThisOpLeftOuterJoin, - isParentOpGroupBy); + rightSubTree, chosenIndex.second, analysisCtx, context, isLeftOuterJoin, + isLeftOuterJoinWithSpecialGroupBy); // If the plan transformation is successful, we don't need to traverse the plan // any more, since if there are more JOIN operators, the next trigger on this plan @@ -412,10 +413,10 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod joinOp = null; } - // Checked the given left-outer-join operator and it is not transformed. So, this group-by operator - // after the left-outer-join operator should be removed from the afterJoinRefs list - // since the current operator is a group-by operator. - if (isThisOpLeftOuterJoin) { + // Checked the given left-outer-join operator and it is not transformed. + // So, the left-outer-join's parent operator should be removed from the afterJoinRefs list + // since the current operator is that parent operator. + if (isLeftOuterJoin) { afterJoinRefs.remove(opRef); } @@ -425,8 +426,6 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod /** * After the pattern is matched, checks the condition and initializes the data source * from the right (inner) sub tree. - * - * @throws AlgebricksException */ protected boolean checkJoinOpConditionAndInitSubTree(IOptimizationContext context) throws AlgebricksException { diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java index ed73333..ca65a9e 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java @@ -497,7 +497,8 @@ public class InvertedIndexAccessMethod implements IAccessMethod { public boolean applyJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, - IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException { + IOptimizationContext context, boolean isLeftOuterJoin, boolean isLeftOuterJoinWithSpecialGroupBy) + throws AlgebricksException { Dataset dataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex); OptimizableOperatorSubTree indexSubTree; OptimizableOperatorSubTree probeSubTree; @@ -524,13 +525,16 @@ public class InvertedIndexAccessMethod implements IAccessMethod { //if LOJ, reset null place holder variable LogicalVariable newNullPlaceHolderVar = null; - if (isLeftOuterJoin && hasGroupBy) { + if (isLeftOuterJoin) { //get a new null place holder variable that is the first field variable of the primary key //from the indexSubTree's datasourceScanOp + // We need this for all left outer joins, even those that do not have a special GroupBy newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0); - //reset the null place holder variable - AccessMethodUtils.resetLOJMissingPlaceholderVarInGroupByOp(analysisCtx, newNullPlaceHolderVar, context); + if (isLeftOuterJoinWithSpecialGroupBy) { + //reset the null place holder variable + AccessMethodUtils.resetLOJMissingPlaceholderVarInGroupByOp(analysisCtx, newNullPlaceHolderVar, context); + } } AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) joinRef.getValue(); diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java index 19558aa..7a24a8b 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java @@ -306,7 +306,8 @@ public class RTreeAccessMethod implements IAccessMethod { public boolean applyJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, - IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException { + IOptimizationContext context, boolean isLeftOuterJoin, boolean isLeftOuterJoinWithSpecialGroupBy) + throws AlgebricksException { AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) joinRef.getValue(); Mutable<ILogicalExpression> conditionRef = joinOp.getCondition(); @@ -335,6 +336,7 @@ public class RTreeAccessMethod implements IAccessMethod { if (isLeftOuterJoin) { // Gets a new null place holder variable that is the first field variable of the primary key // from the indexSubTree's datasourceScanOp. + // We need this for all left outer joins, even those that do not have a special GroupBy newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0); } @@ -352,8 +354,9 @@ public class RTreeAccessMethod implements IAccessMethod { return false; } - return AccessMethodUtils.finalizeJoinPlanTransformation(afterJoinRefs, joinRef, indexSubTree, analysisCtx, - context, isLeftOuterJoin, hasGroupBy, indexSearchOp, newNullPlaceHolderVar, conditionRef, dataset); + return AccessMethodUtils.finalizeJoinPlanTransformation(afterJoinRefs, joinRef, indexSubTree, probeSubTree, + analysisCtx, context, isLeftOuterJoin, isLeftOuterJoinWithSpecialGroupBy, indexSearchOp, + newNullPlaceHolderVar, conditionRef, dataset); } @Override diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp new file mode 100644 index 0000000..e693b46 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test indexnl left outer join without special groupby, + * : disallowing indexonly + * Expected Res : Success + * Runtime test : probe-pidx-with-join-btree-sidx1.5.query + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.TwitterUserType as + closed { + `screen-name` : string, + lang : string, + `friends-count` : bigint, + `statuses-count` : bigint, + name : string, + `followers-count` : bigint +}; + +create type test.TweetMessageType as + closed { + tweetid : bigint, + user : TwitterUserType, + `sender-location` : point, + `send-time` : datetime, + `referred-topics` : {{string}}, + `message-text` : string, + countA : bigint, + countB : bigint +}; + +create dataset TweetMessages(TweetMessageType) primary key tweetid; + +create index twmSndLocIx on TweetMessages (`sender-location`) type rtree; + +create index msgCountAIx on TweetMessages (countA) type btree; + +create index msgCountBIx on TweetMessages (countB) type btree; + +create index msgTextIx on TweetMessages (`message-text`) type keyword; + +set noindexonly "false"; + +select t1.tweetid as tweetid1, t1.countA as count1, t2.tweetid as tweetid2, t2.countB as count2 +from TweetMessages as t1 left outer join TweetMessages as t2 on t1.countA /*+ indexnl */ = t2.countB +where t1.tweetid < 10 +order by t1.tweetid, t2.tweetid +; diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03.sqlpp new file mode 100644 index 0000000..24a84dc --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03.sqlpp @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test indexnl left outer join without special groupby, + * : disallowing indexonly + * Expected Res : Success + * Runtime test : probe-pidx-with-join-btree-sidx1.4.query + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.TwitterUserType as + closed { + `screen-name` : string, + lang : string, + `friends-count` : bigint, + `statuses-count` : bigint, + name : string, + `followers-count` : bigint +}; + +create type test.TweetMessageType as + closed { + tweetid : bigint, + user : TwitterUserType, + `sender-location` : point, + `send-time` : datetime, + `referred-topics` : {{string}}, + `message-text` : string, + countA : bigint, + countB : bigint +}; + +create dataset TweetMessages(TweetMessageType) primary key tweetid; + +create index twmSndLocIx on TweetMessages (`sender-location`) type rtree; + +create index msgCountAIx on TweetMessages (countA) type btree; + +create index msgCountBIx on TweetMessages (countB) type btree; + +create index msgTextIx on TweetMessages (`message-text`) type keyword; + +set noindexonly "true"; + +select t1.tweetid as tweetid1, t1.countA as count1, t2.tweetid as tweetid2, t2.countB as count2 +from TweetMessages as t1 left outer join TweetMessages as t2 on t1.countA /*+ indexnl */ = t2.countB +where t1.tweetid < 10 +order by t1.tweetid, t2.tweetid +; diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan index f3dbc01..b6d1d39 100644 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan @@ -48,17 +48,17 @@ -- HASH_PARTITION_EXCHANGE [$$238] |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$285][$$207] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$285] |PARTITIONED| + -- HYBRID_HASH_JOIN [$$294][$$207] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$294] |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- UNNEST |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- PRE_CLUSTERED_GROUP_BY[$$289] |PARTITIONED| + -- PRE_CLUSTERED_GROUP_BY[$$298] |PARTITIONED| { -- AGGREGATE |LOCAL| - -- MICRO_PRE_CLUSTERED_GROUP_BY[$$291, $$293] |LOCAL| + -- MICRO_PRE_CLUSTERED_GROUP_BY[$$300, $$302] |LOCAL| { -- AGGREGATE |LOCAL| -- STREAM_SELECT |LOCAL| @@ -68,8 +68,8 @@ -- NESTED_TUPLE_SOURCE |LOCAL| } -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$289(ASC), $$291(ASC), $$293(ASC)] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$289] |PARTITIONED| + -- STABLE_SORT [$$298(ASC), $$300(ASC), $$302(ASC)] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$298] |PARTITIONED| -- UNION_ALL |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| @@ -86,45 +86,40 @@ -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- RTREE_SEARCH |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- ASSIGN |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$306(ASC)] |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- RTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- NESTED_LOOP |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |UNPARTITIONED| - -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$245(ASC)] |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |UNPARTITIONED| + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$245(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- STREAM_SELECT |PARTITIONED| @@ -137,45 +132,40 @@ -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- RTREE_SEARCH |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- ASSIGN |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$306(ASC)] |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- RTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- NESTED_LOOP |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |UNPARTITIONED| - -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$245(ASC)] |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |UNPARTITIONED| + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$245(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| -- HASH_PARTITION_EXCHANGE [$$207] |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| @@ -194,4 +184,4 @@ -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- DATASOURCE_SCAN |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| \ No newline at end of file + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.plan new file mode 100644 index 0000000..c6142aa --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.plan @@ -0,0 +1,49 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- SORT_MERGE_EXCHANGE [$$63(ASC), $$38(ASC) ] |PARTITIONED| + -- STABLE_SORT [$$63(ASC), $$38(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- WINDOW_STREAM |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$63(ASC), $$38(DESC)] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$63] |PARTITIONED| + -- UNION_ALL |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- SPLIT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- SPLIT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03.plan new file mode 100644 index 0000000..ce24784 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03.plan @@ -0,0 +1,33 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- SORT_MERGE_EXCHANGE [$$37(ASC), $$38(ASC) ] |PARTITIONED| + -- STABLE_SORT [$$37(ASC), $$38(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- WINDOW_STREAM |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$37(ASC), $$38(DESC)] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$37] |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$52(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.query.sqlpp new file mode 100644 index 0000000..e1a91bc --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.query.sqlpp @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test indexnl left outer join without special groupby, + * : disallowing indexonly + * Expected Res : Success + */ + +use test; + +set noindexonly "true"; + +select t1.tweetid as tweetid1, t1.countA as count1, t2.tweetid as tweetid2, t2.countB as count2 +from TweetMessages as t1 left outer join TweetMessages as t2 on t1.countA /*+ indexnl */ = t2.countB +where t1.tweetid < 10 +order by t1.tweetid, t2.tweetid +; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.query.sqlpp new file mode 100644 index 0000000..6596c92 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.query.sqlpp @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test indexnl left outer join without special groupby, + * : allowing indexonly + * Expected Res : Success + */ + +use test; + +set noindexonly "false"; + +select t1.tweetid as tweetid1, t1.countA as count1, t2.tweetid as tweetid2, t2.countB as count2 +from TweetMessages as t1 left outer join TweetMessages as t2 on t1.countA /*+ indexnl */ = t2.countB +where t1.tweetid < 10 +order by t1.tweetid, t2.tweetid +; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.3.adm similarity index 100% rename from asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.1.adm rename to asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.3.adm diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.adm new file mode 100644 index 0000000..75fa339 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.adm @@ -0,0 +1,11 @@ +{ "tweetid1": 1, "count1": 1 } +{ "tweetid1": 2, "count1": 2, "tweetid2": 60, "count2": 2 } +{ "tweetid1": 3, "count1": 3, "tweetid2": 105, "count2": 3 } +{ "tweetid1": 3, "count1": 3, "tweetid2": 206, "count2": 3 } +{ "tweetid1": 4, "count1": 4 } +{ "tweetid1": 5, "count1": 5, "tweetid2": 138, "count2": 5 } +{ "tweetid1": 5, "count1": 5, "tweetid2": 175, "count2": 5 } +{ "tweetid1": 6, "count1": 6, "tweetid2": 148, "count2": 6 } +{ "tweetid1": 7, "count1": 7, "tweetid2": 125, "count2": 7 } +{ "tweetid1": 8, "count1": 8 } +{ "tweetid1": 9, "count1": 9, "tweetid2": 141, "count2": 9 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.adm new file mode 100644 index 0000000..75fa339 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.adm @@ -0,0 +1,11 @@ +{ "tweetid1": 1, "count1": 1 } +{ "tweetid1": 2, "count1": 2, "tweetid2": 60, "count2": 2 } +{ "tweetid1": 3, "count1": 3, "tweetid2": 105, "count2": 3 } +{ "tweetid1": 3, "count1": 3, "tweetid2": 206, "count2": 3 } +{ "tweetid1": 4, "count1": 4 } +{ "tweetid1": 5, "count1": 5, "tweetid2": 138, "count2": 5 } +{ "tweetid1": 5, "count1": 5, "tweetid2": 175, "count2": 5 } +{ "tweetid1": 6, "count1": 6, "tweetid2": 148, "count2": 6 } +{ "tweetid1": 7, "count1": 7, "tweetid2": 125, "count2": 7 } +{ "tweetid1": 8, "count1": 8 } +{ "tweetid1": 9, "count1": 9, "tweetid2": 141, "count2": 9 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.ast b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.ast new file mode 100644 index 0000000..4e1ebd6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.4.ast @@ -0,0 +1,68 @@ +DataverseUse test +Set noindexonly=true +Query: +SELECT [ +FieldAccessor [ + Variable [ Name=$t1 ] + Field=tweetid +] +tweetid1 +FieldAccessor [ + Variable [ Name=$t1 ] + Field=countA +] +count1 +FieldAccessor [ + Variable [ Name=$t2 ] + Field=tweetid +] +tweetid2 +FieldAccessor [ + Variable [ Name=$t2 ] + Field=countB +] +count2 +] +FROM [ FunctionCall asterix.dataset@1[ + LiteralExpr [STRING] [test.TweetMessages] + ] + AS Variable [ Name=$t1 ] + LEFTOUTER JOIN + FunctionCall asterix.dataset@1[ + LiteralExpr [STRING] [test.TweetMessages] + ] + AS Variable [ Name=$t2 ] + ON + OperatorExpr [ + FieldAccessor [ + Variable [ Name=$t1 ] + Field=countA + ] + = + FieldAccessor [ + Variable [ Name=$t2 ] + Field=countB + ] + ] +] +Where + OperatorExpr [ + FieldAccessor [ + Variable [ Name=$t1 ] + Field=tweetid + ] + < + LiteralExpr [LONG] [10] + ] +Orderby + FieldAccessor [ + Variable [ Name=$t1 ] + Field=tweetid + ] + ASC + FieldAccessor [ + Variable [ Name=$t2 ] + Field=tweetid + ] + ASC + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.ast b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.ast new file mode 100644 index 0000000..965536d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/index-leftouterjoin/probe-pidx-with-join-btree-sidx1/probe-pidx-with-join-btree-sidx1.5.ast @@ -0,0 +1,68 @@ +DataverseUse test +Set noindexonly=false +Query: +SELECT [ +FieldAccessor [ + Variable [ Name=$t1 ] + Field=tweetid +] +tweetid1 +FieldAccessor [ + Variable [ Name=$t1 ] + Field=countA +] +count1 +FieldAccessor [ + Variable [ Name=$t2 ] + Field=tweetid +] +tweetid2 +FieldAccessor [ + Variable [ Name=$t2 ] + Field=countB +] +count2 +] +FROM [ FunctionCall asterix.dataset@1[ + LiteralExpr [STRING] [test.TweetMessages] + ] + AS Variable [ Name=$t1 ] + LEFTOUTER JOIN + FunctionCall asterix.dataset@1[ + LiteralExpr [STRING] [test.TweetMessages] + ] + AS Variable [ Name=$t2 ] + ON + OperatorExpr [ + FieldAccessor [ + Variable [ Name=$t1 ] + Field=countA + ] + = + FieldAccessor [ + Variable [ Name=$t2 ] + Field=countB + ] + ] +] +Where + OperatorExpr [ + FieldAccessor [ + Variable [ Name=$t1 ] + Field=tweetid + ] + < + LiteralExpr [LONG] [10] + ] +Orderby + FieldAccessor [ + Variable [ Name=$t1 ] + Field=tweetid + ] + ASC + FieldAccessor [ + Variable [ Name=$t2 ] + Field=tweetid + ] + ASC + diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java index 5f42dfb..bf2709a 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java @@ -1016,8 +1016,8 @@ public class BuiltinFunctions { new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "percent_rank", 0); public static final FunctionIdentifier PERCENT_RANK_IMPL = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "percent-rank-impl", FunctionIdentifier.VARARGS); - public static final FunctionIdentifier WIN_PARTITION_LENGTH = - new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "win_partition_length", 0); + public static final FunctionIdentifier WIN_MARK_FIRST_MISSING_IMPL = new FunctionIdentifier( + FunctionConstants.ASTERIX_NS, "win-mark-first-missing-impl", FunctionIdentifier.VARARGS); public static final FunctionIdentifier WIN_PARTITION_LENGTH_IMPL = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "win-partition-length-impl", 0); @@ -2049,7 +2049,7 @@ public class BuiltinFunctions { addFunction(ROW_NUMBER_IMPL, AInt64TypeComputer.INSTANCE, false); addFunction(PERCENT_RANK, ADoubleTypeComputer.INSTANCE, false); addFunction(PERCENT_RANK_IMPL, ADoubleTypeComputer.INSTANCE, false); - addPrivateFunction(WIN_PARTITION_LENGTH, AInt64TypeComputer.INSTANCE, false); + addPrivateFunction(WIN_MARK_FIRST_MISSING_IMPL, ABooleanTypeComputer.INSTANCE, false); addPrivateFunction(WIN_PARTITION_LENGTH_IMPL, AInt64TypeComputer.INSTANCE, false); // Similarity functions @@ -2976,7 +2976,8 @@ public class BuiltinFunctions { addWindowFunction(RANK, RANK_IMPL, NO_FRAME_CLAUSE, INJECT_ORDER_ARGS); addWindowFunction(RATIO_TO_REPORT, RATIO_TO_REPORT_IMPL, HAS_LIST_ARG); addWindowFunction(ROW_NUMBER, ROW_NUMBER_IMPL, NO_FRAME_CLAUSE); - addWindowFunction(WIN_PARTITION_LENGTH, WIN_PARTITION_LENGTH_IMPL, NO_FRAME_CLAUSE, MATERIALIZE_PARTITION); + addWindowFunction(null, WIN_MARK_FIRST_MISSING_IMPL, NO_FRAME_CLAUSE, INJECT_ORDER_ARGS); + addWindowFunction(null, WIN_PARTITION_LENGTH_IMPL, NO_FRAME_CLAUSE, MATERIALIZE_PARTITION); } static { @@ -3184,9 +3185,10 @@ public class BuiltinFunctions { public static void addWindowFunction(FunctionIdentifier sqlfi, FunctionIdentifier winfi, WindowFunctionProperty... properties) { - IFunctionInfo sqlinfo = getAsterixFunctionInfo(sqlfi); IFunctionInfo wininfo = getAsterixFunctionInfo(winfi); - sqlToWindowFunctions.put(sqlinfo, wininfo); + if (sqlfi != null) { + sqlToWindowFunctions.put(getAsterixFunctionInfo(sqlfi), wininfo); + } windowFunctions.add(wininfo); registerFunctionProperties(wininfo, WindowFunctionProperty.class, properties); } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java index 1fa9462..9e9b76e 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java @@ -545,6 +545,7 @@ import org.apache.asterix.runtime.runningaggregates.std.PercentRankRunningAggreg import org.apache.asterix.runtime.runningaggregates.std.RankRunningAggregateDescriptor; import org.apache.asterix.runtime.runningaggregates.std.RowNumberRunningAggregateDescriptor; import org.apache.asterix.runtime.runningaggregates.std.TidRunningAggregateDescriptor; +import org.apache.asterix.runtime.runningaggregates.std.WinMarkFirstMissingRunningAggregateDescriptor; import org.apache.asterix.runtime.runningaggregates.std.WinPartitionLenRunningAggregateDescriptor; import org.apache.asterix.runtime.unnestingfunctions.std.RangeDescriptor; import org.apache.asterix.runtime.unnestingfunctions.std.ScanCollectionDescriptor; @@ -820,6 +821,7 @@ public final class FunctionCollection implements IFunctionCollection { fc.add(RankRunningAggregateDescriptor.FACTORY); fc.add(RowNumberRunningAggregateDescriptor.FACTORY); fc.add(PercentRankRunningAggregateDescriptor.FACTORY); + fc.add(WinMarkFirstMissingRunningAggregateDescriptor.FACTORY); fc.add(WinPartitionLenRunningAggregateDescriptor.FACTORY); // boolean functions diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/WinMarkFirstMissingRunningAggregateDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/WinMarkFirstMissingRunningAggregateDescriptor.java new file mode 100644 index 0000000..3f5f9f6 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/WinMarkFirstMissingRunningAggregateDescriptor.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.runningaggregates.std; + +import org.apache.asterix.om.functions.BuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.runtime.runningaggregates.base.AbstractRunningAggregateFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext; +import org.apache.hyracks.algebricks.runtime.base.IRunningAggregateEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IRunningAggregateEvaluatorFactory; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.exceptions.HyracksDataException; + +/** + * This internal window function returns {@code TRUE} in the following two cases: + * <ol> + * <li>the argument is not MISSING</li> + * <li>the argument is MISSING and it comes from the first tuple in the current window partition</li> + * </ol> + * In all other cases the function returns {@code FALSE}. + * <p> + * The underlying assumption is that tuples in each window partition are sorted on the function's argument in the + * descending order. + */ +public final class WinMarkFirstMissingRunningAggregateDescriptor + extends AbstractRunningAggregateFunctionDynamicDescriptor { + + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = WinMarkFirstMissingRunningAggregateDescriptor::new; + + @Override + public IRunningAggregateEvaluatorFactory createRunningAggregateEvaluatorFactory(IScalarEvaluatorFactory[] args) { + return new IRunningAggregateEvaluatorFactory() { + private static final long serialVersionUID = 1L; + + @Override + public IRunningAggregateEvaluator createRunningAggregateEvaluator(IEvaluatorContext ctx) + throws HyracksDataException { + IScalarEvaluator[] evals = new IScalarEvaluator[args.length]; + for (int i = 0; i < args.length; i++) { + evals[i] = args[i].createScalarEvaluator(ctx); + } + return new WinMarkFirstMissingRunningAggregateEvaluator(evals); + } + }; + } + + @Override + public FunctionIdentifier getIdentifier() { + return BuiltinFunctions.WIN_MARK_FIRST_MISSING_IMPL; + } +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/WinMarkFirstMissingRunningAggregateEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/WinMarkFirstMissingRunningAggregateEvaluator.java new file mode 100644 index 0000000..c872921 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/WinMarkFirstMissingRunningAggregateEvaluator.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.runtime.runningaggregates.std; + +import java.io.DataOutput; + +import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider; +import org.apache.asterix.om.base.ABoolean; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IWindowAggregateEvaluator; +import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.TaggedValuePointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; +import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; + +public final class WinMarkFirstMissingRunningAggregateEvaluator implements IWindowAggregateEvaluator { + + @SuppressWarnings({ "rawtypes" }) + private final ISerializerDeserializer boolSerde = + SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN); + + private final IScalarEvaluator[] argEvals; + + private final TaggedValuePointable argValue; + + private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); + + private final DataOutput dataOutput = resultStorage.getDataOutput(); + + private boolean first; + + private boolean firstAllMissing; + + WinMarkFirstMissingRunningAggregateEvaluator(IScalarEvaluator[] argEvals) { + this.argEvals = argEvals; + argValue = TaggedValuePointable.FACTORY.createPointable(); + } + + @Override + public void init() { + } + + @Override + public void initPartition(long partitionLength) { + first = true; + } + + @Override + public void step(IFrameTupleReference tuple, IPointable result) throws HyracksDataException { + boolean value = compute(tuple); + resultStorage.reset(); + boolSerde.serialize(ABoolean.valueOf(value), dataOutput); + result.set(resultStorage); + } + + private boolean compute(IFrameTupleReference tuple) throws HyracksDataException { + if (first) { + firstAllMissing = everyArgIsMissing(tuple); + first = false; + return true; + } else { + boolean thisAllMissing = firstAllMissing || everyArgIsMissing(tuple); + return !thisAllMissing; + } + } + + private boolean everyArgIsMissing(IFrameTupleReference tuple) throws HyracksDataException { + for (IScalarEvaluator argEval : argEvals) { + argEval.evaluate(tuple, argValue); + if (argValue.getTag() != ATypeTag.SERIALIZED_MISSING_TYPE_TAG) { + return false; + } + } + return true; + } +} diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/util/OperatorManipulationUtil.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/util/OperatorManipulationUtil.java index c98489b..3f5012a 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/util/OperatorManipulationUtil.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/util/OperatorManipulationUtil.java @@ -451,4 +451,14 @@ public class OperatorManipulationUtil { assignExprList.remove(i); } } + + public static int findChild(ILogicalOperator op, LogicalOperatorTag childOpTag) { + List<Mutable<ILogicalOperator>> inputs = op.getInputs(); + for (int i = 0, ln = inputs.size(); i < ln; i++) { + if (inputs.get(i).getValue().getOperatorTag() == childOpTag) { + return i; + } + } + return -1; + } }
