http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c3c23574/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java index 1f4676c..a82e780 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java @@ -46,10 +46,13 @@ import org.apache.commons.lang3.mutable.Mutable; import org.apache.commons.lang3.mutable.MutableObject; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.common.utils.Pair; +import org.apache.hyracks.algebricks.common.utils.Quadruple; +import org.apache.hyracks.algebricks.common.utils.Triple; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; import org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment; @@ -65,7 +68,6 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBina import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractDataSourceOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestMapOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterUnnestMapOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator; @@ -86,14 +88,22 @@ public class BTreeAccessMethod implements IAccessMethod { EQUAL } - private static final List<FunctionIdentifier> FUNC_IDENTIFIERS = - Collections.unmodifiableList(Arrays.asList(AlgebricksBuiltinFunctions.EQ, AlgebricksBuiltinFunctions.LE, - AlgebricksBuiltinFunctions.GE, AlgebricksBuiltinFunctions.LT, AlgebricksBuiltinFunctions.GT)); + // The second boolean value tells whether the given function generates false positive results. + // That is, this function can produce false positive results if it is set to true. + // In this case, an index-search alone cannot replace the given SELECT condition and + // that SELECT condition needs to be applied after the index-search to get the correct results. + // For B+Tree indexes, there are no false positive results unless the given index is a composite index. + private static final List<Pair<FunctionIdentifier, Boolean>> FUNC_IDENTIFIERS = Collections + .unmodifiableList(Arrays.asList(new Pair<FunctionIdentifier, Boolean>(AlgebricksBuiltinFunctions.EQ, false), + new Pair<FunctionIdentifier, Boolean>(AlgebricksBuiltinFunctions.LE, false), + new Pair<FunctionIdentifier, Boolean>(AlgebricksBuiltinFunctions.GE, false), + new Pair<FunctionIdentifier, Boolean>(AlgebricksBuiltinFunctions.LT, false), + new Pair<FunctionIdentifier, Boolean>(AlgebricksBuiltinFunctions.GT, false))); public static final BTreeAccessMethod INSTANCE = new BTreeAccessMethod(); @Override - public List<FunctionIdentifier> getOptimizableFunctions() { + public List<Pair<FunctionIdentifier, Boolean>> getOptimizableFunctions() { return FUNC_IDENTIFIERS; } @@ -123,59 +133,124 @@ public class BTreeAccessMethod implements IAccessMethod { public boolean applySelectPlanTransformation(List<Mutable<ILogicalOperator>> afterSelectRefs, Mutable<ILogicalOperator> selectRef, OptimizableOperatorSubTree subTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context) throws AlgebricksException { - SelectOperator select = (SelectOperator) selectRef.getValue(); - Mutable<ILogicalExpression> conditionRef = select.getCondition(); + SelectOperator selectOp = (SelectOperator) selectRef.getValue(); + Mutable<ILogicalExpression> conditionRef = selectOp.getCondition(); + AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) conditionRef.getValue(); - ILogicalOperator primaryIndexUnnestOp = - createSecondaryToPrimaryPlan(conditionRef, subTree, null, chosenIndex, analysisCtx, - AccessMethodUtils.retainInputs(subTree.getDataSourceVariables(), - subTree.getDataSourceRef().getValue(), afterSelectRefs), - false, subTree.getDataSourceRef().getValue().getInputs().get(0).getValue() - .getExecutionMode() == ExecutionMode.UNPARTITIONED, - context); + // Check whether assign (unnest) operator exists before the select operator + Mutable<ILogicalOperator> assignBeforeSelectOpRef = + subTree.getAssignsAndUnnestsRefs().isEmpty() ? null : subTree.getAssignsAndUnnestsRefs().get(0); + ILogicalOperator assignBeforeSelectOp = null; + if (assignBeforeSelectOpRef != null) { + assignBeforeSelectOp = assignBeforeSelectOpRef.getValue(); + } - if (primaryIndexUnnestOp == null) { + Dataset dataset = subTree.getDataset(); + + // To check whether the given plan is an index-only plan. + // index-only plan possible? + boolean isIndexOnlyPlan = false; + + // Whether a verification is required after this secondary index search. + // In other words, can the chosen method generate any false positive results? + // Currently, for the B+ Tree index, there cannot be any false positive results unless it's a composite index. + boolean requireVerificationAfterSIdxSearch = false; + + Pair<Boolean, Boolean> functionFalsePositiveCheck = + AccessMethodUtils.canFunctionGenerateFalsePositiveResultsUsingIndex(funcExpr, FUNC_IDENTIFIERS); + if (functionFalsePositiveCheck.first) { + // An index-utilizable function found? then, get the info about false positive results generation. + requireVerificationAfterSIdxSearch = functionFalsePositiveCheck.second; + } else { return false; } - Mutable<ILogicalOperator> opRef = - subTree.getAssignsAndUnnestsRefs().isEmpty() ? null : subTree.getAssignsAndUnnestsRefs().get(0); - ILogicalOperator op = null; - if (opRef != null) { - op = opRef.getValue(); + + Quadruple<Boolean, Boolean, Boolean, Boolean> indexOnlyPlanInfo = + new Quadruple<>(isIndexOnlyPlan, false, requireVerificationAfterSIdxSearch, false); + + if (dataset.getDatasetType() == DatasetType.INTERNAL && !chosenIndex.isPrimaryIndex()) { + AccessMethodUtils.indexOnlyPlanCheck(afterSelectRefs, selectRef, subTree, null, chosenIndex, analysisCtx, + context, indexOnlyPlanInfo); + isIndexOnlyPlan = indexOnlyPlanInfo.getFirst(); } - // Generate new select using the new condition. + + // Sets the result of index-only plan check into AccessMethodAnalysisContext. + analysisCtx.setIndexOnlyPlanInfo(indexOnlyPlanInfo); + + // Transform the current path to the path that is utilizing the corresponding indexes + ILogicalOperator primaryIndexUnnestOp = createIndexSearchPlan(afterSelectRefs, selectRef, conditionRef, + subTree.getAssignsAndUnnestsRefs(), subTree, null, chosenIndex, analysisCtx, + AccessMethodUtils.retainInputs(subTree.getDataSourceVariables(), subTree.getDataSourceRef().getValue(), + afterSelectRefs), + false, subTree.getDataSourceRef().getValue().getInputs().get(0).getValue() + .getExecutionMode() == ExecutionMode.UNPARTITIONED, + context, null); + + if (primaryIndexUnnestOp == null) { + return false; + } + + // Generate new path using the new condition. if (conditionRef.getValue() != null) { - select.getInputs().clear(); - if (op != null) { - subTree.getDataSourceRef().setValue(primaryIndexUnnestOp); - select.getInputs().add(new MutableObject<>(op)); + if (assignBeforeSelectOp != null) { + if (isIndexOnlyPlan && dataset.getDatasetType() == DatasetType.INTERNAL) { + // Case 1: index-only plan + // The whole plan is now changed. Replace the current path with the given new plan. + // Gets the revised dataSourceRef operator + // - a secondary index-search that generates trustworthy results. + ILogicalOperator dataSourceRefOp = + AccessMethodUtils.findDataSourceFromIndexUtilizationPlan(primaryIndexUnnestOp); + + if (dataSourceRefOp.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP) { + subTree.getDataSourceRef().setValue(dataSourceRefOp); + } + + // Replace the current operator with the newly created operator. + selectRef.setValue(primaryIndexUnnestOp); + } else { + // Case 2: Non-index only plan case + // Right now, the order of operators is: select <- assign <- unnest-map (primary index look-up). + selectOp.getInputs().clear(); + subTree.getDataSourceRef().setValue(primaryIndexUnnestOp); + selectOp.getInputs().add(new MutableObject<ILogicalOperator>(assignBeforeSelectOp)); + } } else { - select.getInputs().add(new MutableObject<>(primaryIndexUnnestOp)); + // A secondary-index-only plan without any assign cannot exist. This is a non-index only plan. + selectOp.getInputs().clear(); + selectOp.getInputs().add(new MutableObject<ILogicalOperator>(primaryIndexUnnestOp)); } } else { + // All condition is now gone. UNNEST-MAP can replace the SELECT operator. ((AbstractLogicalOperator) primaryIndexUnnestOp).setExecutionMode(ExecutionMode.PARTITIONED); - if (op != null) { + if (assignBeforeSelectOp != null) { subTree.getDataSourceRef().setValue(primaryIndexUnnestOp); - selectRef.setValue(op); + selectRef.setValue(assignBeforeSelectOp); } else { selectRef.setValue(primaryIndexUnnestOp); } } + return true; } @Override - public boolean applyJoinPlanTransformation(Mutable<ILogicalOperator> joinRef, - OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, - AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin, - boolean hasGroupBy) throws AlgebricksException { + public boolean applyJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, + Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, + OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, + IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException { AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) joinRef.getValue(); Mutable<ILogicalExpression> conditionRef = joinOp.getCondition(); - // Determine if the index is applicable on the left or right side - // (if both, we arbitrarily prefer the left side). - Dataset dataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex); - OptimizableOperatorSubTree indexSubTree; - OptimizableOperatorSubTree probeSubTree; + + AbstractFunctionCallExpression funcExpr = null; + if (conditionRef.getValue().getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) { + funcExpr = (AbstractFunctionCallExpression) conditionRef.getValue(); + } + + Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex); + + // Determine if the index is applicable on the right (inner) side. + OptimizableOperatorSubTree indexSubTree = null; + OptimizableOperatorSubTree probeSubTree = null; // We assume that the left subtree is the outer branch and the right subtree is the inner branch. // This assumption holds true since we only use an index from the right subtree. // The following is just a sanity check. @@ -189,42 +264,41 @@ public class BTreeAccessMethod implements IAccessMethod { LogicalVariable newNullPlaceHolderVar = null; if (isLeftOuterJoin) { - //get a new null place holder variable that is the first field variable of the primary key - //from the indexSubTree's datasourceScanOp + // Gets a new null place holder variable that is the first field variable of the primary key + // from the indexSubTree's datasourceScanOp. newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0); } - ILogicalOperator primaryIndexUnnestOp = createSecondaryToPrimaryPlan(conditionRef, indexSubTree, probeSubTree, - chosenIndex, analysisCtx, true, isLeftOuterJoin, true, context); - if (primaryIndexUnnestOp == null) { + boolean canContinue = AccessMethodUtils.setIndexOnlyPlanInfo(afterJoinRefs, joinRef, probeSubTree, indexSubTree, + chosenIndex, analysisCtx, context, funcExpr, FUNC_IDENTIFIERS); + if (!canContinue) { return false; } - if (isLeftOuterJoin && hasGroupBy) { - //reset the null place holder variable - AccessMethodUtils.resetLOJNullPlaceholderVariableInGroupByOp(analysisCtx, newNullPlaceHolderVar, context); - } + ILogicalOperator indexSearchOp = createIndexSearchPlan(afterJoinRefs, joinRef, conditionRef, + indexSubTree.getAssignsAndUnnestsRefs(), indexSubTree, probeSubTree, chosenIndex, analysisCtx, true, + isLeftOuterJoin, true, context, newNullPlaceHolderVar); - // If there are conditions left, add a new select operator on top. - indexSubTree.getDataSourceRef().setValue(primaryIndexUnnestOp); - if (conditionRef.getValue() != null) { - SelectOperator topSelect = new SelectOperator(conditionRef, isLeftOuterJoin, newNullPlaceHolderVar); - topSelect.getInputs().add(indexSubTree.getRootRef()); - topSelect.setExecutionMode(ExecutionMode.LOCAL); - context.computeAndSetTypeEnvironmentForOperator(topSelect); - // Replace the original join with the new subtree rooted at the select op. - joinRef.setValue(topSelect); - } else { - joinRef.setValue(indexSubTree.getRootRef().getValue()); + if (indexSearchOp == null) { + return false; } - return true; + + return AccessMethodUtils.finalizeJoinPlanTransformation(afterJoinRefs, joinRef, indexSubTree, analysisCtx, + context, isLeftOuterJoin, hasGroupBy, indexSearchOp, newNullPlaceHolderVar, conditionRef, dataset); } + /** + * Creates an index utilization plan optimization - in case of an index-only select plan: + * union < project < select < assign? < unnest-map(pidx) < split < unnest-map(sidx) < assign? < datasource-scan + * ..... < project < ................................... < + */ @Override - public ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef, - OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree, Index chosenIndex, - AccessMethodAnalysisContext analysisCtx, boolean retainInput, boolean retainNull, boolean requiresBroadcast, - IOptimizationContext context) throws AlgebricksException { + public ILogicalOperator createIndexSearchPlan(List<Mutable<ILogicalOperator>> afterTopOpRefs, + Mutable<ILogicalOperator> topOpRef, Mutable<ILogicalExpression> conditionRef, + List<Mutable<ILogicalOperator>> assignBeforeTheOpRefs, OptimizableOperatorSubTree indexSubTree, + OptimizableOperatorSubTree probeSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, + boolean retainInput, boolean retainMissing, boolean requiresBroadcast, IOptimizationContext context, + LogicalVariable newMissingPlaceHolderForLOJ) throws AlgebricksException { Dataset dataset = indexSubTree.getDataset(); ARecordType recordType = indexSubTree.getRecordType(); ARecordType metaRecordType = indexSubTree.getMetaRecordType(); @@ -233,10 +307,20 @@ public class BTreeAccessMethod implements IAccessMethod { (AbstractDataSourceOperator) indexSubTree.getDataSourceRef().getValue(); List<Pair<Integer, Integer>> exprAndVarList = analysisCtx.getIndexExprsFromIndexExprsAndVars(chosenIndex); int numSecondaryKeys = analysisCtx.getNumberOfMatchedKeys(chosenIndex); + + // Whether the given plan is an index-only plan or not. + Quadruple<Boolean, Boolean, Boolean, Boolean> indexOnlyPlanInfo = analysisCtx.getIndexOnlyPlanInfo(); + boolean isIndexOnlyPlan = indexOnlyPlanInfo.getFirst(); + + // We only apply index-only plan for an internal dataset. + boolean generateInstantTrylockResultFromIndexSearch = false; + if (dataset.getDatasetType() == DatasetType.INTERNAL && isIndexOnlyPlan) { + generateInstantTrylockResultFromIndexSearch = true; + } + // List of function expressions that will be replaced by the secondary-index search. // These func exprs will be removed from the select condition at the very end of this method. Set<ILogicalExpression> replacedFuncExprs = new HashSet<>(); - // Info on high and low keys for the BTree search predicate. ILogicalExpression[] lowKeyExprs = new ILogicalExpression[numSecondaryKeys]; ILogicalExpression[] highKeyExprs = new ILogicalExpression[numSecondaryKeys]; @@ -257,6 +341,9 @@ public class BTreeAccessMethod implements IAccessMethod { BitSet setHighKeys = new BitSet(numSecondaryKeys); // Go through the func exprs listed as optimizable by the chosen index, // and formulate a range predicate on the secondary-index keys. + + // Checks whether a type casting happened from a real (FLOAT, DOUBLE) value to an INT value + // since we have a round issue when dealing with LT(<) OR GT(>) operator. for (Pair<Integer, Integer> exprIndex : exprAndVarList) { // Position of the field of matchedFuncExprs.get(exprIndex) in the chosen index's indexed exprs. IOptimizableFuncExpr optFuncExpr = analysisCtx.getMatchedFuncExpr(exprIndex.first); @@ -268,10 +355,16 @@ public class BTreeAccessMethod implements IAccessMethod { if (keyPos < 0) { throw CompilationException.create(ErrorCode.NO_INDEX_FIELD_NAME_FOR_GIVEN_FUNC_EXPR); } + // returnedSearchKeyExpr contains a pair of search expression. + // The second expression will not be null only if we are creating an EQ search predicate + // with a FLOAT or a DOUBLE constant that will be fed into an INTEGER index. + // This is required because of type-casting. Refer to AccessMethodUtils.createSearchKeyExpr for details. IAType indexedFieldType = chosenIndex.getKeyFieldTypes().get(keyPos); - Pair<ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils.createSearchKeyExpr(chosenIndex, - optFuncExpr, indexedFieldType, indexSubTree, probeSubTree); + Triple<ILogicalExpression, ILogicalExpression, Boolean> returnedSearchKeyExpr = + AccessMethodUtils.createSearchKeyExpr(chosenIndex, optFuncExpr, indexedFieldType, probeSubTree); ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first; + ILogicalExpression searchKeyEQExpr = null; + boolean realTypeConvertedToIntegerType = returnedSearchKeyExpr.third; if (searchKeyExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) { constantAtRuntimeExpressions[keyPos] = searchKeyExpr; constAtRuntimeExprVars[keyPos] = context.newVar(); @@ -283,10 +376,13 @@ public class BTreeAccessMethod implements IAccessMethod { return null; } - // checks whether a type casting happened from a real (FLOAT, DOUBLE) value to an INT value - // since we have a round issues when dealing with LT(<) OR GT(>) operator. - boolean realTypeConvertedToIntegerType = returnedSearchKeyExpr.second; + if (limit == LimitType.EQUAL && returnedSearchKeyExpr.second != null) { + // The given search predicate is EQ and + // we have two type-casted values from FLOAT or DOUBLE to an INT constant. + searchKeyEQExpr = returnedSearchKeyExpr.second; + } + // Deals with the non-enforced index case here. if (relaxLimitTypeToInclusive(chosenIndex, keyPos, realTypeConvertedToIntegerType)) { if (limit == LimitType.HIGH_EXCLUSIVE) { limit = LimitType.HIGH_INCLUSIVE; @@ -300,7 +396,16 @@ public class BTreeAccessMethod implements IAccessMethod { if (lowKeyLimits[keyPos] == null && highKeyLimits[keyPos] == null) { lowKeyLimits[keyPos] = highKeyLimits[keyPos] = limit; lowKeyInclusive[keyPos] = highKeyInclusive[keyPos] = true; - lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr; + if (searchKeyEQExpr == null) { + // No type-casting was happened. + lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr; + } else { + // We have two type-casted FLOAT or DOUBLE values to be fed into an INT index. + // They contain the same value if their fraction value is 0. + // Refer to AccessMethodUtils.createSearchKeyExpr() for more details. + lowKeyExprs[keyPos] = searchKeyExpr; + highKeyExprs[keyPos] = searchKeyEQExpr; + } setLowKeys.set(keyPos); setHighKeys.set(keyPos); isEqCondition = true; @@ -444,8 +549,8 @@ public class BTreeAccessMethod implements IAccessMethod { highKeyInclusive[0] = true; } - // Here we generate vars and funcs for assigning the secondary-index keys to be fed into the secondary-index - // search. + // Here we generate vars and funcs for assigning the secondary-index keys to be fed into + // the secondary-index search. // List of variables for the assign. ArrayList<LogicalVariable> keyVarList = new ArrayList<>(); // List of variables and expressions for the assign. @@ -477,7 +582,9 @@ public class BTreeAccessMethod implements IAccessMethod { } else { // We are optimizing a join, place the assign op top of the probe subtree. assignSearchKeys.getInputs().add(probeSubTree.getRootRef()); + assignSearchKeys.setExecutionMode(probeSubTree.getRootRef().getValue().getExecutionMode()); } + context.computeAndSetTypeEnvironmentForOperator(assignSearchKeys); inputOp = assignSearchKeys; } else if (probeSubTree == null) { //nonpure case @@ -496,36 +603,60 @@ public class BTreeAccessMethod implements IAccessMethod { inputOp = probeSubTree.getRoot(); } + // Creates an unnest-map for the secondary index search. + // The result: SK, PK, [Optional - the result of an instantTrylock on PK] ILogicalOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset, recordType, - metaRecordType, chosenIndex, inputOp, jobGenParams, context, false, retainInput, retainNull); + metaRecordType, chosenIndex, inputOp, jobGenParams, context, retainInput, retainMissing, + generateInstantTrylockResultFromIndexSearch); // Generate the rest of the upstream plan which feeds the search results into the primary index. - AbstractUnnestMapOperator primaryIndexUnnestOp = null; + ILogicalOperator indexSearchOp = null; boolean isPrimaryIndex = chosenIndex.isPrimaryIndex(); if (dataset.getDatasetType() == DatasetType.EXTERNAL) { // External dataset - UnnestMapOperator externalDataAccessOp = AccessMethodUtils.createExternalDataLookupUnnestMap(dataSourceOp, - dataset, recordType, secondaryIndexUnnestOp, context, retainInput, retainNull); + UnnestMapOperator externalDataAccessOp = + AccessMethodUtils.createExternalDataLookupUnnestMap(dataSourceOp, dataset, recordType, + metaRecordType, secondaryIndexUnnestOp, context, chosenIndex, retainInput, retainMissing); indexSubTree.getDataSourceRef().setValue(externalDataAccessOp); return externalDataAccessOp; } else if (!isPrimaryIndex) { - primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceOp, dataset, recordType, - metaRecordType, secondaryIndexUnnestOp, context, true, retainInput, retainNull, false); - - // Adds equivalence classes --- one equivalent class between a primary key - // variable and a record field-access expression. - EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp, - dataSourceOp.getVariables(), recordType, metaRecordType, dataset, context); + indexSearchOp = AccessMethodUtils.createRestOfIndexSearchPlan(afterTopOpRefs, topOpRef, conditionRef, + assignBeforeTheOpRefs, dataSourceOp, dataset, recordType, metaRecordType, secondaryIndexUnnestOp, + context, true, retainInput, retainMissing, false, chosenIndex, analysisCtx, indexSubTree, + newMissingPlaceHolderForLOJ); + + // Replaces the datasource scan with the new plan rooted at + // Get dataSourceRef operator - + // 1) unnest-map (PK, record) for a non-index only plan + // 2) unnest-map (SK, PK) for an index-only plan + if (isIndexOnlyPlan) { + // Index-only plan + ILogicalOperator dataSourceRefOp = + AccessMethodUtils.findDataSourceFromIndexUtilizationPlan(indexSearchOp); + + if (dataSourceRefOp.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP + || dataSourceRefOp.getOperatorTag() == LogicalOperatorTag.LEFT_OUTER_UNNEST_MAP) { + // Adds equivalence classes --- one equivalent class between a primary key + // variable and a record field-access expression. + EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(indexSearchOp, + dataSourceOp.getVariables(), recordType, metaRecordType, dataset, context); + } + } else { + // Non-indexonly plan cases + // Adds equivalence classes --- one equivalent class between a primary key + // variable and a record field-access expression. + EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(indexSearchOp, + dataSourceOp.getVariables(), recordType, metaRecordType, dataset, context); + } } else { + // Primary index search case List<Object> primaryIndexOutputTypes = new ArrayList<>(); AccessMethodUtils.appendPrimaryIndexTypes(dataset, recordType, metaRecordType, primaryIndexOutputTypes); List<LogicalVariable> scanVariables = dataSourceOp.getVariables(); - // Checks whether the primary index search can replace the given - // SELECT condition. - // If so, condition will be set to null and eventually the SELECT - // operator will be removed. + // Checks whether the primary index search can replace the given SELECT condition. + // If so, the condition will be set to null and eventually the SELECT operator will be removed. // If not, we create a new condition based on remaining ones. if (!primaryIndexPostProccessingIsNeeded) { List<Mutable<ILogicalExpression>> remainingFuncExprs = new ArrayList<>(); @@ -534,7 +665,7 @@ public class BTreeAccessMethod implements IAccessMethod { } catch (CompilationException e) { return null; } - // Generate new condition. + // Generates the new condition. if (!remainingFuncExprs.isEmpty()) { ILogicalExpression pulledCond = createSelectCondition(remainingFuncExprs); conditionRef.setValue(pulledCond); @@ -545,7 +676,7 @@ public class BTreeAccessMethod implements IAccessMethod { // Checks whether LEFT_OUTER_UNNESTMAP operator is required. boolean leftOuterUnnestMapRequired = false; - if (retainNull && retainInput) { + if (retainMissing && retainInput) { leftOuterUnnestMapRequired = true; } else { leftOuterUnnestMapRequired = false; @@ -556,42 +687,41 @@ public class BTreeAccessMethod implements IAccessMethod { // via the UnnestMapOperator's function arguments. List<Mutable<ILogicalExpression>> primaryIndexFuncArgs = new ArrayList<>(); jobGenParams.writeToFuncArgs(primaryIndexFuncArgs); - // An index search is expressed as an unnest-map over an - // index-search function. + // An index search is expressed as an unnest-map over an index-search function. IFunctionInfo primaryIndexSearch = FunctionUtil.getFunctionInfo(BuiltinFunctions.INDEX_SEARCH); UnnestingFunctionCallExpression primaryIndexSearchFunc = new UnnestingFunctionCallExpression(primaryIndexSearch, primaryIndexFuncArgs); primaryIndexSearchFunc.setReturnsUniqueValues(true); if (!leftOuterUnnestMapRequired) { - primaryIndexUnnestOp = new UnnestMapOperator(scanVariables, + indexSearchOp = new UnnestMapOperator(scanVariables, new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes, retainInput); } else { - primaryIndexUnnestOp = new LeftOuterUnnestMapOperator(scanVariables, + indexSearchOp = new LeftOuterUnnestMapOperator(scanVariables, new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes, true); } } else { if (!leftOuterUnnestMapRequired) { - primaryIndexUnnestOp = new UnnestMapOperator(scanVariables, + indexSearchOp = new UnnestMapOperator(scanVariables, ((UnnestMapOperator) secondaryIndexUnnestOp).getExpressionRef(), primaryIndexOutputTypes, retainInput); } else { - primaryIndexUnnestOp = new LeftOuterUnnestMapOperator(scanVariables, + indexSearchOp = new LeftOuterUnnestMapOperator(scanVariables, ((LeftOuterUnnestMapOperator) secondaryIndexUnnestOp).getExpressionRef(), primaryIndexOutputTypes, true); } } - primaryIndexUnnestOp.getInputs().add(new MutableObject<>(inputOp)); + indexSearchOp.getInputs().add(new MutableObject<>(inputOp)); // Adds equivalence classes --- one equivalent class between a primary key // variable and a record field-access expression. - EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(primaryIndexUnnestOp, scanVariables, - recordType, metaRecordType, dataset, context); + EquivalenceClassUtils.addEquivalenceClassesForPrimaryIndexAccess(indexSearchOp, scanVariables, recordType, + metaRecordType, dataset, context); } - return primaryIndexUnnestOp; + return indexSearchOp; } private int createKeyVarsAndExprs(int numKeys, LimitType[] keyLimits, ILogicalExpression[] searchKeyExprs, @@ -706,6 +836,13 @@ public class BTreeAccessMethod implements IAccessMethod { } private boolean relaxLimitTypeToInclusive(Index chosenIndex, int keyPos, boolean realTypeConvertedToIntegerType) { + // For a non-enforced index or an enforced index that stores a casted value on the given index, + // we need to apply the following transformation. + // For an index on a closed field, this transformation is not necessary since the value between + // the index and the actual record match. + // + // Check AccessMethodUtils.createSearchKeyExpr for more details. + // // If a DOUBLE or FLOAT constant is converted to an INT type value, // we need to check a corner case where two real values are located between an INT value. // For example, for the following query, @@ -721,10 +858,7 @@ public class BTreeAccessMethod implements IAccessMethod { // Therefore, we convert LT(<) to LE(<=) and GT(>) to GE(>=) to find candidates. // This does not change the result of an actual comparison since this conversion is only applied // for finding candidates from an index. - // - // We also need to do this for a non-enforced index that overrides key field type (for a numeric type) - - if (realTypeConvertedToIntegerType) { + if (chosenIndex.isEnforced() && realTypeConvertedToIntegerType) { return true; }
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c3c23574/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java index 870b425..559f336 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java @@ -23,9 +23,11 @@ import java.util.List; import org.apache.asterix.metadata.entities.Index; import org.apache.commons.lang3.mutable.Mutable; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.common.utils.Pair; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; import org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; @@ -41,9 +43,10 @@ public interface IAccessMethod extends Comparable<IAccessMethod> { /** * @return A list of function identifiers that are optimizable by this - * access method. + * access method. Also, the second boolean tells whether that + * function can generate a false-positive result. */ - public List<FunctionIdentifier> getOptimizableFunctions(); + public List<Pair<FunctionIdentifier, Boolean>> getOptimizableFunctions(); /** * Analyzes the arguments of a given optimizable funcExpr to see if this @@ -86,20 +89,22 @@ public interface IAccessMethod extends Comparable<IAccessMethod> { Mutable<ILogicalOperator> selectRef, OptimizableOperatorSubTree subTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context) throws AlgebricksException; - public ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef, - OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree, Index chosenIndex, - AccessMethodAnalysisContext analysisCtx, boolean retainInput, boolean retainNull, boolean requiresBroadcast, - IOptimizationContext context) throws AlgebricksException; + public ILogicalOperator createIndexSearchPlan(List<Mutable<ILogicalOperator>> afterTopOpRefs, + Mutable<ILogicalOperator> topOpRef, Mutable<ILogicalExpression> conditionRef, + List<Mutable<ILogicalOperator>> assignBeforeTheOpRefs, OptimizableOperatorSubTree indexSubTree, + OptimizableOperatorSubTree probeSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, + boolean retainInput, boolean retainNull, boolean requiresBroadcast, IOptimizationContext context, + LogicalVariable newNullPlaceHolderForLOJ) throws AlgebricksException; /** * Applies the plan transformation to use chosenIndex to optimize a join query. * In the case of a LeftOuterJoin, there may or may not be a needed groupby operation * If there is, we will need to include it in the transformation */ - public boolean applyJoinPlanTransformation(Mutable<ILogicalOperator> joinRef, - OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, - AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin, - boolean hasGroupBy) throws AlgebricksException; + public boolean applyJoinPlanTransformation(List<Mutable<ILogicalOperator>> afterJoinRefs, + Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, + OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, + IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException; /** * Analyzes expr to see whether it is optimizable by the given concrete index. http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c3c23574/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java index 1171ae5..401ea23 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java @@ -18,6 +18,7 @@ */ package org.apache.asterix.optimizer.rules.am; +import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -48,33 +49,37 @@ import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; /** * This rule optimizes a join with secondary indexes into an indexed nested-loop join. - * Matches the following operator pattern: - * (join) <-- (select)? <-- (assign | unnest)+ <-- (datasource scan) - * <-- (select)? <-- (assign | unnest)+ <-- (datasource scan | unnest-map) - * The order of the join inputs matters (left becomes the outer relation and right becomes the inner relation). + * This rule matches the following operator pattern: + * join <-- select? <-- (assign|unnest)+ <-- (datasource scan|unnest-map) + * The order of the join inputs matters (left branch - outer relation, right branch - inner relation). * This rule tries to utilize an index on the inner relation. * If that's not possible, it stops transforming the given join into an index-nested-loop join. - * Replaces the above pattern with the following simplified plan: - * (select) <-- (assign) <-- (btree search) <-- (sort) <-- (unnest(index search)) <-- (assign) <-- (A) - * (A) <-- (datasource scan | unnest-map) - * The sort is optional, and some access methods may choose not to sort. + * + * This rule replaces the above pattern with the following simplified plan: + * select <-- assign+ <-- unnest-map(pidx) <-- sort <-- unnest-map(sidx) <-- assign+ <-- (datasource scan|unnest-map) + * The sorting PK process is optional, and some access methods may choose not to sort. * Note that for some index-based optimizations we do not remove the triggering * condition from the join, since the secondary index may only act as a filter, and the * final verification must still be done with the original join condition. + * * The basic outline of this rule is: * 1. Match operator pattern. * 2. Analyze join condition to see if there are optimizable functions (delegated to IAccessMethods). * 3. Check metadata to see if there are applicable indexes. * 4. Choose an index to apply (for now only a single index will be chosen). * 5. Rewrite plan using index (delegated to IAccessMethods). + * * For left-outer-join, additional patterns are checked and additional treatment is needed as follows: - * 1. First it checks if there is a groupByOp above the join: (groupby) <-- (leftouterjoin) + * 1. First it checks if there is a groupByOp above the join: groupby <-- leftouterjoin * 2. Inherently, only the right-subtree of the lojOp can be used as indexSubtree. - * So, the right-subtree must have at least one applicable index on join field(s) + * So, the right-subtree must have at least one applicable index on join field(s). * 3. If there is a groupByOp, the null placeholder variable introduced in groupByOp should be taken care of correctly. * Here, the primary key variable from datasourceScanOp replaces the introduced null placeholder variable. - * If the primary key is composite key, then the first variable of the primary key variables becomes the + * If the primary key is a composite key, then the first variable of the primary key variables becomes the * null place holder variable. This null placeholder variable works for all three types of indexes. + * + * If the inner-branch can be transformed as an index-only plan, this rule creates an index-only-plan path + * that is similar to one described in IntroduceSelectAccessMethod Rule. */ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethodRule { @@ -84,10 +89,10 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod protected final OptimizableOperatorSubTree leftSubTree = new OptimizableOperatorSubTree(); protected final OptimizableOperatorSubTree rightSubTree = new OptimizableOperatorSubTree(); protected IVariableTypeEnvironment typeEnvironment = null; - protected boolean isLeftOuterJoin = false; protected boolean hasGroupBy = true; + protected List<Mutable<ILogicalOperator>> afterJoinRefs = null; - // Register access methods. + // Registers access methods. protected static Map<FunctionIdentifier, List<IAccessMethod>> accessMethods = new HashMap<>(); static { @@ -97,8 +102,8 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod } /** - * Recursively check the given plan from the root operator to transform a plan - * with JOIN or LEFT-OUTER-JOIN operator into an index-utilized plan. + * Recursively checks the given plan from the root operator to transform a plan + * with INNERJOIN or LEFT-OUTER-JOIN operator into an index-utilized plan. */ @Override @@ -114,7 +119,7 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod return false; } - // Check whether this operator is the root, which is DISTRIBUTE_RESULT or SINK since + // Checks whether this operator is the root, which is DISTRIBUTE_RESULT or SINK since // we start the process from the root operator. if (op.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT && op.getOperatorTag() != LogicalOperatorTag.SINK @@ -127,7 +132,8 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod return false; } - // Recursively check the given plan whether the desired pattern exists in it. + afterJoinRefs = new ArrayList<>(); + // Recursively checks the given plan whether the desired pattern exists in it. // If so, try to optimize the plan. boolean planTransformed = checkAndApplyJoinTransformation(opRef, context); @@ -227,11 +233,11 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod joinRef = null; joinOp = null; joinCond = null; - isLeftOuterJoin = false; + afterJoinRefs = null; } /** - * Recursively traverse the given plan and check whether a INNERJOIN or LEFTOUTERJOIN operator exists. + * Recursively traverses the given plan and check whether a INNERJOIN or LEFTOUTERJOIN operator exists. * If one is found, maintain the path from the root to the given join operator and * optimize the path from the given join operator to the EMPTY_TUPLE_SOURCE operator * if it is not already optimized. @@ -241,6 +247,10 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); boolean joinFoundAndOptimizationApplied; + // Adds the current operator to the operator list that contains operators after a join operator + // in case there is a descendant join operator and it could be transformed first. + afterJoinRefs.add(opRef); + // Recursively check the plan and try to optimize it. We first check the children of the given operator // to make sure an earlier join in the path is optimized first. for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) { @@ -250,32 +260,39 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod } } - // Check the current operator pattern to see whether it is a JOIN or not. + // Now, we are sure that transformation attempts for earlier joins have been failed. + // Checks the current operator pattern to see whether it is a JOIN or not. boolean isThisOpInnerJoin = isInnerJoin(op); boolean isThisOpLeftOuterJoin = isLeftOuterJoin(op); boolean isParentOpGroupBy = hasGroupBy; Mutable<ILogicalOperator> joinRefFromThisOp = null; AbstractBinaryJoinOperator joinOpFromThisOp = null; - + // operators that need to be removed from the afterJoinRefs list. + Mutable<ILogicalOperator> opRefRemove = opRef; if (isThisOpInnerJoin) { - // Set join operator. + // Sets the join operator. joinRef = opRef; joinOp = (InnerJoinOperator) op; joinRefFromThisOp = opRef; joinOpFromThisOp = (InnerJoinOperator) op; } else if (isThisOpLeftOuterJoin) { - // Set left-outer-join op. - // The current operator is GROUP and the child of this op is LEFTOUERJOIN. + // Sets the left-outer-join operator. + // The current operator is GROUP and the child of this operator is LEFTOUERJOIN. joinRef = op.getInputs().get(0); joinOp = (LeftOuterJoinOperator) joinRef.getValue(); joinRefFromThisOp = op.getInputs().get(0); joinOpFromThisOp = (LeftOuterJoinOperator) joinRefFromThisOp.getValue(); + + // Group-by should not be removed at this point since the given left-outer-join can be transformed. + opRefRemove = op.getInputs().get(0); } + afterJoinRefs.remove(opRefRemove); - // For a JOIN case, try to transform the given plan. + // For a JOIN case, tries to transform the given plan. if (isThisOpInnerJoin || isThisOpLeftOuterJoin) { - // Restore the information from this operator since it might have been be set to null + + // Restores the information from this operator since it might have been be set to null // if there are other join operators in the earlier path. joinRef = joinRefFromThisOp; joinOp = joinOpFromThisOp; @@ -294,14 +311,14 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod analyzedAMs = new HashMap<>(); } - // Check the condition of JOIN operator is a function call since only function call can be transformed - // using available indexes. If so, initialize the subtree information that will be used later to decide + // Checks the condition of JOIN operator is a function call since only function call can be transformed + // using available indexes. If so, initializes the subtree information that will be used later to decide // whether the given plan is truly optimizable or not. if (continueCheck && !checkJoinOpConditionAndInitSubTree(context)) { continueCheck = false; } - // Analyze the condition of SELECT operator and initialize analyzedAMs. + // Analyzes the condition of SELECT operator and initializes analyzedAMs. // Check whether the function in the SELECT operator can be truly transformed. boolean matchInLeftSubTree = false; boolean matchInRightSubTree = false; @@ -316,7 +333,7 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod } } - // Find the dataset from the data-source and the record type of the dataset from the metadata. + // Finds the dataset from the data-source and the record type of the dataset from the metadata. // This will be used to find an applicable index on the dataset. boolean checkLeftSubTreeMetadata = false; boolean checkRightSubTreeMetadata = false; @@ -338,11 +355,11 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod } fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context); - // Prune the access methods based on the function expression and access methods. + // Prunes the access methods based on the function expression and access methods. pruneIndexCandidates(analyzedAMs, context, typeEnvironment); // If the right subtree (inner branch) has indexes, one of those indexes will be used. - // Remove the indexes from the outer branch in the optimizer's consideration list for this rule. + // Removes the indexes from the outer branch in the optimizer's consideration list for this rule. pruneIndexCandidatesFromOuterBranch(analyzedAMs); // We are going to use indexes from the inner branch. @@ -354,7 +371,16 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod } if (continueCheck) { - // Apply plan transformation using chosen index. + // Finds the field name of each variable in the sub-tree such as variables for order by. + // This step is required when checking index-only plan. + if (checkLeftSubTreeMetadata) { + fillFieldNamesInTheSubTree(leftSubTree); + } + if (checkRightSubTreeMetadata) { + fillFieldNamesInTheSubTree(rightSubTree); + } + + // Applies the plan transformation using chosen index. AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first); // For LOJ with GroupBy, prepare objects to reset LOJ nullPlaceHolderVariable @@ -363,7 +389,7 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod analysisCtx.setLOJGroupbyOpRef(opRef); ScalarFunctionCallExpression isNullFuncExpr = AccessMethodUtils.findLOJIsMissingFuncInGroupBy((GroupByOperator) opRef.getValue()); - analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr); + analysisCtx.setLOJIsMissingFuncInGroupBy(isNullFuncExpr); } Dataset indexDataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex.second); @@ -376,9 +402,10 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod return false; } - // Finally, try to apply plan transformation using chosen index. - boolean res = chosenIndex.first.applyJoinPlanTransformation(joinRef, leftSubTree, rightSubTree, - chosenIndex.second, analysisCtx, context, isThisOpLeftOuterJoin, isParentOpGroupBy); + // Finally, tries to apply plan transformation using the chosen index. + boolean res = chosenIndex.first.applyJoinPlanTransformation(afterJoinRefs, joinRef, leftSubTree, + rightSubTree, chosenIndex.second, analysisCtx, context, isThisOpLeftOuterJoin, + isParentOpGroupBy); // If the plan transformation is successful, we don't need to traverse the plan // any more, since if there are more JOIN operators, the next trigger on this plan @@ -393,11 +420,19 @@ public class IntroduceJoinAccessMethodRule extends AbstractIntroduceAccessMethod joinOp = null; } + // Checked the given left-outer-join operator and it is not transformed. So, this group-by operator + // after the left-outer-join operator should be removed from the afterJoinRefs list + // since the current operator is a group-by operator. + if (isThisOpLeftOuterJoin) { + afterJoinRefs.remove(opRef); + } + return false; } /** - * After the pattern is matched, check the condition and initialize the data sources from the both sub trees. + * After the pattern is matched, checks the condition and initializes the data source + * from the right (inner) sub tree. * * @throws AlgebricksException */ http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c3c23574/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceLSMComponentFilterRule.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceLSMComponentFilterRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceLSMComponentFilterRule.java index 546652b..7938f49 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceLSMComponentFilterRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceLSMComponentFilterRule.java @@ -62,6 +62,7 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperat import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.IntersectOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.SplitOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator; import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; @@ -79,7 +80,6 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { @Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { - if (!checkIfRuleIsApplicable(opRef, context)) { return false; } @@ -232,23 +232,23 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { queue.addAll(descendantOp.getInputs()); } if (hasSecondaryIndexMap && !primaryUnnestMapOps.isEmpty()) { - propagateFilterToPrimaryIndex(primaryUnnestMapOps, filterType, context); + propagateFilterToPrimaryIndex(primaryUnnestMapOps, filterType, context, false); } } private void propagateFilterToPrimaryIndex(List<UnnestMapOperator> primaryUnnestMapOps, IAType filterType, - IOptimizationContext context) throws AlgebricksException { + IOptimizationContext context, boolean isIndexOnlyPlan) throws AlgebricksException { for (UnnestMapOperator primaryOp : primaryUnnestMapOps) { Mutable<ILogicalOperator> assignOrOrderOrIntersect = primaryOp.getInputs().get(0); - Mutable<ILogicalOperator> intersectOrSort = assignOrOrderOrIntersect; + Mutable<ILogicalOperator> intersectOrSortOrSplit = assignOrOrderOrIntersect; if (assignOrOrderOrIntersect.getValue().getOperatorTag() == LogicalOperatorTag.ASSIGN) { - intersectOrSort = assignOrOrderOrIntersect.getValue().getInputs().get(0); + intersectOrSortOrSplit = assignOrOrderOrIntersect.getValue().getInputs().get(0); } - switch (intersectOrSort.getValue().getOperatorTag()) { + switch (intersectOrSortOrSplit.getValue().getOperatorTag()) { case INTERSECT: - IntersectOperator intersect = (IntersectOperator) (intersectOrSort.getValue()); + IntersectOperator intersect = (IntersectOperator) (intersectOrSortOrSplit.getValue()); List<List<LogicalVariable>> filterVars = new ArrayList<>(intersect.getInputs().size()); for (Mutable<ILogicalOperator> mutableOp : intersect.getInputs()) { ILogicalOperator child = mutableOp.getValue(); @@ -267,13 +267,30 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { IntersectOperator intersectWithFilter = createIntersectWithFilter(outputFilterVars, filterVars, intersect); - intersectOrSort.setValue(intersectWithFilter); + intersectOrSortOrSplit.setValue(intersectWithFilter); context.computeAndSetTypeEnvironmentForOperator(intersectWithFilter); setPrimaryFilterVar(primaryOp, outputFilterVars.get(0), outputFilterVars.get(1), context); } break; + case SPLIT: + if (!isIndexOnlyPlan) { + throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_STATE, + intersectOrSortOrSplit.getValue().getOperatorTag().toString()); + } + SplitOperator split = (SplitOperator) (intersectOrSortOrSplit.getValue()); + for (Mutable<ILogicalOperator> childOp : split.getInputs()) { + ILogicalOperator child = childOp.getValue(); + while (!child.getOperatorTag().equals(LogicalOperatorTag.UNNEST_MAP)) { + child = child.getInputs().get(0).getValue(); + } + UnnestMapOperator unnestMap = (UnnestMapOperator) child; + propagateFilterInSecondaryUnnsetMap(unnestMap, filterType, context); + setPrimaryFilterVar(primaryOp, unnestMap.getPropagateIndexMinFilterVar(), + unnestMap.getPropagateIndexMaxFilterVar(), context); + } + break; case ORDER: - ILogicalOperator child = intersectOrSort.getValue().getInputs().get(0).getValue(); + ILogicalOperator child = intersectOrSortOrSplit.getValue().getInputs().get(0).getValue(); if (child.getOperatorTag().equals(LogicalOperatorTag.UNNEST_MAP)) { UnnestMapOperator secondaryMap = (UnnestMapOperator) child; @@ -283,9 +300,10 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { secondaryMap.getPropagateIndexMaxFilterVar(), context); } break; + default: throw new CompilationException(ErrorCode.COMPILATION_ILLEGAL_STATE, - intersectOrSort.getValue().getOperatorTag().toString()); + intersectOrSortOrSplit.getValue().getOperatorTag().toString()); } } } @@ -337,6 +355,7 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { IOptimizationContext context, IAType filterType) throws AlgebricksException { List<UnnestMapOperator> primaryUnnestMapOps = new ArrayList<>(); boolean hasSecondaryIndexMap = false; + boolean isIndexOnlyPlan = false; Queue<Mutable<ILogicalOperator>> queue = new LinkedList<>(op.getInputs()); while (!queue.isEmpty()) { ILogicalOperator descendantOp = queue.poll().getValue(); @@ -356,6 +375,7 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { primaryUnnestMapOps.add(unnestMapOp); } else { hasSecondaryIndexMap = true; + isIndexOnlyPlan = unnestMapOp.getGenerateCallBackProceedResultVar(); } } } @@ -363,7 +383,7 @@ public class IntroduceLSMComponentFilterRule implements IAlgebraicRewriteRule { queue.addAll(descendantOp.getInputs()); } if (hasSecondaryIndexMap && !primaryUnnestMapOps.isEmpty()) { - propagateFilterToPrimaryIndex(primaryUnnestMapOps, filterType, context); + propagateFilterToPrimaryIndex(primaryUnnestMapOps, filterType, context, isIndexOnlyPlan); } } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c3c23574/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroducePrimaryIndexForAggregationRule.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroducePrimaryIndexForAggregationRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroducePrimaryIndexForAggregationRule.java index 7b8b906..5eb0fc6 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroducePrimaryIndexForAggregationRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroducePrimaryIndexForAggregationRule.java @@ -233,7 +233,7 @@ public class IntroducePrimaryIndexForAggregationRule implements IAlgebraicRewrit AbstractUnnestMapOperator primaryIndexUnnestOperator = (AbstractUnnestMapOperator) AccessMethodUtils.createSecondaryIndexUnnestMap(dataset, recordType, metaRecordType, primaryIndex, scanOperator.getInputs().get(0).getValue(), - newBTreeParameters, context, true, retainInput, false); + newBTreeParameters, context, retainInput, false, false); // re-use the PK variables of the original scan operator primaryIndexUnnestOperator.getVariables().clear(); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c3c23574/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java index d0e973f..27e5645 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java @@ -26,6 +26,8 @@ import java.util.Optional; import java.util.TreeMap; import org.apache.asterix.algebra.operators.CommitOperator; +import org.apache.asterix.common.exceptions.CompilationException; +import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.metadata.declared.MetadataProvider; import org.apache.asterix.metadata.entities.Index; import org.apache.commons.lang3.mutable.Mutable; @@ -51,30 +53,62 @@ import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperat import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; /** - * This rule optimizes simple selections with secondary or primary indexes. The use of an - * index is expressed as an unnest-map over an index-search function which will be + * This rule optimizes simple selections with secondary or primary indexes. + * The use of an index is expressed as an UNNESTMAP operator over an index-search function which will be * replaced with the appropriate embodiment during codegen. - * Matches the following operator patterns: - * Standard secondary index pattern: - * There must be at least one assign, but there may be more, e.g., when matching similarity-jaccard-check(). - * (select) <-- (assign | unnest)+ <-- (datasource scan) - * Primary index lookup pattern: - * Since no assign is necessary to get the primary key fields (they are already stored fields in the BTree tuples). - * (select) <-- (datasource scan) - * Replaces the above patterns with this plan: - * (select) <-- (assign) <-- (btree search) <-- (sort) <-- (unnest-map(index search)) <-- (assign) - * The sort is optional, and some access methods implementations may choose not to sort. + * This rule seeks to change the following patterns. + * For the secondary-index searches, a SELECT operator is followed by one or more ASSIGN / UNNEST operators. + * A DATASOURCE_SCAN operator should be placed before these operators. + * For the primary-index search, a SELECT operator is followed by DATASOURE_SCAN operator since no ASSIGN / UNNEST + * operator is required to get the primary key fields (they are already stored fields in the BTree tuples). + * If the above pattern is found, this rule replaces the pattern with the following pattern. + * If the given plan is both a secondary-index search and an index-only plan, it builds two paths. + * The left path has a UNIONALL operator at the top. And the original SELECT operator is followed. Also, the original + * ASSIGN / UNNEST operators are followed. Then, UNNEST-MAP for the primary-index-search is followed + * to fetch the record. Before that, a SPLIT operator is introduced. Before this, an UNNEST-MAP for + * the secondary-index-search is followed to conduct a secondary-index search. The search key (original ASSIGN/UNNEST) + * to the secondary-index-search (UNNEST-MAP) is placed before that. + * The right path has the above UNIONALL operator at the top. Then, possibly has optional SELECT and/or ASSIGN/UNNEST + * operators for the composite BTree or RTree search cases. Then, the above SPLIT operator is followed. Before the SPLIT + * operator, it shares the same operators with the left path. + * To be qualified as an index-only plan, there are two conditions. + * 1) Search predicate can be covered by a secondary index-search. + * 2) there are only PK and/or SK fields in the return clause. + * If the given query satisfies the above conditions, we call it an index-only plan. + * The benefit of the index-only plan is that we don't need to traverse the primary index + * after fetching SK, PK entries from a secondary index. + * The index-only plan works as follows. + * 1) During a secondary-index search, after fetching <SK, PK> pair that satisfies the given predicate, + * we try to get an instantTryLock on PK to verify that <SK, PK> is a valid pair. + * If it succeeds, the given <SK, PK> pair is trustworthy so that we can return this as a valid output. + * This tuple goes to the right path of UNIONALL operator since we don't need to traverse the primary index. + * If instantTryLock on PK fails, an operation on the PK record is ongoing, so we need to traverse + * the primary index to fetch the entire record and verify the search predicate. So, this <SK, PK> pair + * goes to the left path of UNIONALL operator to traverse the primary index. + * In the left path, we fetch the record using the given PK and fetch SK field and does SELECT verification. + * 2) A UNIONALL operator combines tuples from the left path and the right path and the rest of the plan continues. + * In an index-only plan, sort before the primary index-search is not required since we assume that + * the chances that a tuple (<SK, PK> pair) goes into the left path are low. + * If the given query plan is not an index-only plan, we call this plan as non-index only plan. + * In this case, the original plan will be transformed into the following pattern. + * The original SELECT operator is placed at the top. And the original ASSIGN / UNNEST operators are followed. + * An UNNEST-MAP that conducts the primary-index-search to fetch the primary keys are placed before that. An ORDER + * operator is placed to sort the primary keys before feed them into the primary-index. Then, an UNNEST-MAP is followed + * to conduct a secondary-index search. Then, the search key (ASSIGN / UNNEST) is followed. + * In this case, the sort is optional, and some access methods implementations may choose not to sort. * Note that for some index-based optimizations we do not remove the triggering * condition from the select, since the index may only acts as a filter, and the * final verification must still be done with the original select condition. * The basic outline of this rule is: * 1. Match operator pattern. * 2. Analyze select condition to see if there are optimizable functions (delegated to IAccessMethods). - * 3. Check metadata to see if there are applicable indexes. - * 4. Choose an index to apply (for now only a single index will be chosen). - * 5. Rewrite plan using index (delegated to IAccessMethods). - * If there are multiple secondary index access path available, we will use the intersection operator to get the - * intersected primary key from all the secondary indexes. The detailed documentation is here + * 3. Check meta-data to see if there are applicable indexes. + * 4. Choose an index (or more indexes) to apply. + * 5. Rewrite the plan using index(es) (delegated to IAccessMethods). + * If multiple secondary index access paths are available, the optimizer uses the intersection operator + * to get the intersected primary key from all the chosen secondary indexes. In this case, we don't check + * whether the given plan is an index-only plan. + * The detailed documentation of intersecting multiple secondary indexes is here: * https://cwiki.apache.org/confluence/display/ASTERIXDB/Intersect+multiple+secondary+index */ public class IntroduceSelectAccessMethodRule extends AbstractIntroduceAccessMethodRule { @@ -169,26 +203,15 @@ public class IntroduceSelectAccessMethodRule extends AbstractIntroduceAccessMeth } /** - * Construct all applicable secondary index-based access paths in the given selection plan and - * intersect them using INTERSECT operator to guide to the common primary index search. - * In case where the applicable index is one, we only construct one path. + * Constructs all applicable secondary index-based access paths in the given selection plan and + * intersects them using INTERSECT operator to guide to the common primary-index search. + * This method assumes that there are two or more secondary indexes in the given path. */ private boolean intersectAllSecondaryIndexes(List<Pair<IAccessMethod, Index>> chosenIndexes, Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs, IOptimizationContext context) throws AlgebricksException { - Pair<IAccessMethod, Index> chosenIndex = null; - Optional<Pair<IAccessMethod, Index>> primaryIndex = - chosenIndexes.stream().filter(pair -> pair.second.isPrimaryIndex()).findFirst(); if (chosenIndexes.size() == 1) { - chosenIndex = chosenIndexes.get(0); - } else if (primaryIndex.isPresent()) { - // one primary + secondary indexes, choose the primary index directly. - chosenIndex = primaryIndex.get(); - } - if (chosenIndex != null) { - AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first); - return chosenIndex.first.applySelectPlanTransformation(afterSelectRefs, selectRef, subTree, - chosenIndex.second, analysisCtx, context); + throw CompilationException.create(ErrorCode.CHOSEN_INDEX_COUNT_SHOULD_BE_GREATER_THAN_ONE); } // Intersect all secondary indexes, and postpone the primary index search. @@ -197,12 +220,13 @@ public class IntroduceSelectAccessMethodRule extends AbstractIntroduceAccessMeth List<ILogicalOperator> subRoots = new ArrayList<>(); for (Pair<IAccessMethod, Index> pair : chosenIndexes) { AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(pair.first); - subRoots.add(pair.first.createSecondaryToPrimaryPlan(conditionRef, subTree, null, pair.second, analysisCtx, + subRoots.add(pair.first.createIndexSearchPlan(afterSelectRefs, selectRef, conditionRef, + subTree.getAssignsAndUnnestsRefs(), subTree, null, pair.second, analysisCtx, AccessMethodUtils.retainInputs(subTree.getDataSourceVariables(), subTree.getDataSourceRef().getValue(), afterSelectRefs), false, subTree.getDataSourceRef().getValue().getInputs().get(0).getValue() .getExecutionMode() == ExecutionMode.UNPARTITIONED, - context)); + context, null)); } // Connect each secondary index utilization plan to a common intersect operator. ILogicalOperator primaryUnnestOp = connectAll2ndarySearchPlanWithIntersect(subRoots, context); @@ -212,6 +236,24 @@ public class IntroduceSelectAccessMethodRule extends AbstractIntroduceAccessMeth } /** + * Checks whether the primary index exists among the applicable indexes and return it if is exists. + * + * @param chosenIndexes + * @return Pair<IAccessMethod, Index> for the primary index + * null otherwise + * @throws AlgebricksException + */ + private Pair<IAccessMethod, Index> fetchPrimaryIndexAmongChosenIndexes( + List<Pair<IAccessMethod, Index>> chosenIndexes) throws AlgebricksException { + Optional<Pair<IAccessMethod, Index>> primaryIndex = + chosenIndexes.stream().filter(pair -> pair.second.isPrimaryIndex()).findFirst(); + if (primaryIndex.isPresent()) { + return primaryIndex.get(); + } + return null; + } + + /** * Connect each secondary index utilization plan to a common INTERSECT operator. */ private ILogicalOperator connectAll2ndarySearchPlanWithIntersect(List<ILogicalOperator> subRoots, @@ -352,9 +394,36 @@ public class IntroduceSelectAccessMethodRule extends AbstractIntroduceAccessMeth } // Apply plan transformation using chosen index. - boolean res = intersectAllSecondaryIndexes(chosenIndexes, analyzedAMs, context); - context.addToDontApplySet(this, selectOp); + boolean res; + + // Primary index applicable? + Pair<IAccessMethod, Index> chosenPrimaryIndex = fetchPrimaryIndexAmongChosenIndexes(chosenIndexes); + if (chosenPrimaryIndex != null) { + AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenPrimaryIndex.first); + res = chosenPrimaryIndex.first.applySelectPlanTransformation(afterSelectRefs, selectRef, subTree, + chosenPrimaryIndex.second, analysisCtx, context); + context.addToDontApplySet(this, selectRef.getValue()); + } else if (chosenIndexes.size() == 1) { + // Index-only plan possible? + // Gets the analysis context for the given index. + AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndexes.get(0).first); + + // Finds the field name of each variable in the sub-tree. + fillFieldNamesInTheSubTree(subTree); + + // Finally, try to apply plan transformation using chosen index. + res = chosenIndexes.get(0).first.applySelectPlanTransformation(afterSelectRefs, selectRef, subTree, + chosenIndexes.get(0).second, analysisCtx, context); + context.addToDontApplySet(this, selectRef.getValue()); + } else { + // Multiple secondary indexes applicable? + res = intersectAllSecondaryIndexes(chosenIndexes, analyzedAMs, context); + context.addToDontApplySet(this, selectRef.getValue()); + } + // If the plan transformation is successful, we don't need to traverse + // the plan any more, since if there are more SELECT operators, the next + // trigger on this plan will find them. if (res) { OperatorPropertiesUtil.typeOpRec(opRef, context); return res; @@ -366,7 +435,7 @@ public class IntroduceSelectAccessMethodRule extends AbstractIntroduceAccessMeth afterSelectRefs.add(opRef); } - // Clean the path after SELECT operator by removing the current operator in the list. + // Cleans the path after SELECT operator by removing the current operator in the list. afterSelectRefs.remove(opRef); return false;