Taewoo Kim has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/1350
Change subject: Index-only plan step 3: Top-down Select and Join transformation
rule
......................................................................
Index-only plan step 3: Top-down Select and Join transformation rule
- Converted IntroduceSelectAccessMethodRule and IntroduceJoinAccessMethodRule
from bottom-up approach to top-down approach.
- Index-only plan needs to verify the variables that are live in the select or
join condition
are the only variables to be used unless a variable is generated after the
select or join operator.
- In order to keep this information, top-down approach is introduced.
Change-Id: I60a2a61eb46851d4c16c8f17447e3ac9b0aca779
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
9 files changed, 504 insertions(+), 219 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/50/1350/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
index 8e78d1a..416caca 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
@@ -142,7 +142,7 @@
* process by making it more systematic.
*/
protected Pair<IAccessMethod, Index> chooseBestIndex(Map<IAccessMethod,
AccessMethodAnalysisContext> analyzedAMs) {
- List<Pair<IAccessMethod, Index>> list = chooseAllIndex(analyzedAMs);
+ List<Pair<IAccessMethod, Index>> list = chooseAllIndexes(analyzedAMs);
return list.isEmpty() ? null : list.get(0);
}
@@ -154,7 +154,7 @@
* [InvertedIndexAccessMethod, IndexType.SINGLE_PARTITION_WORD_INVIX ||
SINGLE_PARTITION_NGRAM_INVIX ||
* LENGTH_PARTITIONED_WORD_INVIX || LENGTH_PARTITIONED_NGRAM_INVIX]
*/
- protected List<Pair<IAccessMethod, Index>> chooseAllIndex(
+ protected List<Pair<IAccessMethod, Index>> chooseAllIndexes(
Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
List<Pair<IAccessMethod, Index>> result = new
ArrayList<Pair<IAccessMethod, Index>>();
// Use variables (fields) to the index types map to check which type
of indexes are applied for the vars.
@@ -361,25 +361,28 @@
*
* @throws AlgebricksException
*/
- protected boolean analyzeCondition(ILogicalExpression cond,
List<AbstractLogicalOperator> assignsAndUnnests,
+ protected boolean
analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(ILogicalExpression cond,
+ List<AbstractLogicalOperator> assignsAndUnnests,
Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs,
IOptimizationContext context,
IVariableTypeEnvironment typeEnvironment) throws
AlgebricksException {
AbstractFunctionCallExpression funcExpr =
(AbstractFunctionCallExpression) cond;
FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
- // Don't consider optimizing a disjunctive condition with an index (too
- // complicated for now).
+ // TODO: We don't consider a disjunctive condition with an index yet
since it's complex.
if (funcIdent == AlgebricksBuiltinFunctions.OR) {
return false;
}
- boolean found = analyzeFunctionExpr(funcExpr, assignsAndUnnests,
analyzedAMs, context, typeEnvironment);
+ // Find applicable access methods for the given function expression
based on the
+ // function identifier and its arguments. Update the analyzedAMs
accordingly.
+ boolean found = analyzeFunctionExprAndUpdateAnalyzedAM(funcExpr,
assignsAndUnnests, analyzedAMs, context,
+ typeEnvironment);
for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
ILogicalExpression argExpr = arg.getValue();
if (argExpr.getExpressionTag() !=
LogicalExpressionTag.FUNCTION_CALL) {
continue;
}
AbstractFunctionCallExpression argFuncExpr =
(AbstractFunctionCallExpression) argExpr;
- boolean matchFound = analyzeFunctionExpr(argFuncExpr,
assignsAndUnnests, analyzedAMs, context,
- typeEnvironment);
+ boolean matchFound =
analyzeFunctionExprAndUpdateAnalyzedAM(argFuncExpr, assignsAndUnnests,
analyzedAMs,
+ context, typeEnvironment);
found = found || matchFound;
}
return found;
@@ -392,7 +395,7 @@
*
* @throws AlgebricksException
*/
- protected boolean analyzeFunctionExpr(AbstractFunctionCallExpression
funcExpr,
+ protected boolean
analyzeFunctionExprAndUpdateAnalyzedAM(AbstractFunctionCallExpression funcExpr,
List<AbstractLogicalOperator> assignsAndUnnests,
Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs,
IOptimizationContext context,
IVariableTypeEnvironment typeEnvironment) throws
AlgebricksException {
@@ -486,9 +489,9 @@
}
for (IOptimizableFuncExpr optFuncExpr : analysisCtx.matchedFuncExprs) {
// Try to match variables from optFuncExpr to assigns or unnests.
- for (int assignOrUnnestIndex = 0; assignOrUnnestIndex <
subTree.getAssignsAndUnnests()
+ for (int assignOrUnnestIndex = 0; assignOrUnnestIndex <
subTree.getAssignsAndUnnestsOps()
.size(); assignOrUnnestIndex++) {
- AbstractLogicalOperator op =
subTree.getAssignsAndUnnests().get(assignOrUnnestIndex);
+ AbstractLogicalOperator op =
subTree.getAssignsAndUnnestsOps().get(assignOrUnnestIndex);
if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
analyzeAssignOp((AssignOperator) op, optFuncExpr, subTree,
assignOrUnnestIndex, datasetRecordVar,
datasetMetaVar, context, datasetIndexes,
optFuncExprIndex, analysisCtx);
@@ -679,7 +682,7 @@
// Get expression corresponding to opVar at varIndex.
AbstractLogicalExpression expr = null;
AbstractFunctionCallExpression childFuncExpr = null;
- AbstractLogicalOperator op =
subTree.getAssignsAndUnnests().get(opIndex);
+ AbstractLogicalOperator op =
subTree.getAssignsAndUnnestsOps().get(opIndex);
if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
AssignOperator assignOp = (AssignOperator) op;
expr = (AbstractLogicalExpression)
assignOp.getExpressions().get(assignVarIndex).getValue();
@@ -747,8 +750,8 @@
int[] assignAndExpressionIndexes = null;
//go forward through nested assigns until you find the relevant one
- for (int i = opIndex + 1; i <
subTree.getAssignsAndUnnests().size(); i++) {
- AbstractLogicalOperator subOp =
subTree.getAssignsAndUnnests().get(i);
+ for (int i = opIndex + 1; i <
subTree.getAssignsAndUnnestsOps().size(); i++) {
+ AbstractLogicalOperator subOp =
subTree.getAssignsAndUnnestsOps().get(i);
List<LogicalVariable> varList;
if (subOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
@@ -835,9 +838,9 @@
LogicalVariable curVar = ((VariableReferenceExpression)
argExpr).getVariableReference();
// We look for the assign or unnest operator that produces curVar below
// the current operator
- for (int assignOrUnnestIndex = opIndex + 1; assignOrUnnestIndex <
subTree.getAssignsAndUnnests()
+ for (int assignOrUnnestIndex = opIndex + 1; assignOrUnnestIndex <
subTree.getAssignsAndUnnestsOps()
.size(); assignOrUnnestIndex++) {
- AbstractLogicalOperator curOp =
subTree.getAssignsAndUnnests().get(assignOrUnnestIndex);
+ AbstractLogicalOperator curOp =
subTree.getAssignsAndUnnestsOps().get(assignOrUnnestIndex);
if (curOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
AssignOperator assignOp = (AssignOperator) curOp;
List<LogicalVariable> varList = assignOp.getVariables();
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
index ca5da98..c5ab422 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
@@ -23,10 +23,9 @@
import java.util.Map;
import java.util.TreeMap;
-import org.apache.commons.lang3.mutable.Mutable;
-
import org.apache.asterix.metadata.entities.Dataset;
import org.apache.asterix.metadata.entities.Index;
+import org.apache.commons.lang3.mutable.Mutable;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
@@ -44,7 +43,7 @@
public Map<Index, List<Pair<Integer, Integer>>> indexExprsAndVars = new
TreeMap<Index, List<Pair<Integer, Integer>>>();
// Maps from index to the dataset it is indexing.
- public Map<Index, Dataset> indexDatasetMap = new TreeMap<Index, Dataset>();
+ private Map<Index, Dataset> indexDatasetMap = new TreeMap<Index,
Dataset>();
// Maps from an index to the number of matched fields in the query plan
(for performing prefix search)
public Map<Index, Integer> indexNumMatchedKeys = new TreeMap<Index,
Integer>();
@@ -60,7 +59,7 @@
indexExprsAndVars.put(index, exprs);
}
exprs.add(new Pair<Integer, Integer>(exprIndex, varIndex));
- indexDatasetMap.put(index, dataset);
+ getIndexDatasetMap().put(index, dataset);
}
public List<Pair<Integer, Integer>> getIndexExprs(Index index) {
@@ -83,4 +82,12 @@
return lojIsNullFuncInGroupBy;
}
+ public Map<Index, Dataset> getIndexDatasetMap() {
+ return indexDatasetMap;
+ }
+
+ public void setIndexDatasetMap(Map<Index, Dataset> indexDatasetMap) {
+ this.indexDatasetMap = indexDatasetMap;
+ }
+
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
index eb7d3a4..075d9a0 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
@@ -166,7 +166,7 @@
Mutable<ILogicalExpression> conditionRef = joinOp.getCondition();
// Determine if the index is applicable on the left or right side
// (if both, we arbitrarily prefer the left side).
- Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
+ Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex);
OptimizableOperatorSubTree indexSubTree;
OptimizableOperatorSubTree probeSubTree;
// We assume that the left subtree is the outer branch and the right
subtree is the inner branch.
@@ -216,8 +216,8 @@
@Override
public ILogicalOperator
createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef,
OptimizableOperatorSubTree indexSubTree,
OptimizableOperatorSubTree probeSubTree, Index chosenIndex,
- AccessMethodAnalysisContext analysisCtx, boolean retainInput,
boolean retainNull,
- boolean requiresBroadcast, IOptimizationContext context) throws
AlgebricksException {
+ AccessMethodAnalysisContext analysisCtx, boolean retainInput,
boolean retainNull, boolean requiresBroadcast,
+ IOptimizationContext context) throws AlgebricksException {
Dataset dataset = indexSubTree.getDataset();
ARecordType recordType = indexSubTree.getRecordType();
ARecordType metaRecordType = indexSubTree.getMetaRecordType();
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
index 5691d57..e0ca121 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
@@ -30,7 +30,6 @@
import
org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import org.apache.hyracks.data.std.api.IDataOutputProvider;
/**
* Interface that an access method should implement to work with the rewrite
@@ -86,13 +85,9 @@
IOptimizationContext context) throws AlgebricksException;
public ILogicalOperator
createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef,
- OptimizableOperatorSubTree indexSubTree,
- OptimizableOperatorSubTree probeSubTree,
- Index chosenIndex,
- AccessMethodAnalysisContext analysisCtx,
- boolean retainInput, boolean retainNull, boolean requiresBroadcast,
- IOptimizationContext context)
- throws AlgebricksException;
+ OptimizableOperatorSubTree indexSubTree,
OptimizableOperatorSubTree probeSubTree, Index chosenIndex,
+ AccessMethodAnalysisContext analysisCtx, boolean retainInput,
boolean retainNull, boolean requiresBroadcast,
+ IOptimizationContext context) throws AlgebricksException;
/**
* Applies the plan transformation to use chosenIndex to optimize a join
query.
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
index ff4d219..9344f3f 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
@@ -24,6 +24,7 @@
import java.util.Map;
import org.apache.asterix.metadata.declared.AqlMetadataProvider;
+import org.apache.asterix.metadata.entities.Dataset;
import org.apache.asterix.metadata.entities.Index;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -53,7 +54,8 @@
* This rule tries to utilize an index on the inner relation.
* If that's not possible, it stops transforming the given join into an
index-nested-loop join.
* Replaces the above pattern with the following simplified plan:
- * (select) <-- (assign) <-- (btree search) <-- (sort) <-- (unnest(index
search)) <-- (assign) <-- (datasource scan | unnest-map)
+ * (select) <-- (assign) <-- (btree search) <-- (sort) <-- (unnest(index
search)) <-- (assign) <-- (A)
+ * (A) <-- (datasource scan | unnest-map)
* The sort is optional, and some access methods may choose not to sort.
* Note that for some index-based optimizations we do not remove the triggering
* condition from the join, since the secondary index may only act as a
filter, and the
@@ -76,16 +78,17 @@
public class IntroduceJoinAccessMethodRule extends
AbstractIntroduceAccessMethodRule {
protected Mutable<ILogicalOperator> joinRef = null;
- protected AbstractBinaryJoinOperator join = null;
+ protected AbstractBinaryJoinOperator joinOp = null;
protected AbstractFunctionCallExpression joinCond = null;
protected final OptimizableOperatorSubTree leftSubTree = new
OptimizableOperatorSubTree();
protected final OptimizableOperatorSubTree rightSubTree = new
OptimizableOperatorSubTree();
protected IVariableTypeEnvironment typeEnvironment = null;
protected boolean isLeftOuterJoin = false;
protected boolean hasGroupBy = true;
+ protected IOptimizationContext context = null;
// Register access methods.
- protected static Map<FunctionIdentifier, List<IAccessMethod>>
accessMethods = new HashMap<FunctionIdentifier, List<IAccessMethod>>();
+ protected static Map<FunctionIdentifier, List<IAccessMethod>>
accessMethods = new HashMap<>();
static {
registerAccessMethod(BTreeAccessMethod.INSTANCE, accessMethods);
@@ -93,90 +96,59 @@
registerAccessMethod(InvertedIndexAccessMethod.INSTANCE,
accessMethods);
}
+ /**
+ * Recursively check the given plan from the root operator to transform a
plan
+ * with JOIN or LEFT-OUTER-JOIN operator into an index-utilized plan.
+ */
+
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
throws AlgebricksException {
clear();
setMetadataDeclarations(context);
+ this.context = context;
- // Match operator pattern and initialize optimizable sub trees.
- if (!matchesOperatorPattern(opRef, context)) {
- return false;
- }
- // Analyze condition on those optimizable subtrees that have a
datasource scan.
- Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = new
HashMap<IAccessMethod, AccessMethodAnalysisContext>();
- boolean matchInLeftSubTree = false;
- boolean matchInRightSubTree = false;
- if (leftSubTree.hasDataSource()) {
- matchInLeftSubTree = analyzeCondition(joinCond,
leftSubTree.getAssignsAndUnnests(), analyzedAMs, context,
- typeEnvironment);
- }
- if (rightSubTree.hasDataSource()) {
- matchInRightSubTree = analyzeCondition(joinCond,
rightSubTree.getAssignsAndUnnests(), analyzedAMs, context,
- typeEnvironment);
- }
- if (!matchInLeftSubTree && !matchInRightSubTree) {
+ AbstractLogicalOperator op = (AbstractLogicalOperator)
opRef.getValue();
+
+ // Already checked?
+ if (context.checkIfInDontApplySet(this, op)) {
return false;
}
- // Set dataset and type metadata.
- AqlMetadataProvider metadataProvider = (AqlMetadataProvider)
context.getMetadataProvider();
- boolean checkLeftSubTreeMetadata = false;
- boolean checkRightSubTreeMetadata = false;
- if (matchInLeftSubTree) {
- checkLeftSubTreeMetadata =
leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
- }
- if (matchInRightSubTree) {
- checkRightSubTreeMetadata =
rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
- }
- if (!checkLeftSubTreeMetadata && !checkRightSubTreeMetadata) {
- return false;
- }
- if (checkLeftSubTreeMetadata) {
- fillSubTreeIndexExprs(leftSubTree, analyzedAMs, context);
- }
- if (checkRightSubTreeMetadata) {
- fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context);
- }
- pruneIndexCandidates(analyzedAMs, context, typeEnvironment);
-
- // We only consider indexes from the inner branch (right subTree).
- // If no index is available, then we stop this optimization.
- removeIndexCandidatesFromOuterBranch(analyzedAMs);
-
- // Choose an index from the inner branch that will be used.
- Pair<IAccessMethod, Index> chosenIndex = chooseBestIndex(analyzedAMs);
- if (chosenIndex == null) {
- context.addToDontApplySet(this, join);
+ // Check whether this operator is the root, which is DISTRIBUTE_RESULT
or SINK since
+ // we start the process from the root operator.
+ // The reason is that in order to qualify as an index-only plan, the
return clause
+ // should only include PK and/or SK fields. And after a JOIN operator,
no other variables
+ // other than PK and/or SK should be used unless variables are
generated after that JOIN operator.
+ if (op.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT
+ && op.getOperatorTag() != LogicalOperatorTag.SINK) {
return false;
}
- // Apply plan transformation using chosen index.
- AccessMethodAnalysisContext analysisCtx =
analyzedAMs.get(chosenIndex.first);
+ // Recursively check the given plan whether the desired pattern exists
in it.
+ // If so, try to optimize the plan.
+ boolean planTransformed = checkAndApplyTheRule(opRef);
- //For LOJ with GroupBy, prepare objects to reset LOJ
nullPlaceHolderVariable in GroupByOp
- if (isLeftOuterJoin && hasGroupBy) {
- analysisCtx.setLOJGroupbyOpRef(opRef);
- ScalarFunctionCallExpression isNullFuncExpr = AccessMethodUtils
- .findLOJIsMissingFuncInGroupBy((GroupByOperator)
opRef.getValue());
- analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr);
+ if (joinOp != null) {
+ // We found an optimization here. Don't need to optimize this
operator again.
+ context.addToDontApplySet(this, joinOp);
}
- // At this point, we are sure that only an index from the inner branch
is going to be used.
- // So, the left subtree is the outer branch and the right subtree is
the inner branch.
- boolean res = chosenIndex.first.applyJoinPlanTransformation(joinRef,
leftSubTree, rightSubTree,
- chosenIndex.second, analysisCtx, context, isLeftOuterJoin,
hasGroupBy);
- if (res) {
+ if (!planTransformed) {
+ return false;
+ } else {
OperatorPropertiesUtil.typeOpRec(opRef, context);
}
- context.addToDontApplySet(this, join);
- return res;
+
+ return planTransformed;
}
/**
- * Removes indexes from the optimizer's consideration for this rule.
+ * Removes indexes from the outer branch from the optimizer's
consideration for this rule,
+ * since we only use indexes from the inner branch.
*/
- protected void removeIndexCandidatesFromOuterBranch(Map<IAccessMethod,
AccessMethodAnalysisContext> analyzedAMs) {
+ protected void pruneIndexCandidatesFromOuterBranch(Map<IAccessMethod,
AccessMethodAnalysisContext> analyzedAMs) {
+ // Inner branch is the right side branch of the given JOIN operator.
String innerDataset = null;
if (rightSubTree.getDataset() != null) {
innerDataset = rightSubTree.getDataset().getDatasetName();
@@ -200,8 +172,8 @@
Pair<Integer, Integer> exprAndVarIdx =
exprsAndVarIter.next();
IOptimizableFuncExpr optFuncExpr =
amCtx.matchedFuncExprs.get(exprAndVarIdx.first);
- // Does this index come from the inner branch?
- // We check the dataset name and the subtree to make sure
the index is applicable.
+ // We check the dataset name and the subtree to make sure
+ // that this index come from the inner branch.
if
(indexExprAndVarEntry.getKey().getDatasetName().equals(innerDataset)) {
if
(optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).equals(rightSubTree)) {
indexFromInnerBranch = true;
@@ -209,8 +181,8 @@
}
}
- // If the index does not come from the inner branch,
- // We do not consider this index.
+ // If the given index does not come from the inner branch,
+ // prune this index so that the optimizer doesn't consider
this index in this rule.
if (!indexFromInnerBranch) {
indexIt.remove();
}
@@ -218,50 +190,11 @@
}
}
- protected boolean matchesOperatorPattern(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
- throws AlgebricksException {
- // First check that the operator is a join and its condition is a
function call.
- AbstractLogicalOperator op1 = (AbstractLogicalOperator)
opRef.getValue();
- if (context.checkIfInDontApplySet(this, op1)) {
- return false;
- }
-
- boolean isInnerJoin = isInnerJoin(op1);
- isLeftOuterJoin = isLeftOuterJoin(op1);
-
- if (!isInnerJoin && !isLeftOuterJoin) {
- return false;
- }
-
- // Set and analyze select.
- if (isInnerJoin) {
- joinRef = opRef;
- join = (InnerJoinOperator) op1;
- } else {
- joinRef = op1.getInputs().get(0);
- join = (LeftOuterJoinOperator) joinRef.getValue();
- }
-
- typeEnvironment = context.getOutputTypeEnvironment(join);
- // Check that the select's condition is a function call.
- ILogicalExpression condExpr = join.getCondition().getValue();
- if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL)
{
- return false;
- }
- joinCond = (AbstractFunctionCallExpression) condExpr;
- boolean leftSubTreeInitialized =
leftSubTree.initFromSubTree(join.getInputs().get(0));
- boolean rightSubTreeInitialized =
rightSubTree.initFromSubTree(join.getInputs().get(1));
- if (!leftSubTreeInitialized || !rightSubTreeInitialized) {
- return false;
- }
-
- // One of the subtrees must have a datasource scan.
- if (leftSubTree.hasDataSourceScan() ||
rightSubTree.hasDataSourceScan()) {
- return true;
- }
- return false;
- }
-
+ /**
+ * Checks whether the given operator is LEFTOUTERJOIN.
+ * If so, also checks that GROUPBY is placed after LEFTOUTERJOIN.
+ */
+ // Check whether (Groupby)? <-- Leftouterjoin
private boolean isLeftOuterJoin(AbstractLogicalOperator op1) {
if (op1.getInputs().size() != 1) {
return false;
@@ -277,6 +210,9 @@
return true;
}
+ /**
+ * Checks whether the given operator is INNERJOIN.
+ */
private boolean isInnerJoin(AbstractLogicalOperator op1) {
return op1.getOperatorTag() == LogicalOperatorTag.INNERJOIN;
}
@@ -288,8 +224,212 @@
private void clear() {
joinRef = null;
- join = null;
+ joinOp = null;
joinCond = null;
isLeftOuterJoin = false;
+ context = null;
}
+
+ /**
+ * Recursively traverse the given plan and check whether a INNERJOIN or
LEFTOUTERJOIN operator exists.
+ * If one is found, maintain the path from the root to the given join
operator and
+ * optimize the path from the given join operator to the
EMPTY_TUPLE_SOURCE operator
+ * if it is not already optimized.
+ */
+ protected boolean checkAndApplyTheRule(Mutable<ILogicalOperator> opRef)
throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator)
opRef.getValue();
+ boolean joinFoundAndOptimizationApplied = false;
+
+ // Check the current operator pattern to see whether it is a JOIN or
not.
+ boolean isThisOpInnerJoin = isInnerJoin(op);
+ isLeftOuterJoin = isLeftOuterJoin(op);
+
+ boolean isThisOpLeftOuterJoin = isLeftOuterJoin;
+ boolean isParentOpGroupBy = hasGroupBy;
+
+ Mutable<ILogicalOperator> joinRefFromThisOp = null;
+ AbstractBinaryJoinOperator joinOpFromThisOp = null;
+
+ if (isThisOpInnerJoin) {
+ // Set join operator.
+ joinRef = opRef;
+ joinOp = (InnerJoinOperator) op;
+ joinRefFromThisOp = opRef;
+ joinOpFromThisOp = (InnerJoinOperator) op;
+ } else if (isThisOpLeftOuterJoin) {
+ // Set left-outer-join op.
+ // The current operator is GROUP and the child of this op is
LEFTOUERJOIN.
+ joinRef = op.getInputs().get(0);
+ joinOp = (LeftOuterJoinOperator) joinRef.getValue();
+ joinRefFromThisOp = op.getInputs().get(0);
+ joinOpFromThisOp = (LeftOuterJoinOperator)
joinRefFromThisOp.getValue();
+ }
+
+ // Recursively check the plan and try to optimize it. We first check
the children of the given operator
+ // to make sure an earlier join in the path is optimized first.
+ for (int i = 0; i < op.getInputs().size(); i++) {
+ joinFoundAndOptimizationApplied =
checkAndApplyTheRule(op.getInputs().get(i));
+ if (joinFoundAndOptimizationApplied) {
+ return true;
+ }
+ }
+
+ // For a JOIN case, try to transform the given plan.
+ if (isThisOpInnerJoin || isThisOpLeftOuterJoin) {
+ // Restore the information from this operator since it might have
been be set to null
+ // if there are other join operators in the earlier path.
+ joinRef = joinRefFromThisOp;
+ joinOp = joinOpFromThisOp;
+ isLeftOuterJoin = isThisOpLeftOuterJoin;
+ hasGroupBy = isParentOpGroupBy;
+
+ // Already checked? If not, this operator may be optimized.
+ if (!context.checkIfInDontApplySet(this, joinOp)) {
+ // For each access method, contains the information about
+ // whether an available index can be applicable or not.
+ Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs =
new HashMap<>();
+
+ // Check the condition of JOIN operator is a function call
since
+ // only function call can be transformed using available
indexes.
+ // If so, initialize the subtree information that will be used
later to decide whether
+ // the given plan is truly optimizable or not.
+ if (checkJoinOpConditionAndInitSubTree()) {
+
+ // Analyze the condition of SELECT operator and initialize
analyzedAMs.
+ // Check whether the function in the SELECT operator can
be truly transformed.
+ boolean matchInLeftSubTree = false;
+ boolean matchInRightSubTree = false;
+ if (leftSubTree.hasDataSource()) {
+ matchInLeftSubTree =
analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond,
+ leftSubTree.getAssignsAndUnnestsOps(),
analyzedAMs, context, typeEnvironment);
+ }
+ if (rightSubTree.hasDataSource()) {
+ matchInRightSubTree =
analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond,
+ rightSubTree.getAssignsAndUnnestsOps(),
analyzedAMs, context, typeEnvironment);
+ }
+
+ // Find the dataset from the data-source and the record
type of the dataset from the metadata.
+ // This will be used to find an applicable index on the
dataset.
+ if (matchInLeftSubTree || matchInRightSubTree) {
+ // Set dataset and type metadata.
+ AqlMetadataProvider metadataProvider =
(AqlMetadataProvider) context.getMetadataProvider();
+ boolean checkLeftSubTreeMetadata = false;
+ boolean checkRightSubTreeMetadata = false;
+ if (matchInLeftSubTree) {
+ checkLeftSubTreeMetadata =
leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
+ }
+ if (matchInRightSubTree) {
+ checkRightSubTreeMetadata =
rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
+ }
+
+ if (checkLeftSubTreeMetadata ||
checkRightSubTreeMetadata) {
+ // Map variables to the applicable indexes and
find the field name and type.
+ // Then find the applicable indexes for the
variables used in the JOIN condition.
+ if (checkLeftSubTreeMetadata) {
+ fillSubTreeIndexExprs(leftSubTree,
analyzedAMs, context);
+ }
+ if (checkRightSubTreeMetadata) {
+ fillSubTreeIndexExprs(rightSubTree,
analyzedAMs, context);
+ }
+
+ // Prune the access methods based on the function
expression and access methods.
+ pruneIndexCandidates(analyzedAMs, context,
typeEnvironment);
+
+ // If the right subtree (inner branch) has
indexes, one of those indexes will be used.
+ // Remove the indexes from the outer branch
+ // in the optimizer's consideration list for this
rule.
+ pruneIndexCandidatesFromOuterBranch(analyzedAMs);
+
+ // We are going to use indexes from the inner
branch.
+ // If no index is available, then we stop here.
+ Pair<IAccessMethod, Index> chosenIndex =
chooseBestIndex(analyzedAMs);
+ if (chosenIndex != null) {
+ // Apply plan transformation using chosen
index.
+ AccessMethodAnalysisContext analysisCtx =
analyzedAMs.get(chosenIndex.first);
+
+ // For LOJ with GroupBy, prepare objects to
reset LOJ nullPlaceHolderVariable
+ // in GroupByOp.
+ if (isThisOpLeftOuterJoin &&
isParentOpGroupBy) {
+ analysisCtx.setLOJGroupbyOpRef(opRef);
+ ScalarFunctionCallExpression
isNullFuncExpr = AccessMethodUtils
+
.findLOJIsMissingFuncInGroupBy((GroupByOperator) opRef.getValue());
+
analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr);
+ }
+
+ Dataset indexDataset =
analysisCtx.getIndexDatasetMap().get(chosenIndex.second);
+
+ OptimizableOperatorSubTree indexSubTree = null;
+
+ // We assume that the left subtree is the
outer branch and the right subtree
+ // is the inner branch. This assumption holds
true since we only use an index
+ // from the right subtree. The following is
just a sanity check.
+ if (rightSubTree.hasDataSourceScan() &&
indexDataset.getDatasetName()
+
.equals(rightSubTree.getDataset().getDatasetName())) {
+ indexSubTree = rightSubTree;
+ } else {
+ return false;
+ }
+
+ if (indexSubTree == null) {
+ return false;
+ }
+
+ // Finally, try to apply plan transformation
using chosen index.
+ boolean res =
chosenIndex.first.applyJoinPlanTransformation(joinRef, leftSubTree,
+ rightSubTree, chosenIndex.second,
analysisCtx, context, isLeftOuterJoin,
+ hasGroupBy);
+
+ // If the plan transformation is successful,
we don't need to traverse the plan
+ // any more, since if there are more JOIN
operators, the next trigger on this plan
+ // will find them.
+ if (res) {
+ return res;
+ }
+ } else {
+ context.addToDontApplySet(this, joinOp);
+ }
+
+ }
+
+ }
+ }
+
+ }
+ joinRef = null;
+ joinOp = null;
+ }
+
+ return false;
+ }
+
+ /**
+ * After the pattern is matched, check the condition and initialize the
data sources from the both sub trees.
+ *
+ * @throws AlgebricksException
+ */
+ protected boolean checkJoinOpConditionAndInitSubTree() throws
AlgebricksException {
+
+ typeEnvironment = context.getOutputTypeEnvironment(joinOp);
+
+ // Check that the join's condition is a function call.
+ ILogicalExpression condExpr = joinOp.getCondition().getValue();
+ if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL)
{
+ return false;
+ }
+ joinCond = (AbstractFunctionCallExpression) condExpr;
+
+ boolean leftSubTreeInitialized =
leftSubTree.initFromSubTree(joinOp.getInputs().get(0), context);
+ boolean rightSubTreeInitialized =
rightSubTree.initFromSubTree(joinOp.getInputs().get(1), context);
+
+ if (!leftSubTreeInitialized || !rightSubTreeInitialized) {
+ return false;
+ }
+
+ // One of the subtrees must have a datasource scan.
+ if (leftSubTree.hasDataSourceScan() ||
rightSubTree.hasDataSourceScan()) {
+ return true;
+ }
+ return false;
+ }
+
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
index 2464c06..5fc02db 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
@@ -20,11 +20,12 @@
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.TreeMap;
+import org.apache.asterix.algebra.operators.CommitOperator;
import org.apache.asterix.metadata.declared.AqlMetadataProvider;
import org.apache.asterix.metadata.entities.Index;
import org.apache.commons.lang3.mutable.Mutable;
@@ -42,6 +43,7 @@
import
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.DelegateOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.IntersectOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
@@ -79,13 +81,14 @@
// Operators representing the patterns to be matched:
// These ops are set in matchesPattern()
protected Mutable<ILogicalOperator> selectRef = null;
- protected SelectOperator select = null;
+ protected SelectOperator selectOp = null;
protected AbstractFunctionCallExpression selectCond = null;
protected IVariableTypeEnvironment typeEnvironment = null;
protected final OptimizableOperatorSubTree subTree = new
OptimizableOperatorSubTree();
+ protected IOptimizationContext context = null;
// Register access methods.
- protected static Map<FunctionIdentifier, List<IAccessMethod>>
accessMethods = new HashMap<FunctionIdentifier, List<IAccessMethod>>();
+ protected static Map<FunctionIdentifier, List<IAccessMethod>>
accessMethods = new HashMap<>();
static {
registerAccessMethod(BTreeAccessMethod.INSTANCE, accessMethods);
@@ -93,51 +96,85 @@
registerAccessMethod(InvertedIndexAccessMethod.INSTANCE,
accessMethods);
}
+ /**
+ * Recursively check the given plan from the root operator to transform a
plan
+ * with SELECT operator into an index-utilized plan.
+ */
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
throws AlgebricksException {
clear();
setMetadataDeclarations(context);
+ this.context = context;
- // Match operator pattern and initialize operator members.
- if (!matchesOperatorPattern(opRef, context)) {
+ AbstractLogicalOperator op = (AbstractLogicalOperator)
opRef.getValue();
+
+ // Already checked?
+ if (context.checkIfInDontApplySet(this, op)) {
return false;
}
- // Analyze select condition.
- Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = new
TreeMap<IAccessMethod, AccessMethodAnalysisContext>();
- if (!analyzeCondition(selectCond, subTree.getAssignsAndUnnests(),
analyzedAMs, context, typeEnvironment)) {
+ // Check whether this operator is the root, which is DISTRIBUTE_RESULT
or SINK since
+ // we start the process from the root operator.
+ if (op.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT
+ && op.getOperatorTag() != LogicalOperatorTag.SINK
+ && op.getOperatorTag() !=
LogicalOperatorTag.DELEGATE_OPERATOR) {
return false;
}
- // Set dataset and type metadata.
- if (!subTree.setDatasetAndTypeMetadata((AqlMetadataProvider)
context.getMetadataProvider())) {
+ if (op.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR
+ && !(((DelegateOperator) op).getDelegate() instanceof
CommitOperator)) {
return false;
}
- fillSubTreeIndexExprs(subTree, analyzedAMs, context);
- pruneIndexCandidates(analyzedAMs, context, typeEnvironment);
+ // Recursively check the given plan whether the desired pattern exists
in it.
+ // If so, try to optimize the plan.
+ boolean planTransformed =
checkAndApplyTheSelectTransformationRule(opRef);
- // Choose index to be applied.
- List<Pair<IAccessMethod, Index>> chosenIndexes =
chooseAllIndex(analyzedAMs);
- if (chosenIndexes == null || chosenIndexes.size() == 0) {
- context.addToDontApplySet(this, select);
- return false;
+ if (selectOp != null) {
+ // We found an optimization here. Don't need to optimize this
operator again.
+ context.addToDontApplySet(this, selectOp);
}
- // Apply plan transformation using chosen index.
- boolean res = intersectAllSecondaryIndexes(chosenIndexes, analyzedAMs,
context);
-
- if (res) {
+ if (!planTransformed) {
+ return false;
+ } else {
OperatorPropertiesUtil.typeOpRec(opRef, context);
}
- context.addToDontApplySet(this, select);
- return res;
+
+ return planTransformed;
}
+ /**
+ * Check that the given SELECT condition is a function call.
+ * Call initSubTree() to initialize the optimizable subtree that collects
information from
+ * the operators below the given SELECT operator.
+ * In order to transform the given plan, a datasource should be configured
+ * since we are going to transform a datasource into an unnest-map
operator.
+ */
+ protected boolean checkSelectOpConditionAndInitSubTree() throws
AlgebricksException {
+ // Set and analyze select.
+ ILogicalExpression condExpr = selectOp.getCondition().getValue();
+ typeEnvironment = context.getOutputTypeEnvironment(selectOp);
+ if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL)
{
+ return false;
+ }
+ selectCond = (AbstractFunctionCallExpression) condExpr;
+
+ // Initialize the subtree information.
+ // Match and put assign, unnest, and datasource information.
+ boolean res = subTree.initFromSubTree(selectOp.getInputs().get(0),
context);
+ return res && subTree.hasDataSourceScan();
+ }
+
+ /**
+ * Construct all applicable secondary index-based access paths in the
given selection plan and
+ * intersect them using INTERSECT operator to guide to the common primary
index search.
+ * In case where the applicable index is one, we only construct one path.
+ */
private boolean intersectAllSecondaryIndexes(List<Pair<IAccessMethod,
Index>> chosenIndexes,
Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs,
IOptimizationContext context)
- throws AlgebricksException {
+ throws AlgebricksException {
Pair<IAccessMethod, Index> chosenIndex = null;
Optional<Pair<IAccessMethod, Index>> primaryIndex =
chosenIndexes.stream()
.filter(pair -> pair.second.isPrimaryIndex()).findFirst();
@@ -153,8 +190,8 @@
context);
}
- // Intersect all secondary indexes, and postpone the primary index
search.
- Mutable<ILogicalExpression> conditionRef = select.getCondition();
+ // Intersect all secondary indexes and postpone the primary index
search.
+ Mutable<ILogicalExpression> conditionRef = selectOp.getCondition();
List<ILogicalOperator> subRoots = new ArrayList<>();
for (Pair<IAccessMethod, Index> pair : chosenIndexes) {
@@ -162,22 +199,26 @@
subRoots.add(pair.first.createSecondaryToPrimaryPlan(conditionRef,
subTree, null, pair.second, analysisCtx,
false, false, false, context));
}
- ILogicalOperator primaryUnnest =
connectAll2ndarySearchPlanWithIntersect(subRoots, context);
+ // Connect each secondary index utilization plan to a common intersect
operator.
+ ILogicalOperator primaryUnnestOp =
connectAll2ndarySearchPlanWithIntersect(subRoots, context);
- subTree.getDataSourceRef().setValue(primaryUnnest);
- return primaryUnnest != null;
+ subTree.getDataSourceRef().setValue(primaryUnnestOp);
+ return primaryUnnestOp != null;
}
+ /**
+ * Connect each secondary index utilization plan to a common INTERSECT
operator.
+ */
private ILogicalOperator
connectAll2ndarySearchPlanWithIntersect(List<ILogicalOperator> subRoots,
IOptimizationContext context) throws AlgebricksException {
ILogicalOperator lop = subRoots.get(0);
List<List<LogicalVariable>> inputVars = new
ArrayList<>(subRoots.size());
for (int i = 0; i < subRoots.size(); i++) {
if (lop.getOperatorTag() != subRoots.get(i).getOperatorTag()) {
- throw new AlgebricksException("The data source root should
have the same operator type");
+ throw new AlgebricksException("The data source root should
have the same operator type.");
}
if (lop.getInputs().size() != 1) {
- throw new AlgebricksException("The primary search has multiple
input");
+ throw new AlgebricksException("The primary search has multiple
inputs.");
}
ILogicalOperator curRoot = subRoots.get(i);
@@ -186,7 +227,8 @@
for (Pair<OrderOperator.IOrder, Mutable<ILogicalExpression>>
orderExpression : order
.getOrderExpressions()) {
if (orderExpression.second.getValue().getExpressionTag() !=
LogicalExpressionTag.VARIABLE) {
- throw new AlgebricksException("It should not happen, the
order by expression is not variables");
+ throw new AlgebricksException(
+ "The order by expression should be variables, but
they aren't variables.");
}
VariableReferenceExpression orderedVar =
(VariableReferenceExpression) orderExpression.second
.getValue();
@@ -205,29 +247,102 @@
return lop;
}
- protected boolean matchesOperatorPattern(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
+ /**
+ * Recursively traverse the given plan and check whether a SELECT operator
exists.
+ * If one is found, maintain the path from the root to SELECT operator and
+ * optimize the path from the SELECT operator to the EMPTY_TUPLE_SOURCE
operator
+ * if it is not already optimized.
+ */
+ protected boolean
checkAndApplyTheSelectTransformationRule(Mutable<ILogicalOperator> opRef)
throws AlgebricksException {
- // First check that the operator is a select and its condition is a
function call.
- AbstractLogicalOperator op1 = (AbstractLogicalOperator)
opRef.getValue();
- if (context.checkIfInDontApplySet(this, op1)) {
- return false;
- }
- if (op1.getOperatorTag() != LogicalOperatorTag.SELECT) {
- return false;
- }
- // Set and analyze select.
- selectRef = opRef;
- select = (SelectOperator) op1;
+ AbstractLogicalOperator op = (AbstractLogicalOperator)
opRef.getValue();
+ boolean selectFoundAndOptimizationApplied = false;
- typeEnvironment = context.getOutputTypeEnvironment(op1);
- // Check that the select's condition is a function call.
- ILogicalExpression condExpr = select.getCondition().getValue();
- if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL)
{
- return false;
+ // Traverse the plan until we find a SELECT operator.
+ if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
+ selectRef = opRef;
+ selectOp = (SelectOperator) op;
+
+ // Already checked this SELECT operator? If not, this operator may
be optimized.
+ if (!context.checkIfInDontApplySet(this, selectOp)) {
+ // For each access method, contains the information about
+ // whether an available index can be applicable or not.
+ Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs =
new LinkedHashMap<>();
+
+ // Check the condition of SELECT operator is a function call
since
+ // only function call can be transformed using available
indexes.
+ // If so, initialize the subtree information that will be used
later to decide whether
+ // the given plan is truly optimizable or not.
+ if (checkSelectOpConditionAndInitSubTree()) {
+
+ // We want to transform the SELECT operator in the
bottom-up way.
+ // Thus, if the subtree has SELECT operator, we let the
rule deal with that operator first.
+ if (!subTree.getSelectRefs().isEmpty()) {
+
+ // Analyze the condition of SELECT operator and
initialize analyzedAMs.
+ // Check whether the function in the SELECT operator
can be truly transformed.
+ if
(analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(selectCond,
+ subTree.getAssignsAndUnnestsOps(),
analyzedAMs, context, typeEnvironment)) {
+
+ // Find the dataset from the data-source and
+ // the record type of the dataset from the
metadata.
+ // This will be used to find an applicable index
on the dataset.
+ if (subTree
+
.setDatasetAndTypeMetadata((AqlMetadataProvider)
context.getMetadataProvider())) {
+
+ // Map variables to the applicable indexes and
find the field name and type.
+ // Then find the applicable indexes for the
variables used in the SELECT condition.
+ fillSubTreeIndexExprs(subTree, analyzedAMs,
context);
+
+ // Prune the access methods based on the
function expression and access methods.
+ pruneIndexCandidates(analyzedAMs, context,
typeEnvironment);
+
+ // Choose all indexes that will be applied.
+ List<Pair<IAccessMethod, Index>> chosenIndexes
= chooseAllIndexes(analyzedAMs);
+
+ if (chosenIndexes == null ||
chosenIndexes.isEmpty()) {
+ // We can't apply any index for this
SELECT operator
+ context.addToDontApplySet(this,
selectRef.getValue());
+ } else {
+ boolean res = false;
+
+ // If many secondary indexes are chosen,
then apply the intersection method.
+ if (chosenIndexes.size() > 1) {
+ res =
intersectAllSecondaryIndexes(chosenIndexes, analyzedAMs, context);
+ } else {
+ AccessMethodAnalysisContext
analysisCtx = analyzedAMs
+
.get(chosenIndexes.get(0).first);
+
+ // Finally, try to apply plan
transformation using chosen index.
+ res =
chosenIndexes.get(0).first.applySelectPlanTransformation(selectRef,
+ subTree,
chosenIndexes.get(0).second, analysisCtx, context);
+ }
+
+ // If the plan transformation is
successful, we don't need to traverse
+ // the plan any more, since if there are
more SELECT operators, the next
+ // trigger on this plan will find them.
+ if (res) {
+ return res;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ selectRef = null;
+ selectOp = null;
}
- selectCond = (AbstractFunctionCallExpression) condExpr;
- boolean res = subTree.initFromSubTree(op1.getInputs().get(0));
- return res && subTree.hasDataSourceScan();
+
+ // Recursively check the plan and try to optimize it.
+ for (int i = 0; i < op.getInputs().size(); i++) {
+ selectFoundAndOptimizationApplied =
checkAndApplyTheSelectTransformationRule(op.getInputs().get(i));
+ if (selectFoundAndOptimizationApplied) {
+ return true;
+ }
+ }
+
+ return false;
}
@Override
@@ -237,7 +352,9 @@
private void clear() {
selectRef = null;
- select = null;
+ selectOp = null;
selectCond = null;
+ context = null;
+ subTree.reset();
}
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
index 8f42904..9c034a2 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
@@ -452,7 +452,7 @@
AccessMethodAnalysisContext analysisCtx, IOptimizationContext
context, boolean isLeftOuterJoin,
boolean hasGroupBy) throws AlgebricksException {
// Figure out if the index is applicable on the left or right side (if
both, we arbitrarily prefer the left side).
- Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
+ Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex);
OptimizableOperatorSubTree indexSubTree;
OptimizableOperatorSubTree probeSubTree;
@@ -581,7 +581,7 @@
ILogicalExpression joinCond, IOptimizableFuncExpr optFuncExpr,
List<LogicalVariable> originalSubTreePKs,
List<LogicalVariable> surrogateSubTreePKs, IOptimizationContext
context) throws AlgebricksException {
- probeSubTree.getPrimaryKeyVars(originalSubTreePKs);
+ probeSubTree.getPrimaryKeyVars(null, originalSubTreePKs);
// Create two copies of the original probe subtree.
// The first copy, which becomes the new probe subtree, will retain
the primary-key and secondary-search key variables,
@@ -630,7 +630,7 @@
// Replace the original probe subtree with its copy.
Dataset origDataset = probeSubTree.getDataset();
ARecordType origRecordType = probeSubTree.getRecordType();
- probeSubTree.initFromSubTree(newProbeSubTreeRootRef);
+ probeSubTree.initFromSubTree(newProbeSubTreeRootRef, context);
probeSubTree.setDataset(origDataset);
probeSubTree.setRecordType(origRecordType);
@@ -1022,7 +1022,7 @@
return null;
}
- for (AbstractLogicalOperator op : subTree.getAssignsAndUnnests()) {
+ for (AbstractLogicalOperator op : subTree.getAssignsAndUnnestsOps()) {
if (op.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
continue;
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
index 01476e7..9998092 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
@@ -36,6 +36,7 @@
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
@@ -43,6 +44,7 @@
import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractScanOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestMapOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -65,7 +67,7 @@
private ILogicalOperator root = null;
private Mutable<ILogicalOperator> rootRef = null;
private final List<Mutable<ILogicalOperator>> assignsAndUnnestsRefs = new
ArrayList<>();
- private final List<AbstractLogicalOperator> assignsAndUnnests = new
ArrayList<>();
+ private final List<AbstractLogicalOperator> assignsAndUnnestsOps = new
ArrayList<>();
private Mutable<ILogicalOperator> dataSourceRef = null;
private DataSourceType dataSourceType = DataSourceType.NO_DATASOURCE;
@@ -81,19 +83,24 @@
private List<Dataset> ixJoinOuterAdditionalDatasets = null;
private List<ARecordType> ixJoinOuterAdditionalRecordTypes = null;
- public boolean initFromSubTree(Mutable<ILogicalOperator> subTreeOpRef)
throws AlgebricksException {
+ // SELECT operators
+ private List<Mutable<ILogicalOperator>> selectRefs = new ArrayList<>();
+
+ public boolean initFromSubTree(Mutable<ILogicalOperator> subTreeOpRef,
IOptimizationContext context)
+ throws AlgebricksException {
reset();
rootRef = subTreeOpRef;
root = subTreeOpRef.getValue();
// Examine the op's children to match the expected patterns.
AbstractLogicalOperator subTreeOp = (AbstractLogicalOperator)
subTreeOpRef.getValue();
do {
- // Skip select operator.
+ // Keep SELECT information if one is present.
if (subTreeOp.getOperatorTag() == LogicalOperatorTag.SELECT) {
+ getSelectRefs().add(subTreeOpRef);
subTreeOpRef = subTreeOp.getInputs().get(0);
subTreeOp = (AbstractLogicalOperator) subTreeOpRef.getValue();
}
- // Check primary-index pattern.
+ // Match datasource information if there are no (assign | unnest)*
if (subTreeOp.getOperatorTag() != LogicalOperatorTag.ASSIGN
&& subTreeOp.getOperatorTag() !=
LogicalOperatorTag.UNNEST) {
// Pattern may still match if we are looking for primary index
matches as well.
@@ -106,7 +113,7 @@
return false;
} else {
getAssignsAndUnnestsRefs().add(subTreeOpRef);
- getAssignsAndUnnests().add(subTreeOp);
+ getAssignsAndUnnestsOps().add(subTreeOp);
}
subTreeOpRef = subTreeOp.getInputs().get(0);
subTreeOp = (AbstractLogicalOperator) subTreeOpRef.getValue();
@@ -342,7 +349,7 @@
setRoot(null);
setRootRef(null);
getAssignsAndUnnestsRefs().clear();
- getAssignsAndUnnests().clear();
+ getAssignsAndUnnestsOps().clear();
setDataSourceRef(null);
setDataSourceType(DataSourceType.NO_DATASOURCE);
setIxJoinOuterAdditionalDataSourceRefs(null);
@@ -351,29 +358,37 @@
setIxJoinOuterAdditionalDatasets(null);
setRecordType(null);
setIxJoinOuterAdditionalRecordTypes(null);
+ clearSelectRefs();
}
- public void getPrimaryKeyVars(List<LogicalVariable> target) throws
AlgebricksException {
- switch (getDataSourceType()) {
+ /**
+ * Get primary key variables from the given data-source.
+ */
+ public void getPrimaryKeyVars(Mutable<ILogicalOperator>
dataSourceRefToFetch, List<LogicalVariable> target)
+ throws AlgebricksException {
+ Mutable<ILogicalOperator> dataSourceRefToFetchKey =
(dataSourceRefToFetch == null) ? dataSourceRef
+ : dataSourceRefToFetch;
+ switch (dataSourceType) {
case DATASOURCE_SCAN:
- DataSourceScanOperator dataSourceScan =
(DataSourceScanOperator) getDataSourceRef().getValue();
- int numPrimaryKeys =
DatasetUtils.getPartitioningKeys(getDataset()).size();
+ DataSourceScanOperator dataSourceScan =
(DataSourceScanOperator) dataSourceRefToFetchKey.getValue();
+ int numPrimaryKeys =
DatasetUtils.getPartitioningKeys(dataset).size();
for (int i = 0; i < numPrimaryKeys; i++) {
target.add(dataSourceScan.getVariables().get(i));
}
break;
case PRIMARY_INDEX_LOOKUP:
- UnnestMapOperator unnestMapOp = (UnnestMapOperator)
getDataSourceRef().getValue();
+ AbstractUnnestMapOperator unnestMapOp =
(AbstractUnnestMapOperator) dataSourceRefToFetchKey.getValue();
List<LogicalVariable> primaryKeys = null;
- primaryKeys =
AccessMethodUtils.getPrimaryKeyVarsFromPrimaryUnnestMap(getDataset(),
unnestMapOp);
+ primaryKeys =
AccessMethodUtils.getPrimaryKeyVarsFromPrimaryUnnestMap(dataset, unnestMapOp);
target.addAll(primaryKeys);
+ break;
+ case EXTERNAL_SCAN:
break;
case NO_DATASOURCE:
default:
throw new AlgebricksException("The subtree does not have any
data source.");
}
}
-
public List<LogicalVariable> getDataSourceVariables() throws
AlgebricksException {
switch (getDataSourceType()) {
case DATASOURCE_SCAN:
@@ -438,8 +453,8 @@
return assignsAndUnnestsRefs;
}
- public List<AbstractLogicalOperator> getAssignsAndUnnests() {
- return assignsAndUnnests;
+ public List<AbstractLogicalOperator> getAssignsAndUnnestsOps() {
+ return assignsAndUnnestsOps;
}
public Mutable<ILogicalOperator> getDataSourceRef() {
@@ -515,4 +530,12 @@
this.ixJoinOuterAdditionalRecordTypes =
ixJoinOuterAdditionalRecordTypes;
}
+ public void clearSelectRefs() {
+ selectRefs.clear();
+ }
+
+ public List<Mutable<ILogicalOperator>> getSelectRefs() {
+ return selectRefs;
+ }
+
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
index c3c162e..bc884d7 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
@@ -125,7 +125,7 @@
boolean hasGroupBy) throws AlgebricksException {
// Determine if the index is applicable on the left or right side (if
both, we arbitrarily prefer the left
// side).
- Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
+ Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex);
OptimizableOperatorSubTree indexSubTree;
OptimizableOperatorSubTree probeSubTree;
--
To view, visit https://asterix-gerrit.ics.uci.edu/1350
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I60a2a61eb46851d4c16c8f17447e3ac9b0aca779
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Taewoo Kim <[email protected]>