Change in asterixdb[master]: Index-only plan step 3: Top-down Select and Join transformat...

Taewoo Kim (Code Review) Thu, 17 Nov 2016 16:47:03 -0800

Taewoo Kim has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/1350


Change subject: Index-only plan step 3: Top-down Select and Join transformation 
rule
......................................................................

Index-only plan step 3: Top-down Select and Join transformation rule

 - Converted IntroduceSelectAccessMethodRule and IntroduceJoinAccessMethodRule
   from bottom-up approach to top-down approach.
 - Index-only plan needs to verify the variables that are live in the select or 
join condition
   are the only variables to be used unless a variable is generated after the 
select or join operator.
 - In order to keep this information, top-down approach is introduced.

Change-Id: I60a2a61eb46851d4c16c8f17447e3ac9b0aca779
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
9 files changed, 504 insertions(+), 219 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/50/1350/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
index 8e78d1a..416caca 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
@@ -142,7 +142,7 @@
      * process by making it more systematic.
      */
     protected Pair<IAccessMethod, Index> chooseBestIndex(Map<IAccessMethod, 
AccessMethodAnalysisContext> analyzedAMs) {
-        List<Pair<IAccessMethod, Index>> list = chooseAllIndex(analyzedAMs);
+        List<Pair<IAccessMethod, Index>> list = chooseAllIndexes(analyzedAMs);
         return list.isEmpty() ? null : list.get(0);
     }
 
@@ -154,7 +154,7 @@
      * [InvertedIndexAccessMethod, IndexType.SINGLE_PARTITION_WORD_INVIX || 
SINGLE_PARTITION_NGRAM_INVIX ||
      * LENGTH_PARTITIONED_WORD_INVIX || LENGTH_PARTITIONED_NGRAM_INVIX]
      */
-    protected List<Pair<IAccessMethod, Index>> chooseAllIndex(
+    protected List<Pair<IAccessMethod, Index>> chooseAllIndexes(
             Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) {
         List<Pair<IAccessMethod, Index>> result = new 
ArrayList<Pair<IAccessMethod, Index>>();
         // Use variables (fields) to the index types map to check which type 
of indexes are applied for the vars.
@@ -361,25 +361,28 @@
      *
      * @throws AlgebricksException
      */
-    protected boolean analyzeCondition(ILogicalExpression cond, 
List<AbstractLogicalOperator> assignsAndUnnests,
+    protected boolean 
analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(ILogicalExpression cond,
+            List<AbstractLogicalOperator> assignsAndUnnests,
             Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs, 
IOptimizationContext context,
             IVariableTypeEnvironment typeEnvironment) throws 
AlgebricksException {
         AbstractFunctionCallExpression funcExpr = 
(AbstractFunctionCallExpression) cond;
         FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
-        // Don't consider optimizing a disjunctive condition with an index (too
-        // complicated for now).
+        // TODO: We don't consider a disjunctive condition with an index yet 
since it's complex.
         if (funcIdent == AlgebricksBuiltinFunctions.OR) {
             return false;
         }
-        boolean found = analyzeFunctionExpr(funcExpr, assignsAndUnnests, 
analyzedAMs, context, typeEnvironment);
+        // Find applicable access methods for the given function expression 
based on the
+        // function identifier and its arguments. Update the analyzedAMs 
accordingly.
+        boolean found = analyzeFunctionExprAndUpdateAnalyzedAM(funcExpr, 
assignsAndUnnests, analyzedAMs, context,
+                typeEnvironment);
         for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
             ILogicalExpression argExpr = arg.getValue();
             if (argExpr.getExpressionTag() != 
LogicalExpressionTag.FUNCTION_CALL) {
                 continue;
             }
             AbstractFunctionCallExpression argFuncExpr = 
(AbstractFunctionCallExpression) argExpr;
-            boolean matchFound = analyzeFunctionExpr(argFuncExpr, 
assignsAndUnnests, analyzedAMs, context,
-                    typeEnvironment);
+            boolean matchFound = 
analyzeFunctionExprAndUpdateAnalyzedAM(argFuncExpr, assignsAndUnnests, 
analyzedAMs,
+                    context, typeEnvironment);
             found = found || matchFound;
         }
         return found;
@@ -392,7 +395,7 @@
      *
      * @throws AlgebricksException
      */
-    protected boolean analyzeFunctionExpr(AbstractFunctionCallExpression 
funcExpr,
+    protected boolean 
analyzeFunctionExprAndUpdateAnalyzedAM(AbstractFunctionCallExpression funcExpr,
             List<AbstractLogicalOperator> assignsAndUnnests,
             Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs, 
IOptimizationContext context,
             IVariableTypeEnvironment typeEnvironment) throws 
AlgebricksException {
@@ -486,9 +489,9 @@
         }
         for (IOptimizableFuncExpr optFuncExpr : analysisCtx.matchedFuncExprs) {
             // Try to match variables from optFuncExpr to assigns or unnests.
-            for (int assignOrUnnestIndex = 0; assignOrUnnestIndex < 
subTree.getAssignsAndUnnests()
+            for (int assignOrUnnestIndex = 0; assignOrUnnestIndex < 
subTree.getAssignsAndUnnestsOps()
                     .size(); assignOrUnnestIndex++) {
-                AbstractLogicalOperator op = 
subTree.getAssignsAndUnnests().get(assignOrUnnestIndex);
+                AbstractLogicalOperator op = 
subTree.getAssignsAndUnnestsOps().get(assignOrUnnestIndex);
                 if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
                     analyzeAssignOp((AssignOperator) op, optFuncExpr, subTree, 
assignOrUnnestIndex, datasetRecordVar,
                             datasetMetaVar, context, datasetIndexes, 
optFuncExprIndex, analysisCtx);
@@ -679,7 +682,7 @@
         // Get expression corresponding to opVar at varIndex.
         AbstractLogicalExpression expr = null;
         AbstractFunctionCallExpression childFuncExpr = null;
-        AbstractLogicalOperator op = 
subTree.getAssignsAndUnnests().get(opIndex);
+        AbstractLogicalOperator op = 
subTree.getAssignsAndUnnestsOps().get(opIndex);
         if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
             AssignOperator assignOp = (AssignOperator) op;
             expr = (AbstractLogicalExpression) 
assignOp.getExpressions().get(assignVarIndex).getValue();
@@ -747,8 +750,8 @@
             int[] assignAndExpressionIndexes = null;
 
             //go forward through nested assigns until you find the relevant one
-            for (int i = opIndex + 1; i < 
subTree.getAssignsAndUnnests().size(); i++) {
-                AbstractLogicalOperator subOp = 
subTree.getAssignsAndUnnests().get(i);
+            for (int i = opIndex + 1; i < 
subTree.getAssignsAndUnnestsOps().size(); i++) {
+                AbstractLogicalOperator subOp = 
subTree.getAssignsAndUnnestsOps().get(i);
                 List<LogicalVariable> varList;
 
                 if (subOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
@@ -835,9 +838,9 @@
         LogicalVariable curVar = ((VariableReferenceExpression) 
argExpr).getVariableReference();
         // We look for the assign or unnest operator that produces curVar below
         // the current operator
-        for (int assignOrUnnestIndex = opIndex + 1; assignOrUnnestIndex < 
subTree.getAssignsAndUnnests()
+        for (int assignOrUnnestIndex = opIndex + 1; assignOrUnnestIndex < 
subTree.getAssignsAndUnnestsOps()
                 .size(); assignOrUnnestIndex++) {
-            AbstractLogicalOperator curOp = 
subTree.getAssignsAndUnnests().get(assignOrUnnestIndex);
+            AbstractLogicalOperator curOp = 
subTree.getAssignsAndUnnestsOps().get(assignOrUnnestIndex);
             if (curOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
                 AssignOperator assignOp = (AssignOperator) curOp;
                 List<LogicalVariable> varList = assignOp.getVariables();
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
index ca5da98..c5ab422 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodAnalysisContext.java
@@ -23,10 +23,9 @@
 import java.util.Map;
 import java.util.TreeMap;
 
-import org.apache.commons.lang3.mutable.Mutable;
-
 import org.apache.asterix.metadata.entities.Dataset;
 import org.apache.asterix.metadata.entities.Index;
+import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
@@ -44,7 +43,7 @@
     public Map<Index, List<Pair<Integer, Integer>>> indexExprsAndVars = new 
TreeMap<Index, List<Pair<Integer, Integer>>>();
 
     // Maps from index to the dataset it is indexing.
-    public Map<Index, Dataset> indexDatasetMap = new TreeMap<Index, Dataset>();
+    private Map<Index, Dataset> indexDatasetMap = new TreeMap<Index, 
Dataset>();
 
     // Maps from an index to the number of matched fields in the query plan 
(for performing prefix search)
     public Map<Index, Integer> indexNumMatchedKeys = new TreeMap<Index, 
Integer>();
@@ -60,7 +59,7 @@
             indexExprsAndVars.put(index, exprs);
         }
         exprs.add(new Pair<Integer, Integer>(exprIndex, varIndex));
-        indexDatasetMap.put(index, dataset);
+        getIndexDatasetMap().put(index, dataset);
     }
 
     public List<Pair<Integer, Integer>> getIndexExprs(Index index) {
@@ -83,4 +82,12 @@
         return lojIsNullFuncInGroupBy;
     }
 
+    public Map<Index, Dataset> getIndexDatasetMap() {
+        return indexDatasetMap;
+    }
+
+    public void setIndexDatasetMap(Map<Index, Dataset> indexDatasetMap) {
+        this.indexDatasetMap = indexDatasetMap;
+    }
+
 }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
index eb7d3a4..075d9a0 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/BTreeAccessMethod.java
@@ -166,7 +166,7 @@
         Mutable<ILogicalExpression> conditionRef = joinOp.getCondition();
         // Determine if the index is applicable on the left or right side
         // (if both, we arbitrarily prefer the left side).
-        Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
+        Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex);
         OptimizableOperatorSubTree indexSubTree;
         OptimizableOperatorSubTree probeSubTree;
         // We assume that the left subtree is the outer branch and the right 
subtree is the inner branch.
@@ -216,8 +216,8 @@
     @Override
     public ILogicalOperator 
createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef,
             OptimizableOperatorSubTree indexSubTree, 
OptimizableOperatorSubTree probeSubTree, Index chosenIndex,
-            AccessMethodAnalysisContext analysisCtx, boolean retainInput, 
boolean retainNull,
-            boolean requiresBroadcast, IOptimizationContext context) throws 
AlgebricksException {
+            AccessMethodAnalysisContext analysisCtx, boolean retainInput, 
boolean retainNull, boolean requiresBroadcast,
+            IOptimizationContext context) throws AlgebricksException {
         Dataset dataset = indexSubTree.getDataset();
         ARecordType recordType = indexSubTree.getRecordType();
         ARecordType metaRecordType = indexSubTree.getMetaRecordType();
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
index 5691d57..e0ca121 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IAccessMethod.java
@@ -30,7 +30,6 @@
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import org.apache.hyracks.data.std.api.IDataOutputProvider;
 
 /**
  * Interface that an access method should implement to work with the rewrite
@@ -86,13 +85,9 @@
             IOptimizationContext context) throws AlgebricksException;
 
     public ILogicalOperator 
createSecondaryToPrimaryPlan(Mutable<ILogicalExpression> conditionRef,
-            OptimizableOperatorSubTree indexSubTree,
-            OptimizableOperatorSubTree probeSubTree,
-            Index chosenIndex,
-            AccessMethodAnalysisContext analysisCtx,
-            boolean retainInput, boolean retainNull, boolean requiresBroadcast,
-            IOptimizationContext context)
-                    throws AlgebricksException;
+            OptimizableOperatorSubTree indexSubTree, 
OptimizableOperatorSubTree probeSubTree, Index chosenIndex,
+            AccessMethodAnalysisContext analysisCtx, boolean retainInput, 
boolean retainNull, boolean requiresBroadcast,
+            IOptimizationContext context) throws AlgebricksException;
 
     /**
      * Applies the plan transformation to use chosenIndex to optimize a join 
query.
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
index ff4d219..9344f3f 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java
@@ -24,6 +24,7 @@
 import java.util.Map;
 
 import org.apache.asterix.metadata.declared.AqlMetadataProvider;
+import org.apache.asterix.metadata.entities.Dataset;
 import org.apache.asterix.metadata.entities.Index;
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -53,7 +54,8 @@
  * This rule tries to utilize an index on the inner relation.
  * If that's not possible, it stops transforming the given join into an 
index-nested-loop join.
  * Replaces the above pattern with the following simplified plan:
- * (select) <-- (assign) <-- (btree search) <-- (sort) <-- (unnest(index 
search)) <-- (assign) <-- (datasource scan | unnest-map)
+ * (select) <-- (assign) <-- (btree search) <-- (sort) <-- (unnest(index 
search)) <-- (assign) <-- (A)
+ * (A) <-- (datasource scan | unnest-map)
  * The sort is optional, and some access methods may choose not to sort.
  * Note that for some index-based optimizations we do not remove the triggering
  * condition from the join, since the secondary index may only act as a 
filter, and the
@@ -76,16 +78,17 @@
 public class IntroduceJoinAccessMethodRule extends 
AbstractIntroduceAccessMethodRule {
 
     protected Mutable<ILogicalOperator> joinRef = null;
-    protected AbstractBinaryJoinOperator join = null;
+    protected AbstractBinaryJoinOperator joinOp = null;
     protected AbstractFunctionCallExpression joinCond = null;
     protected final OptimizableOperatorSubTree leftSubTree = new 
OptimizableOperatorSubTree();
     protected final OptimizableOperatorSubTree rightSubTree = new 
OptimizableOperatorSubTree();
     protected IVariableTypeEnvironment typeEnvironment = null;
     protected boolean isLeftOuterJoin = false;
     protected boolean hasGroupBy = true;
+    protected IOptimizationContext context = null;
 
     // Register access methods.
-    protected static Map<FunctionIdentifier, List<IAccessMethod>> 
accessMethods = new HashMap<FunctionIdentifier, List<IAccessMethod>>();
+    protected static Map<FunctionIdentifier, List<IAccessMethod>> 
accessMethods = new HashMap<>();
 
     static {
         registerAccessMethod(BTreeAccessMethod.INSTANCE, accessMethods);
@@ -93,90 +96,59 @@
         registerAccessMethod(InvertedIndexAccessMethod.INSTANCE, 
accessMethods);
     }
 
+    /**
+     * Recursively check the given plan from the root operator to transform a 
plan
+     * with JOIN or LEFT-OUTER-JOIN operator into an index-utilized plan.
+     */
+
     @Override
     public boolean rewritePost(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
             throws AlgebricksException {
         clear();
         setMetadataDeclarations(context);
+        this.context = context;
 
-        // Match operator pattern and initialize optimizable sub trees.
-        if (!matchesOperatorPattern(opRef, context)) {
-            return false;
-        }
-        // Analyze condition on those optimizable subtrees that have a 
datasource scan.
-        Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = new 
HashMap<IAccessMethod, AccessMethodAnalysisContext>();
-        boolean matchInLeftSubTree = false;
-        boolean matchInRightSubTree = false;
-        if (leftSubTree.hasDataSource()) {
-            matchInLeftSubTree = analyzeCondition(joinCond, 
leftSubTree.getAssignsAndUnnests(), analyzedAMs, context,
-                    typeEnvironment);
-        }
-        if (rightSubTree.hasDataSource()) {
-            matchInRightSubTree = analyzeCondition(joinCond, 
rightSubTree.getAssignsAndUnnests(), analyzedAMs, context,
-                    typeEnvironment);
-        }
-        if (!matchInLeftSubTree && !matchInRightSubTree) {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) 
opRef.getValue();
+
+        // Already checked?
+        if (context.checkIfInDontApplySet(this, op)) {
             return false;
         }
 
-        // Set dataset and type metadata.
-        AqlMetadataProvider metadataProvider = (AqlMetadataProvider) 
context.getMetadataProvider();
-        boolean checkLeftSubTreeMetadata = false;
-        boolean checkRightSubTreeMetadata = false;
-        if (matchInLeftSubTree) {
-            checkLeftSubTreeMetadata = 
leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
-        }
-        if (matchInRightSubTree) {
-            checkRightSubTreeMetadata = 
rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
-        }
-        if (!checkLeftSubTreeMetadata && !checkRightSubTreeMetadata) {
-            return false;
-        }
-        if (checkLeftSubTreeMetadata) {
-            fillSubTreeIndexExprs(leftSubTree, analyzedAMs, context);
-        }
-        if (checkRightSubTreeMetadata) {
-            fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context);
-        }
-        pruneIndexCandidates(analyzedAMs, context, typeEnvironment);
-
-        // We only consider indexes from the inner branch (right subTree).
-        // If no index is available, then we stop this optimization.
-        removeIndexCandidatesFromOuterBranch(analyzedAMs);
-
-        // Choose an index from the inner branch that will be used.
-        Pair<IAccessMethod, Index> chosenIndex = chooseBestIndex(analyzedAMs);
-        if (chosenIndex == null) {
-            context.addToDontApplySet(this, join);
+        // Check whether this operator is the root, which is DISTRIBUTE_RESULT 
or SINK since
+        // we start the process from the root operator.
+        // The reason is that in order to qualify as an index-only plan, the 
return clause
+        // should only include PK and/or SK fields. And after a JOIN operator, 
no other variables
+        // other than PK and/or SK should be used unless variables are 
generated after that JOIN operator.
+        if (op.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT
+                && op.getOperatorTag() != LogicalOperatorTag.SINK) {
             return false;
         }
 
-        // Apply plan transformation using chosen index.
-        AccessMethodAnalysisContext analysisCtx = 
analyzedAMs.get(chosenIndex.first);
+        // Recursively check the given plan whether the desired pattern exists 
in it.
+        // If so, try to optimize the plan.
+        boolean planTransformed = checkAndApplyTheRule(opRef);
 
-        //For LOJ with GroupBy, prepare objects to reset LOJ 
nullPlaceHolderVariable in GroupByOp
-        if (isLeftOuterJoin && hasGroupBy) {
-            analysisCtx.setLOJGroupbyOpRef(opRef);
-            ScalarFunctionCallExpression isNullFuncExpr = AccessMethodUtils
-                    .findLOJIsMissingFuncInGroupBy((GroupByOperator) 
opRef.getValue());
-            analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr);
+        if (joinOp != null) {
+            // We found an optimization here. Don't need to optimize this 
operator again.
+            context.addToDontApplySet(this, joinOp);
         }
 
-        // At this point, we are sure that only an index from the inner branch 
is going to be used.
-        // So, the left subtree is the outer branch and the right subtree is 
the inner branch.
-        boolean res = chosenIndex.first.applyJoinPlanTransformation(joinRef, 
leftSubTree, rightSubTree,
-                chosenIndex.second, analysisCtx, context, isLeftOuterJoin, 
hasGroupBy);
-        if (res) {
+        if (!planTransformed) {
+            return false;
+        } else {
             OperatorPropertiesUtil.typeOpRec(opRef, context);
         }
-        context.addToDontApplySet(this, join);
-        return res;
+
+        return planTransformed;
     }
 
     /**
-     * Removes indexes from the optimizer's consideration for this rule.
+     * Removes indexes from the outer branch from the optimizer's 
consideration for this rule,
+     * since we only use indexes from the inner branch.
      */
-    protected void removeIndexCandidatesFromOuterBranch(Map<IAccessMethod, 
AccessMethodAnalysisContext> analyzedAMs) {
+    protected void pruneIndexCandidatesFromOuterBranch(Map<IAccessMethod, 
AccessMethodAnalysisContext> analyzedAMs) {
+        // Inner branch is the right side branch of the given JOIN operator.
         String innerDataset = null;
         if (rightSubTree.getDataset() != null) {
             innerDataset = rightSubTree.getDataset().getDatasetName();
@@ -200,8 +172,8 @@
                     Pair<Integer, Integer> exprAndVarIdx = 
exprsAndVarIter.next();
                     IOptimizableFuncExpr optFuncExpr = 
amCtx.matchedFuncExprs.get(exprAndVarIdx.first);
 
-                    // Does this index come from the inner branch?
-                    // We check the dataset name and the subtree to make sure 
the index is applicable.
+                    // We check the dataset name and the subtree to make sure
+                    // that this index come from the inner branch.
                     if 
(indexExprAndVarEntry.getKey().getDatasetName().equals(innerDataset)) {
                         if 
(optFuncExpr.getOperatorSubTree(exprAndVarIdx.second).equals(rightSubTree)) {
                             indexFromInnerBranch = true;
@@ -209,8 +181,8 @@
                     }
                 }
 
-                // If the index does not come from the inner branch,
-                // We do not consider this index.
+                // If the given index does not come from the inner branch,
+                // prune this index so that the optimizer doesn't consider 
this index in this rule.
                 if (!indexFromInnerBranch) {
                     indexIt.remove();
                 }
@@ -218,50 +190,11 @@
         }
     }
 
-    protected boolean matchesOperatorPattern(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
-            throws AlgebricksException {
-        // First check that the operator is a join and its condition is a 
function call.
-        AbstractLogicalOperator op1 = (AbstractLogicalOperator) 
opRef.getValue();
-        if (context.checkIfInDontApplySet(this, op1)) {
-            return false;
-        }
-
-        boolean isInnerJoin = isInnerJoin(op1);
-        isLeftOuterJoin = isLeftOuterJoin(op1);
-
-        if (!isInnerJoin && !isLeftOuterJoin) {
-            return false;
-        }
-
-        // Set and analyze select.
-        if (isInnerJoin) {
-            joinRef = opRef;
-            join = (InnerJoinOperator) op1;
-        } else {
-            joinRef = op1.getInputs().get(0);
-            join = (LeftOuterJoinOperator) joinRef.getValue();
-        }
-
-        typeEnvironment = context.getOutputTypeEnvironment(join);
-        // Check that the select's condition is a function call.
-        ILogicalExpression condExpr = join.getCondition().getValue();
-        if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) 
{
-            return false;
-        }
-        joinCond = (AbstractFunctionCallExpression) condExpr;
-        boolean leftSubTreeInitialized = 
leftSubTree.initFromSubTree(join.getInputs().get(0));
-        boolean rightSubTreeInitialized = 
rightSubTree.initFromSubTree(join.getInputs().get(1));
-        if (!leftSubTreeInitialized || !rightSubTreeInitialized) {
-            return false;
-        }
-
-        // One of the subtrees must have a datasource scan.
-        if (leftSubTree.hasDataSourceScan() || 
rightSubTree.hasDataSourceScan()) {
-            return true;
-        }
-        return false;
-    }
-
+    /**
+     * Checks whether the given operator is LEFTOUTERJOIN.
+     * If so, also checks that GROUPBY is placed after LEFTOUTERJOIN.
+     */
+    // Check whether (Groupby)? <-- Leftouterjoin
     private boolean isLeftOuterJoin(AbstractLogicalOperator op1) {
         if (op1.getInputs().size() != 1) {
             return false;
@@ -277,6 +210,9 @@
         return true;
     }
 
+    /**
+     * Checks whether the given operator is INNERJOIN.
+     */
     private boolean isInnerJoin(AbstractLogicalOperator op1) {
         return op1.getOperatorTag() == LogicalOperatorTag.INNERJOIN;
     }
@@ -288,8 +224,212 @@
 
     private void clear() {
         joinRef = null;
-        join = null;
+        joinOp = null;
         joinCond = null;
         isLeftOuterJoin = false;
+        context = null;
     }
+
+    /**
+     * Recursively traverse the given plan and check whether a INNERJOIN or 
LEFTOUTERJOIN operator exists.
+     * If one is found, maintain the path from the root to the given join 
operator and
+     * optimize the path from the given join operator to the 
EMPTY_TUPLE_SOURCE operator
+     * if it is not already optimized.
+     */
+    protected boolean checkAndApplyTheRule(Mutable<ILogicalOperator> opRef) 
throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) 
opRef.getValue();
+        boolean joinFoundAndOptimizationApplied = false;
+
+        // Check the current operator pattern to see whether it is a JOIN or 
not.
+        boolean isThisOpInnerJoin = isInnerJoin(op);
+        isLeftOuterJoin = isLeftOuterJoin(op);
+
+        boolean isThisOpLeftOuterJoin = isLeftOuterJoin;
+        boolean isParentOpGroupBy = hasGroupBy;
+
+        Mutable<ILogicalOperator> joinRefFromThisOp = null;
+        AbstractBinaryJoinOperator joinOpFromThisOp = null;
+
+        if (isThisOpInnerJoin) {
+            // Set join operator.
+            joinRef = opRef;
+            joinOp = (InnerJoinOperator) op;
+            joinRefFromThisOp = opRef;
+            joinOpFromThisOp = (InnerJoinOperator) op;
+        } else if (isThisOpLeftOuterJoin) {
+            // Set left-outer-join op.
+            // The current operator is GROUP and the child of this op is 
LEFTOUERJOIN.
+            joinRef = op.getInputs().get(0);
+            joinOp = (LeftOuterJoinOperator) joinRef.getValue();
+            joinRefFromThisOp = op.getInputs().get(0);
+            joinOpFromThisOp = (LeftOuterJoinOperator) 
joinRefFromThisOp.getValue();
+        }
+
+        // Recursively check the plan and try to optimize it. We first check 
the children of the given operator
+        // to make sure an earlier join in the path is optimized first.
+        for (int i = 0; i < op.getInputs().size(); i++) {
+            joinFoundAndOptimizationApplied = 
checkAndApplyTheRule(op.getInputs().get(i));
+            if (joinFoundAndOptimizationApplied) {
+                return true;
+            }
+        }
+
+        // For a JOIN case, try to transform the given plan.
+        if (isThisOpInnerJoin || isThisOpLeftOuterJoin) {
+            // Restore the information from this operator since it might have 
been be set to null
+            // if there are other join operators in the earlier path.
+            joinRef = joinRefFromThisOp;
+            joinOp = joinOpFromThisOp;
+            isLeftOuterJoin = isThisOpLeftOuterJoin;
+            hasGroupBy = isParentOpGroupBy;
+
+            // Already checked? If not, this operator may be optimized.
+            if (!context.checkIfInDontApplySet(this, joinOp)) {
+                // For each access method, contains the information about
+                // whether an available index can be applicable or not.
+                Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = 
new HashMap<>();
+
+                // Check the condition of JOIN operator is a function call 
since
+                // only function call can be transformed using available 
indexes.
+                // If so, initialize the subtree information that will be used 
later to decide whether
+                // the given plan is truly optimizable or not.
+                if (checkJoinOpConditionAndInitSubTree()) {
+
+                    // Analyze the condition of SELECT operator and initialize 
analyzedAMs.
+                    // Check whether the function in the SELECT operator can 
be truly transformed.
+                    boolean matchInLeftSubTree = false;
+                    boolean matchInRightSubTree = false;
+                    if (leftSubTree.hasDataSource()) {
+                        matchInLeftSubTree = 
analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond,
+                                leftSubTree.getAssignsAndUnnestsOps(), 
analyzedAMs, context, typeEnvironment);
+                    }
+                    if (rightSubTree.hasDataSource()) {
+                        matchInRightSubTree = 
analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond,
+                                rightSubTree.getAssignsAndUnnestsOps(), 
analyzedAMs, context, typeEnvironment);
+                    }
+
+                    // Find the dataset from the data-source and the record 
type of the dataset from the metadata.
+                    // This will be used to find an applicable index on the 
dataset.
+                    if (matchInLeftSubTree || matchInRightSubTree) {
+                        // Set dataset and type metadata.
+                        AqlMetadataProvider metadataProvider = 
(AqlMetadataProvider) context.getMetadataProvider();
+                        boolean checkLeftSubTreeMetadata = false;
+                        boolean checkRightSubTreeMetadata = false;
+                        if (matchInLeftSubTree) {
+                            checkLeftSubTreeMetadata = 
leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
+                        }
+                        if (matchInRightSubTree) {
+                            checkRightSubTreeMetadata = 
rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
+                        }
+
+                        if (checkLeftSubTreeMetadata || 
checkRightSubTreeMetadata) {
+                            // Map variables to the applicable indexes and 
find the field name and type.
+                            // Then find the applicable indexes for the 
variables used in the JOIN condition.
+                            if (checkLeftSubTreeMetadata) {
+                                fillSubTreeIndexExprs(leftSubTree, 
analyzedAMs, context);
+                            }
+                            if (checkRightSubTreeMetadata) {
+                                fillSubTreeIndexExprs(rightSubTree, 
analyzedAMs, context);
+                            }
+
+                            // Prune the access methods based on the function 
expression and access methods.
+                            pruneIndexCandidates(analyzedAMs, context, 
typeEnvironment);
+
+                            // If the right subtree (inner branch) has 
indexes, one of those indexes will be used.
+                            // Remove the indexes from the outer branch
+                            // in the optimizer's consideration list for this 
rule.
+                            pruneIndexCandidatesFromOuterBranch(analyzedAMs);
+
+                            // We are going to use indexes from the inner 
branch.
+                            // If no index is available, then we stop here.
+                            Pair<IAccessMethod, Index> chosenIndex = 
chooseBestIndex(analyzedAMs);
+                            if (chosenIndex != null) {
+                                // Apply plan transformation using chosen 
index.
+                                AccessMethodAnalysisContext analysisCtx = 
analyzedAMs.get(chosenIndex.first);
+
+                                // For LOJ with GroupBy, prepare objects to 
reset LOJ nullPlaceHolderVariable
+                                // in GroupByOp.
+                                if (isThisOpLeftOuterJoin && 
isParentOpGroupBy) {
+                                    analysisCtx.setLOJGroupbyOpRef(opRef);
+                                    ScalarFunctionCallExpression 
isNullFuncExpr = AccessMethodUtils
+                                            
.findLOJIsMissingFuncInGroupBy((GroupByOperator) opRef.getValue());
+                                    
analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr);
+                                }
+
+                                Dataset indexDataset = 
analysisCtx.getIndexDatasetMap().get(chosenIndex.second);
+
+                                OptimizableOperatorSubTree indexSubTree = null;
+
+                                // We assume that the left subtree is the 
outer branch and the right subtree
+                                // is the inner branch. This assumption holds 
true since we only use an index
+                                // from the right subtree. The following is 
just a sanity check.
+                                if (rightSubTree.hasDataSourceScan() && 
indexDataset.getDatasetName()
+                                        
.equals(rightSubTree.getDataset().getDatasetName())) {
+                                    indexSubTree = rightSubTree;
+                                } else {
+                                    return false;
+                                }
+
+                                if (indexSubTree == null) {
+                                    return false;
+                                }
+
+                                // Finally, try to apply plan transformation 
using chosen index.
+                                boolean res = 
chosenIndex.first.applyJoinPlanTransformation(joinRef, leftSubTree,
+                                        rightSubTree, chosenIndex.second, 
analysisCtx, context, isLeftOuterJoin,
+                                        hasGroupBy);
+
+                                // If the plan transformation is successful, 
we don't need to traverse the plan
+                                // any more, since if there are more JOIN 
operators, the next trigger on this plan
+                                // will find them.
+                                if (res) {
+                                    return res;
+                                }
+                            } else {
+                                context.addToDontApplySet(this, joinOp);
+                            }
+
+                        }
+
+                    }
+                }
+
+            }
+            joinRef = null;
+            joinOp = null;
+        }
+
+        return false;
+    }
+
+    /**
+     * After the pattern is matched, check the condition and initialize the 
data sources from the both sub trees.
+     *
+     * @throws AlgebricksException
+     */
+    protected boolean checkJoinOpConditionAndInitSubTree() throws 
AlgebricksException {
+
+        typeEnvironment = context.getOutputTypeEnvironment(joinOp);
+
+        // Check that the join's condition is a function call.
+        ILogicalExpression condExpr = joinOp.getCondition().getValue();
+        if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) 
{
+            return false;
+        }
+        joinCond = (AbstractFunctionCallExpression) condExpr;
+
+        boolean leftSubTreeInitialized = 
leftSubTree.initFromSubTree(joinOp.getInputs().get(0), context);
+        boolean rightSubTreeInitialized = 
rightSubTree.initFromSubTree(joinOp.getInputs().get(1), context);
+
+        if (!leftSubTreeInitialized || !rightSubTreeInitialized) {
+            return false;
+        }
+
+        // One of the subtrees must have a datasource scan.
+        if (leftSubTree.hasDataSourceScan() || 
rightSubTree.hasDataSourceScan()) {
+            return true;
+        }
+        return false;
+    }
+
 }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
index 2464c06..5fc02db 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
@@ -20,11 +20,12 @@
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.TreeMap;
 
+import org.apache.asterix.algebra.operators.CommitOperator;
 import org.apache.asterix.metadata.declared.AqlMetadataProvider;
 import org.apache.asterix.metadata.entities.Index;
 import org.apache.commons.lang3.mutable.Mutable;
@@ -42,6 +43,7 @@
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DelegateOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.IntersectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
@@ -79,13 +81,14 @@
     // Operators representing the patterns to be matched:
     // These ops are set in matchesPattern()
     protected Mutable<ILogicalOperator> selectRef = null;
-    protected SelectOperator select = null;
+    protected SelectOperator selectOp = null;
     protected AbstractFunctionCallExpression selectCond = null;
     protected IVariableTypeEnvironment typeEnvironment = null;
     protected final OptimizableOperatorSubTree subTree = new 
OptimizableOperatorSubTree();
+    protected IOptimizationContext context = null;
 
     // Register access methods.
-    protected static Map<FunctionIdentifier, List<IAccessMethod>> 
accessMethods = new HashMap<FunctionIdentifier, List<IAccessMethod>>();
+    protected static Map<FunctionIdentifier, List<IAccessMethod>> 
accessMethods = new HashMap<>();
 
     static {
         registerAccessMethod(BTreeAccessMethod.INSTANCE, accessMethods);
@@ -93,51 +96,85 @@
         registerAccessMethod(InvertedIndexAccessMethod.INSTANCE, 
accessMethods);
     }
 
+    /**
+     * Recursively check the given plan from the root operator to transform a 
plan
+     * with SELECT operator into an index-utilized plan.
+     */
     @Override
     public boolean rewritePost(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
             throws AlgebricksException {
         clear();
         setMetadataDeclarations(context);
+        this.context = context;
 
-        // Match operator pattern and initialize operator members.
-        if (!matchesOperatorPattern(opRef, context)) {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) 
opRef.getValue();
+
+        // Already checked?
+        if (context.checkIfInDontApplySet(this, op)) {
             return false;
         }
 
-        // Analyze select condition.
-        Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = new 
TreeMap<IAccessMethod, AccessMethodAnalysisContext>();
-        if (!analyzeCondition(selectCond, subTree.getAssignsAndUnnests(), 
analyzedAMs, context, typeEnvironment)) {
+        // Check whether this operator is the root, which is DISTRIBUTE_RESULT 
or SINK since
+        // we start the process from the root operator.
+        if (op.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT
+                && op.getOperatorTag() != LogicalOperatorTag.SINK
+                && op.getOperatorTag() != 
LogicalOperatorTag.DELEGATE_OPERATOR) {
             return false;
         }
 
-        // Set dataset and type metadata.
-        if (!subTree.setDatasetAndTypeMetadata((AqlMetadataProvider) 
context.getMetadataProvider())) {
+        if (op.getOperatorTag() == LogicalOperatorTag.DELEGATE_OPERATOR
+                && !(((DelegateOperator) op).getDelegate() instanceof 
CommitOperator)) {
             return false;
         }
 
-        fillSubTreeIndexExprs(subTree, analyzedAMs, context);
-        pruneIndexCandidates(analyzedAMs, context, typeEnvironment);
+        // Recursively check the given plan whether the desired pattern exists 
in it.
+        // If so, try to optimize the plan.
+        boolean planTransformed = 
checkAndApplyTheSelectTransformationRule(opRef);
 
-        // Choose index to be applied.
-        List<Pair<IAccessMethod, Index>> chosenIndexes = 
chooseAllIndex(analyzedAMs);
-        if (chosenIndexes == null || chosenIndexes.size() == 0) {
-            context.addToDontApplySet(this, select);
-            return false;
+        if (selectOp != null) {
+            // We found an optimization here. Don't need to optimize this 
operator again.
+            context.addToDontApplySet(this, selectOp);
         }
 
-        // Apply plan transformation using chosen index.
-        boolean res = intersectAllSecondaryIndexes(chosenIndexes, analyzedAMs, 
context);
-
-        if (res) {
+        if (!planTransformed) {
+            return false;
+        } else {
             OperatorPropertiesUtil.typeOpRec(opRef, context);
         }
-        context.addToDontApplySet(this, select);
-        return res;
+
+        return planTransformed;
     }
 
+    /**
+     * Check that the given SELECT condition is a function call.
+     * Call initSubTree() to initialize the optimizable subtree that collects 
information from
+     * the operators below the given SELECT operator.
+     * In order to transform the given plan, a datasource should be configured
+     * since we are going to transform a datasource into an unnest-map 
operator.
+     */
+    protected boolean checkSelectOpConditionAndInitSubTree() throws 
AlgebricksException {
+        // Set and analyze select.
+        ILogicalExpression condExpr = selectOp.getCondition().getValue();
+        typeEnvironment = context.getOutputTypeEnvironment(selectOp);
+        if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) 
{
+            return false;
+        }
+        selectCond = (AbstractFunctionCallExpression) condExpr;
+
+        // Initialize the subtree information.
+        // Match and put assign, unnest, and datasource information.
+        boolean res = subTree.initFromSubTree(selectOp.getInputs().get(0), 
context);
+        return res && subTree.hasDataSourceScan();
+    }
+
+    /**
+     * Construct all applicable secondary index-based access paths in the 
given selection plan and
+     * intersect them using INTERSECT operator to guide to the common primary 
index search.
+     * In case where the applicable index is one, we only construct one path.
+     */
     private boolean intersectAllSecondaryIndexes(List<Pair<IAccessMethod, 
Index>> chosenIndexes,
             Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs, 
IOptimizationContext context)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         Pair<IAccessMethod, Index> chosenIndex = null;
         Optional<Pair<IAccessMethod, Index>> primaryIndex = 
chosenIndexes.stream()
                 .filter(pair -> pair.second.isPrimaryIndex()).findFirst();
@@ -153,8 +190,8 @@
                     context);
         }
 
-        // Intersect all secondary indexes, and postpone the primary index 
search.
-        Mutable<ILogicalExpression> conditionRef = select.getCondition();
+        // Intersect all secondary indexes and postpone the primary index 
search.
+        Mutable<ILogicalExpression> conditionRef = selectOp.getCondition();
 
         List<ILogicalOperator> subRoots = new ArrayList<>();
         for (Pair<IAccessMethod, Index> pair : chosenIndexes) {
@@ -162,22 +199,26 @@
             subRoots.add(pair.first.createSecondaryToPrimaryPlan(conditionRef, 
subTree, null, pair.second, analysisCtx,
                     false, false, false, context));
         }
-        ILogicalOperator primaryUnnest = 
connectAll2ndarySearchPlanWithIntersect(subRoots, context);
+        // Connect each secondary index utilization plan to a common intersect 
operator.
+        ILogicalOperator primaryUnnestOp = 
connectAll2ndarySearchPlanWithIntersect(subRoots, context);
 
-        subTree.getDataSourceRef().setValue(primaryUnnest);
-        return primaryUnnest != null;
+        subTree.getDataSourceRef().setValue(primaryUnnestOp);
+        return primaryUnnestOp != null;
     }
 
+    /**
+     * Connect each secondary index utilization plan to a common INTERSECT 
operator.
+     */
     private ILogicalOperator 
connectAll2ndarySearchPlanWithIntersect(List<ILogicalOperator> subRoots,
             IOptimizationContext context) throws AlgebricksException {
         ILogicalOperator lop = subRoots.get(0);
         List<List<LogicalVariable>> inputVars = new 
ArrayList<>(subRoots.size());
         for (int i = 0; i < subRoots.size(); i++) {
             if (lop.getOperatorTag() != subRoots.get(i).getOperatorTag()) {
-                throw new AlgebricksException("The data source root should 
have the same operator type");
+                throw new AlgebricksException("The data source root should 
have the same operator type.");
             }
             if (lop.getInputs().size() != 1) {
-                throw new AlgebricksException("The primary search has multiple 
input");
+                throw new AlgebricksException("The primary search has multiple 
inputs.");
             }
 
             ILogicalOperator curRoot = subRoots.get(i);
@@ -186,7 +227,8 @@
             for (Pair<OrderOperator.IOrder, Mutable<ILogicalExpression>> 
orderExpression : order
                     .getOrderExpressions()) {
                 if (orderExpression.second.getValue().getExpressionTag() != 
LogicalExpressionTag.VARIABLE) {
-                    throw new AlgebricksException("It should not happen, the 
order by expression is not variables");
+                    throw new AlgebricksException(
+                            "The order by expression should be variables, but 
they aren't variables.");
                 }
                 VariableReferenceExpression orderedVar = 
(VariableReferenceExpression) orderExpression.second
                         .getValue();
@@ -205,29 +247,102 @@
         return lop;
     }
 
-    protected boolean matchesOperatorPattern(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context)
+    /**
+     * Recursively traverse the given plan and check whether a SELECT operator 
exists.
+     * If one is found, maintain the path from the root to SELECT operator and
+     * optimize the path from the SELECT operator to the EMPTY_TUPLE_SOURCE 
operator
+     * if it is not already optimized.
+     */
+    protected boolean 
checkAndApplyTheSelectTransformationRule(Mutable<ILogicalOperator> opRef)
             throws AlgebricksException {
-        // First check that the operator is a select and its condition is a 
function call.
-        AbstractLogicalOperator op1 = (AbstractLogicalOperator) 
opRef.getValue();
-        if (context.checkIfInDontApplySet(this, op1)) {
-            return false;
-        }
-        if (op1.getOperatorTag() != LogicalOperatorTag.SELECT) {
-            return false;
-        }
-        // Set and analyze select.
-        selectRef = opRef;
-        select = (SelectOperator) op1;
+        AbstractLogicalOperator op = (AbstractLogicalOperator) 
opRef.getValue();
+        boolean selectFoundAndOptimizationApplied = false;
 
-        typeEnvironment = context.getOutputTypeEnvironment(op1);
-        // Check that the select's condition is a function call.
-        ILogicalExpression condExpr = select.getCondition().getValue();
-        if (condExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) 
{
-            return false;
+        // Traverse the plan until we find a SELECT operator.
+        if (op.getOperatorTag() == LogicalOperatorTag.SELECT) {
+            selectRef = opRef;
+            selectOp = (SelectOperator) op;
+
+            // Already checked this SELECT operator? If not, this operator may 
be optimized.
+            if (!context.checkIfInDontApplySet(this, selectOp)) {
+                // For each access method, contains the information about
+                // whether an available index can be applicable or not.
+                Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = 
new LinkedHashMap<>();
+
+                // Check the condition of SELECT operator is a function call 
since
+                // only function call can be transformed using available 
indexes.
+                // If so, initialize the subtree information that will be used 
later to decide whether
+                // the given plan is truly optimizable or not.
+                if (checkSelectOpConditionAndInitSubTree()) {
+
+                    // We want to transform the SELECT operator in the 
bottom-up way.
+                    // Thus, if the subtree has SELECT operator, we let the 
rule deal with that operator first.
+                    if (!subTree.getSelectRefs().isEmpty()) {
+
+                        // Analyze the condition of SELECT operator and 
initialize analyzedAMs.
+                        // Check whether the function in the SELECT operator 
can be truly transformed.
+                        if 
(analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(selectCond,
+                                subTree.getAssignsAndUnnestsOps(), 
analyzedAMs, context, typeEnvironment)) {
+
+                            // Find the dataset from the data-source and
+                            // the record type of the dataset from the 
metadata.
+                            // This will be used to find an applicable index 
on the dataset.
+                            if (subTree
+                                    
.setDatasetAndTypeMetadata((AqlMetadataProvider) 
context.getMetadataProvider())) {
+
+                                // Map variables to the applicable indexes and 
find the field name and type.
+                                // Then find the applicable indexes for the 
variables used in the SELECT condition.
+                                fillSubTreeIndexExprs(subTree, analyzedAMs, 
context);
+
+                                // Prune the access methods based on the 
function expression and access methods.
+                                pruneIndexCandidates(analyzedAMs, context, 
typeEnvironment);
+
+                                // Choose all indexes that will be applied.
+                                List<Pair<IAccessMethod, Index>> chosenIndexes 
= chooseAllIndexes(analyzedAMs);
+
+                                if (chosenIndexes == null || 
chosenIndexes.isEmpty()) {
+                                    // We can't apply any index for this 
SELECT operator
+                                    context.addToDontApplySet(this, 
selectRef.getValue());
+                                } else {
+                                    boolean res = false;
+
+                                    // If many secondary indexes are chosen, 
then apply the intersection method.
+                                    if (chosenIndexes.size() > 1) {
+                                        res = 
intersectAllSecondaryIndexes(chosenIndexes, analyzedAMs, context);
+                                    } else {
+                                        AccessMethodAnalysisContext 
analysisCtx = analyzedAMs
+                                                
.get(chosenIndexes.get(0).first);
+
+                                        // Finally, try to apply plan 
transformation using chosen index.
+                                        res = 
chosenIndexes.get(0).first.applySelectPlanTransformation(selectRef,
+                                                subTree, 
chosenIndexes.get(0).second, analysisCtx, context);
+                                    }
+
+                                    // If the plan transformation is 
successful, we don't need to traverse
+                                    // the plan any more, since if there are 
more SELECT operators, the next
+                                    // trigger on this plan will find them.
+                                    if (res) {
+                                        return res;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            selectRef = null;
+            selectOp = null;
         }
-        selectCond = (AbstractFunctionCallExpression) condExpr;
-        boolean res = subTree.initFromSubTree(op1.getInputs().get(0));
-        return res && subTree.hasDataSourceScan();
+
+        // Recursively check the plan and try to optimize it.
+        for (int i = 0; i < op.getInputs().size(); i++) {
+            selectFoundAndOptimizationApplied = 
checkAndApplyTheSelectTransformationRule(op.getInputs().get(i));
+            if (selectFoundAndOptimizationApplied) {
+                return true;
+            }
+        }
+
+        return false;
     }
 
     @Override
@@ -237,7 +352,9 @@
 
     private void clear() {
         selectRef = null;
-        select = null;
+        selectOp = null;
         selectCond = null;
+        context = null;
+        subTree.reset();
     }
 }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
index 8f42904..9c034a2 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
@@ -452,7 +452,7 @@
             AccessMethodAnalysisContext analysisCtx, IOptimizationContext 
context, boolean isLeftOuterJoin,
             boolean hasGroupBy) throws AlgebricksException {
         // Figure out if the index is applicable on the left or right side (if 
both, we arbitrarily prefer the left side).
-        Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
+        Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex);
         OptimizableOperatorSubTree indexSubTree;
         OptimizableOperatorSubTree probeSubTree;
 
@@ -581,7 +581,7 @@
             ILogicalExpression joinCond, IOptimizableFuncExpr optFuncExpr, 
List<LogicalVariable> originalSubTreePKs,
             List<LogicalVariable> surrogateSubTreePKs, IOptimizationContext 
context) throws AlgebricksException {
 
-        probeSubTree.getPrimaryKeyVars(originalSubTreePKs);
+        probeSubTree.getPrimaryKeyVars(null, originalSubTreePKs);
 
         // Create two copies of the original probe subtree.
         // The first copy, which becomes the new probe subtree, will retain 
the primary-key and secondary-search key variables,
@@ -630,7 +630,7 @@
         // Replace the original probe subtree with its copy.
         Dataset origDataset = probeSubTree.getDataset();
         ARecordType origRecordType = probeSubTree.getRecordType();
-        probeSubTree.initFromSubTree(newProbeSubTreeRootRef);
+        probeSubTree.initFromSubTree(newProbeSubTreeRootRef, context);
         probeSubTree.setDataset(origDataset);
         probeSubTree.setRecordType(origRecordType);
 
@@ -1022,7 +1022,7 @@
             return null;
         }
 
-        for (AbstractLogicalOperator op : subTree.getAssignsAndUnnests()) {
+        for (AbstractLogicalOperator op : subTree.getAssignsAndUnnestsOps()) {
             if (op.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
                 continue;
             }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
index 01476e7..9998092 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
@@ -36,6 +36,7 @@
 import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
@@ -43,6 +44,7 @@
 import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractScanOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestMapOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractUnnestOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -65,7 +67,7 @@
     private ILogicalOperator root = null;
     private Mutable<ILogicalOperator> rootRef = null;
     private final List<Mutable<ILogicalOperator>> assignsAndUnnestsRefs = new 
ArrayList<>();
-    private final List<AbstractLogicalOperator> assignsAndUnnests = new 
ArrayList<>();
+    private final List<AbstractLogicalOperator> assignsAndUnnestsOps = new 
ArrayList<>();
     private Mutable<ILogicalOperator> dataSourceRef = null;
     private DataSourceType dataSourceType = DataSourceType.NO_DATASOURCE;
 
@@ -81,19 +83,24 @@
     private List<Dataset> ixJoinOuterAdditionalDatasets = null;
     private List<ARecordType> ixJoinOuterAdditionalRecordTypes = null;
 
-    public boolean initFromSubTree(Mutable<ILogicalOperator> subTreeOpRef) 
throws AlgebricksException {
+    // SELECT operators
+    private List<Mutable<ILogicalOperator>> selectRefs = new ArrayList<>();
+
+    public boolean initFromSubTree(Mutable<ILogicalOperator> subTreeOpRef, 
IOptimizationContext context)
+            throws AlgebricksException {
         reset();
         rootRef = subTreeOpRef;
         root = subTreeOpRef.getValue();
         // Examine the op's children to match the expected patterns.
         AbstractLogicalOperator subTreeOp = (AbstractLogicalOperator) 
subTreeOpRef.getValue();
         do {
-            // Skip select operator.
+            // Keep SELECT information if one is present.
             if (subTreeOp.getOperatorTag() == LogicalOperatorTag.SELECT) {
+                getSelectRefs().add(subTreeOpRef);
                 subTreeOpRef = subTreeOp.getInputs().get(0);
                 subTreeOp = (AbstractLogicalOperator) subTreeOpRef.getValue();
             }
-            // Check primary-index pattern.
+            // Match datasource information if there are no (assign | unnest)*
             if (subTreeOp.getOperatorTag() != LogicalOperatorTag.ASSIGN
                     && subTreeOp.getOperatorTag() != 
LogicalOperatorTag.UNNEST) {
                 // Pattern may still match if we are looking for primary index 
matches as well.
@@ -106,7 +113,7 @@
                     return false;
                 } else {
                     getAssignsAndUnnestsRefs().add(subTreeOpRef);
-                    getAssignsAndUnnests().add(subTreeOp);
+                    getAssignsAndUnnestsOps().add(subTreeOp);
                 }
                 subTreeOpRef = subTreeOp.getInputs().get(0);
                 subTreeOp = (AbstractLogicalOperator) subTreeOpRef.getValue();
@@ -342,7 +349,7 @@
         setRoot(null);
         setRootRef(null);
         getAssignsAndUnnestsRefs().clear();
-        getAssignsAndUnnests().clear();
+        getAssignsAndUnnestsOps().clear();
         setDataSourceRef(null);
         setDataSourceType(DataSourceType.NO_DATASOURCE);
         setIxJoinOuterAdditionalDataSourceRefs(null);
@@ -351,29 +358,37 @@
         setIxJoinOuterAdditionalDatasets(null);
         setRecordType(null);
         setIxJoinOuterAdditionalRecordTypes(null);
+        clearSelectRefs();
     }
 
-    public void getPrimaryKeyVars(List<LogicalVariable> target) throws 
AlgebricksException {
-        switch (getDataSourceType()) {
+    /**
+     * Get primary key variables from the given data-source.
+     */
+    public void getPrimaryKeyVars(Mutable<ILogicalOperator> 
dataSourceRefToFetch, List<LogicalVariable> target)
+            throws AlgebricksException {
+        Mutable<ILogicalOperator> dataSourceRefToFetchKey = 
(dataSourceRefToFetch == null) ? dataSourceRef
+                : dataSourceRefToFetch;
+        switch (dataSourceType) {
             case DATASOURCE_SCAN:
-                DataSourceScanOperator dataSourceScan = 
(DataSourceScanOperator) getDataSourceRef().getValue();
-                int numPrimaryKeys = 
DatasetUtils.getPartitioningKeys(getDataset()).size();
+                DataSourceScanOperator dataSourceScan = 
(DataSourceScanOperator) dataSourceRefToFetchKey.getValue();
+                int numPrimaryKeys = 
DatasetUtils.getPartitioningKeys(dataset).size();
                 for (int i = 0; i < numPrimaryKeys; i++) {
                     target.add(dataSourceScan.getVariables().get(i));
                 }
                 break;
             case PRIMARY_INDEX_LOOKUP:
-                UnnestMapOperator unnestMapOp = (UnnestMapOperator) 
getDataSourceRef().getValue();
+                AbstractUnnestMapOperator unnestMapOp = 
(AbstractUnnestMapOperator) dataSourceRefToFetchKey.getValue();
                 List<LogicalVariable> primaryKeys = null;
-                primaryKeys = 
AccessMethodUtils.getPrimaryKeyVarsFromPrimaryUnnestMap(getDataset(), 
unnestMapOp);
+                primaryKeys = 
AccessMethodUtils.getPrimaryKeyVarsFromPrimaryUnnestMap(dataset, unnestMapOp);
                 target.addAll(primaryKeys);
+                break;
+            case EXTERNAL_SCAN:
                 break;
             case NO_DATASOURCE:
             default:
                 throw new AlgebricksException("The subtree does not have any 
data source.");
         }
     }
-
     public List<LogicalVariable> getDataSourceVariables() throws 
AlgebricksException {
         switch (getDataSourceType()) {
             case DATASOURCE_SCAN:
@@ -438,8 +453,8 @@
         return assignsAndUnnestsRefs;
     }
 
-    public List<AbstractLogicalOperator> getAssignsAndUnnests() {
-        return assignsAndUnnests;
+    public List<AbstractLogicalOperator> getAssignsAndUnnestsOps() {
+        return assignsAndUnnestsOps;
     }
 
     public Mutable<ILogicalOperator> getDataSourceRef() {
@@ -515,4 +530,12 @@
         this.ixJoinOuterAdditionalRecordTypes = 
ixJoinOuterAdditionalRecordTypes;
     }
 
+    public void clearSelectRefs() {
+        selectRefs.clear();
+    }
+
+    public List<Mutable<ILogicalOperator>> getSelectRefs() {
+        return selectRefs;
+    }
+
 }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
index c3c162e..bc884d7 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/RTreeAccessMethod.java
@@ -125,7 +125,7 @@
             boolean hasGroupBy) throws AlgebricksException {
         // Determine if the index is applicable on the left or right side (if 
both, we arbitrarily prefer the left
         // side).
-        Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
+        Dataset dataset = analysisCtx.getIndexDatasetMap().get(chosenIndex);
         OptimizableOperatorSubTree indexSubTree;
         OptimizableOperatorSubTree probeSubTree;
 

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/1350
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I60a2a61eb46851d4c16c8f17447e3ac9b0aca779
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Taewoo Kim <[email protected]>

Change in asterixdb[master]: Index-only plan step 3: Top-down Select and Join transformat...

Reply via email to