>From Preetham Poluparthi <[email protected]>:
Preetham Poluparthi has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20475?usp=email )
Change subject: WIP: Costing changes for index-only plans
......................................................................
WIP: Costing changes for index-only plans
Change-Id: Ifc835f8c4f8386e7b290c68891b4274f1ae87cd5
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/PlanNode.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/indexadvisor/AdvisorConditionParser.java
4 files changed, 123 insertions(+), 50 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/75/20475/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 30b513b..05cb898 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -1190,7 +1190,7 @@
return PlanNode.NO_PLAN;
}
// We may not add any index plans, so need to check for NO_PLAN
- jn.addIndexAccessPlans(EnumerateJoinsRule.removeTrue(leafInput),
indexProvider);
+ jn.addIndexAccessPlans(EnumerateJoinsRule.removeTrue(leafInput),
indexProvider, optCtx);
}
return this.numberOfTerms;
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index 2abad06..c942ea7 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -31,7 +31,9 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.Set;
import java.util.TreeMap;
+import java.util.stream.Collectors;
import org.apache.asterix.common.annotations.IndexedNLJoinExpressionAnnotation;
import
org.apache.asterix.common.annotations.SkipSecondaryIndexSearchExpressionAnnotation;
@@ -49,14 +51,16 @@
import org.apache.asterix.optimizer.rules.am.IOptimizableFuncExpr;
import org.apache.asterix.optimizer.rules.am.IntroduceJoinAccessMethodRule;
import org.apache.asterix.optimizer.rules.am.IntroduceSelectAccessMethodRule;
+import
org.apache.asterix.optimizer.rules.cbo.indexadvisor.AdvisorConditionParser;
+import org.apache.asterix.optimizer.rules.cbo.indexadvisor.ScanFilterCondition;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.common.utils.Quadruple;
-import org.apache.hyracks.algebricks.common.utils.Triple;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
@@ -110,7 +114,7 @@
private List<Integer> applicableJoinConditions;
protected ILogicalOperator leafInput;
protected Index.SampleIndexDetails idxDetails;
- private List<Triple<Index, Double, AbstractFunctionCallExpression>>
IndexCostInfo;
+ private List<IndexCostInfo> indexCostInfoList;
// The triple above is : Index, selectivity, and the index expression
protected static int NO_JN = -1;
private static int NO_CARDS = -2;
@@ -657,10 +661,46 @@
return andExpr;
}
+ static class IndexCostInfo {
+ private final Index index;
+ private double selectivity;
+ private final AbstractFunctionCallExpression afce;
+ private final boolean isIndexOnly;
+
+ public IndexCostInfo(Index index, double selectivity,
AbstractFunctionCallExpression afce,
+ boolean isIndexOnly) {
+ this.index = index;
+ this.selectivity = selectivity;
+ this.afce = afce;
+ this.isIndexOnly = isIndexOnly;
+ }
+
+ public AbstractFunctionCallExpression getAfce() {
+ return afce;
+ }
+
+ public Index getIndex() {
+ return index;
+ }
+
+ public double getSelectivity() {
+ return selectivity;
+ }
+
+ public void setSelectivity(double selectivity) {
+ this.selectivity = selectivity;
+ }
+
+ public boolean isIndexOnly() {
+ return isIndexOnly;
+ }
+ }
+
private void setSkipIndexAnnotationsForUnusedIndexes() {
- for (int i = 0; i < IndexCostInfo.size(); i++) {
- if (IndexCostInfo.get(i).second == -1.0) {
- AbstractFunctionCallExpression afce =
IndexCostInfo.get(i).third;
+
+ for (IndexCostInfo indexCostInfo : indexCostInfoList) {
+ if (indexCostInfo.getSelectivity() == -1.0) {
+ AbstractFunctionCallExpression afce = indexCostInfo.getAfce();
SkipSecondaryIndexSearchExpressionAnnotation skipAnno =
joinEnum.findSkipIndexHint(afce);
Collection<String> indexNames = new HashSet<>();
if (skipAnno != null && skipAnno.getIndexNames() != null) {
@@ -669,9 +709,9 @@
if (indexNames.isEmpty()) {
// this index has to be skipped, so find the corresponding
expression
EnumerateJoinsRule.setAnnotation(afce,
SkipSecondaryIndexSearchExpressionAnnotation
-
.newInstance(Collections.singleton(IndexCostInfo.get(i).first.getIndexName())));
+
.newInstance(Collections.singleton(indexCostInfo.getIndex().getIndexName())));
} else {
- indexNames.add(IndexCostInfo.get(i).first.getIndexName());
+ indexNames.add(indexCostInfo.getIndex().getIndexName());
EnumerateJoinsRule.setAnnotation(afce,
SkipSecondaryIndexSearchExpressionAnnotation.newInstance(indexNames));
}
@@ -680,11 +720,12 @@
}
private void costAndChooseIndexPlans(ILogicalOperator leafInput,
- Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs)
throws AlgebricksException {
+ Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs,
IOptimizationContext optctx)
+ throws AlgebricksException {
SelectOperator selOp;
double sel;
- List<Triple<Index, Double, AbstractFunctionCallExpression>>
IndexCostInfo = new ArrayList<>();
+ List<IndexCostInfo> indexCostInfoList = new ArrayList<>();
for (Map.Entry<IAccessMethod, AccessMethodAnalysisContext> amEntry :
analyzedAMs.entrySet()) {
AccessMethodAnalysisContext analysisCtx = amEntry.getValue();
Iterator<Map.Entry<Index, List<Pair<Integer, Integer>>>> indexIt =
@@ -718,50 +759,78 @@
chosenIndex.getIndexType().equals(DatasetConfig.IndexType.ARRAY)
|| joinEnum.findUnnestOp(selOp));
}
- IndexCostInfo.add(new Triple<>(chosenIndex, sel, afce));
+ List<ScanFilterCondition> scanFilterConditions =
+
AdvisorConditionParser.extractPrimitiveConditions(leafInput, optctx);
+ boolean isIndexOnlyApplicable =
checkIfScanVarsIsPrefixOfIndexVars(chosenIndex, scanFilterConditions);
+ indexCostInfoList.add(new IndexCostInfo(chosenIndex, sel,
afce, isIndexOnlyApplicable));
}
}
- this.IndexCostInfo = IndexCostInfo;
- if (IndexCostInfo.size() > 0) {
+ this.indexCostInfoList = indexCostInfoList;
+ if (!indexCostInfoList.isEmpty()) {
buildIndexPlans();
}
setSkipIndexAnnotationsForUnusedIndexes();
}
+ private static boolean checkIfScanVarsIsPrefixOfIndexVars(Index
chosenIndex,List<ScanFilterCondition> scanFilterConditions) {
+ Set<List<String>> scanVars =
scanFilterConditions.stream().map(ScanFilterCondition::getLhsFieldAccessPath).collect(Collectors.toSet());
+ Index.IIndexDetails indexDetails = chosenIndex.getIndexDetails();
+ if(!(indexDetails instanceof Index.ValueIndexDetails
valueIndexDetails)) {
+ return false;
+ }
+ List<List<String>> indexVars = valueIndexDetails.getKeyFieldNames();
+ for(List<String> fieldName : indexVars){
+ if(!scanVars.contains(fieldName)){
+ // Scan Variable is not a prefix of index variable
+ return false;
+ }
+ scanVars.remove(fieldName);
+ }
+
+ // If all scan variables are found in index variables, then scanVars
should be empty
+ return scanVars.isEmpty();
+ }
+
private void buildIndexPlans() {
List<PlanNode> allPlans = joinEnum.getAllPlans();
PlanNode pn;
ICost opCost, totalCost;
- List<Triple<Index, Double, AbstractFunctionCallExpression>>
mandatoryIndexesInfo = new ArrayList<>();
- List<Triple<Index, Double, AbstractFunctionCallExpression>>
optionalIndexesInfo = new ArrayList<>();
+ List<IndexCostInfo> mandatoryIndexesInfoList = new ArrayList<>();
+ List<IndexCostInfo> optionalIndexesInfoList = new ArrayList<>();
double sel = 1.0;
boolean mandatoryArrayIndexUsed = false;
boolean optionalArrayIndexUsed = false;
opCost = this.joinEnum.getCostHandle().zeroCost();
- for (int i = 0; i < IndexCostInfo.size(); i++) {
- if (joinEnum.findUseIndexHint(IndexCostInfo.get(i).third)) {
- mandatoryIndexesInfo.add(IndexCostInfo.get(i));
+ for (IndexCostInfo indexCostInfo : indexCostInfoList) {
+ if (joinEnum.findUseIndexHint(indexCostInfo.getAfce())) {
+ mandatoryIndexesInfoList.add(indexCostInfo);
mandatoryArrayIndexUsed = mandatoryArrayIndexUsed
- || (mandatoryIndexesInfo.get(i).first.getIndexType()
== DatasetConfig.IndexType.ARRAY);
+ || (indexCostInfo.getIndex().getIndexType() ==
DatasetConfig.IndexType.ARRAY);
} else {
- optionalIndexesInfo.add(IndexCostInfo.get(i));
+ optionalIndexesInfoList.add(indexCostInfo);
optionalArrayIndexUsed = optionalArrayIndexUsed
- || (optionalIndexesInfo.get(i).first.getIndexType() ==
DatasetConfig.IndexType.ARRAY);
+ || (indexCostInfo.getIndex().getIndexType() ==
DatasetConfig.IndexType.ARRAY);
}
}
+ boolean isIndexOnlyApplicable = true;
+ int numSecondaryIndexesUsed = 0;
List<ICost> indexCosts = new ArrayList<>(); // these are the costs
associated with the index only
// First cost all the mandatory indexes. These will be in the plan
regardless of the cost
- if (mandatoryIndexesInfo.size() > 0) {
- for (int i = 0; i < mandatoryIndexesInfo.size(); i++) {
-
indexCosts.add(joinEnum.getCostMethodsHandle().costIndexScan(this,
mandatoryIndexesInfo.get(i).second));
+ if (mandatoryIndexesInfoList.size() > 0) {
+ for (int i = 0; i < mandatoryIndexesInfoList.size(); i++) {
+
indexCosts.add(joinEnum.getCostMethodsHandle().costIndexScan(this,
+ mandatoryIndexesInfoList.get(i).getSelectivity()));
+ isIndexOnlyApplicable = isIndexOnlyApplicable &&
mandatoryIndexesInfoList.get(i).isIndexOnly();
+ numSecondaryIndexesUsed =
+ numSecondaryIndexesUsed +
(mandatoryIndexesInfoList.get(i).getIndex().isPrimaryIndex() ? 0 : 1);
}
opCost = this.joinEnum.getCostHandle().zeroCost();
- for (int i = 0; i < mandatoryIndexesInfo.size(); i++) {
+ for (int i = 0; i < mandatoryIndexesInfoList.size(); i++) {
opCost = opCost.costAdd(indexCosts.get(i)); // opCost will
have all the index scan costs
- sel *= mandatoryIndexesInfo.get(i).second; // assuming
selectivities are independent for now
+ sel *= mandatoryIndexesInfoList.get(i).getSelectivity(); //
assuming selectivities are independent for now
}
// Now add the data Scan cost.
@@ -777,29 +846,35 @@
// Now lets deal with the optional indexes. These are the ones without
any hints on them.
List<ICost> dataCosts = new ArrayList<>(); // these are the costs
associated with accessing the data records
indexCosts.clear();
- if (optionalIndexesInfo.size() > 0) {
- optionalIndexesInfo.sort(Comparator.comparingDouble(o ->
o.second)); // sort on selectivity.
+ if (optionalIndexesInfoList.size() > 0) {
+ optionalIndexesInfoList.sort(Comparator.comparingDouble(o ->
o.getSelectivity())); // sort on selectivity.
// find the costs using one index at a time first.
// sel is now the selectivity of all the previous mandatory
indexes.
- for (int i = 0; i < optionalIndexesInfo.size(); i++) {
-
indexCosts.add(joinEnum.getCostMethodsHandle().costIndexScan(this,
optionalIndexesInfo.get(i).second)); // I0; I1; I2; ...
+ for (int i = 0; i < optionalIndexesInfoList.size(); i++) {
+
indexCosts.add(joinEnum.getCostMethodsHandle().costIndexScan(this,
+ optionalIndexesInfoList.get(i).getSelectivity())); //
I0; I1; I2; ...
// Now get the cost of the datascans involved with the
multiplied selectivity
// dataCost (0) will contain the dataScan cost with the first
index
//dataCost (1) will contain the dataScan cost with the first
index and the 2nd index and so on.
- sel *= optionalIndexesInfo.get(i).second; // assuming
selectivities are independent for now
- if (optionalIndexesInfo.get(i).first.isPrimaryIndex()) {
+ sel *= optionalIndexesInfoList.get(i).getSelectivity(); //
assuming selectivities are independent for now
+ if
(optionalIndexesInfoList.get(i).getIndex().isPrimaryIndex()) {
dataCosts.add(joinEnum.getCostHandle().zeroCost());
} else {
dataCosts.add(joinEnum.getCostMethodsHandle().costIndexDataScan(this, sel)); //
D0; D01; D012; ...
+ isIndexOnlyApplicable = isIndexOnlyApplicable &&
optionalIndexesInfoList.get(i).isIndexOnly();
+ numSecondaryIndexesUsed = numSecondaryIndexesUsed
+ +
(optionalIndexesInfoList.get(i).getIndex().isPrimaryIndex() ? 0 : 1);
}
}
// At the of of the above loop, I0, I1, I2 ... have been computed
// Also D0, D01, D012 ... have been computed.
- opCost = indexCosts.get(0).costAdd(dataCosts.get(0));
+ if (!isIndexOnlyApplicable || numSecondaryIndexesUsed != 1) {
+ opCost = indexCosts.get(0).costAdd(dataCosts.get(0));
+ }
//opCost is now the cost of the first (and cheapest) optional
index plus the corresponding data scan
//Intersect the first two and then the first three and so on.
@@ -807,7 +882,7 @@
ICost newIdxCost = indexCosts.get(0); // I0
ICost currentCost;
- for (int i = 1; i < optionalIndexesInfo.size(); i++) {
+ for (int i = 1; i < optionalIndexesInfoList.size(); i++) {
newIdxCost = newIdxCost.costAdd(indexCosts.get(i)); // I0 +
I1; I0 + I1 + I2
currentCost = newIdxCost.costAdd(dataCosts.get(i)); // I0 + I1
+ D01; I0 + I1 + I2 + D012
if (currentCost.costLT(opCost) || level <=
joinEnum.cboFullEnumLevel) { // save this cost and try adding one more index
@@ -815,8 +890,8 @@
} else {
// set the selectivites of the indexes not picked to be
-1.0, so we can set
// the skp index annotations correctly
- for (int j = i; j < optionalIndexesInfo.size(); j++) {
- optionalIndexesInfo.get(j).second = -1.0;
+ for (int j = i; j < optionalIndexesInfoList.size(); j++) {
+ optionalIndexesInfoList.get(j).setSelectivity(-1.0);
}
break; // can't get any cheaper.
}
@@ -828,18 +903,18 @@
// opCost is now the total cost of the indexes chosen along with the
associated data scan cost.
if (opCost.costGT(this.cheapestPlanCost) && level >
joinEnum.cboFullEnumLevel) { // cheapest plan cost is the data scan cost.
- for (int j = 0; j < optionalIndexesInfo.size(); j++) {
- optionalIndexesInfo.get(j).second = -1.0; // remove all
indexes from consideration.
+ for (int j = 0; j < optionalIndexesInfoList.size(); j++) {
+ optionalIndexesInfoList.get(j).setSelectivity(-1.0); // remove
all indexes from consideration.
}
}
totalCost = opCost.costAdd(mandatoryIndexesCost); // cost of all the
indexes chosen
// Now check if any of the indexes were array indexes. If so double
the cost
- boolean forceEnum = mandatoryIndexesInfo.size() > 0 || level <=
joinEnum.cboFullEnumLevel;
+ boolean forceEnum = mandatoryIndexesInfoList.size() > 0 || level <=
joinEnum.cboFullEnumLevel;
if (opCost.costLT(this.cheapestPlanCost) || forceEnum) {
pn = new PlanNode(allPlans.size(), joinEnum, this,
datasetNames.get(0), leafInput);
- pn.setScanAndHintInfo(PlanNode.ScanMethod.INDEX_SCAN,
mandatoryIndexesInfo, optionalIndexesInfo);
+ pn.setScanAndHintInfo(PlanNode.ScanMethod.INDEX_SCAN,
mandatoryIndexesInfoList, optionalIndexesInfoList);
pn.setScanCosts(totalCost);
planIndexesArray.add(pn.allPlansIndex);
allPlans.add(pn);
@@ -917,8 +992,8 @@
return changes;
}
- protected void addIndexAccessPlans(ILogicalOperator leafInput,
IIndexProvider indexProvider)
- throws AlgebricksException {
+ protected void addIndexAccessPlans(ILogicalOperator leafInput,
IIndexProvider indexProvider,
+ IOptimizationContext optctx) throws AlgebricksException {
IntroduceSelectAccessMethodRule tmp = new
IntroduceSelectAccessMethodRule();
List<Pair<IAccessMethod, Index>> chosenIndexes = new ArrayList<>();
Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = new
TreeMap<>();
@@ -933,7 +1008,7 @@
chosenIndexes, analyzedAMs, indexProvider);
restoreSelExprs(selExprs, selOpers);
if (index_access_possible) {
- costAndChooseIndexPlans(leafInput, analyzedAMs);
+ costAndChooseIndexPlans(leafInput, analyzedAMs, optctx);
}
} else {
restoreSelExprs(selExprs, selOpers);
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/PlanNode.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/PlanNode.java
index 2cf9130..109e006 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/PlanNode.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/PlanNode.java
@@ -25,7 +25,6 @@
import org.apache.asterix.metadata.entities.Index;
import org.apache.asterix.optimizer.cost.ICost;
import org.apache.hyracks.algebricks.common.utils.Pair;
-import org.apache.hyracks.algebricks.common.utils.Triple;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
@@ -286,9 +285,8 @@
numHintsUsed = 0;
}
- protected void setScanAndHintInfo(ScanMethod scanMethod,
- List<Triple<Index, Double, AbstractFunctionCallExpression>>
mandatoryIndexesInfo,
- List<Triple<Index, Double, AbstractFunctionCallExpression>>
optionalIndexesInfo) {
+ protected void setScanAndHintInfo(ScanMethod scanMethod,
List<JoinNode.IndexCostInfo> mandatoryIndexesInfo,
+ List<JoinNode.IndexCostInfo> optionalIndexesInfo) {
setScanMethod(scanMethod);
if (mandatoryIndexesInfo.size() > 0) {
indexHint = true;
@@ -298,9 +296,9 @@
// So seeing if only index is used.
if (optionalIndexesInfo.size() + mandatoryIndexesInfo.size() == 1) {
if (optionalIndexesInfo.size() == 1) {
- indexUsed = optionalIndexesInfo.get(0).first;
+ indexUsed = optionalIndexesInfo.get(0).getIndex();
} else {
- indexUsed = mandatoryIndexesInfo.get(0).first;
+ indexUsed = mandatoryIndexesInfo.get(0).getIndex();
}
}
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/indexadvisor/AdvisorConditionParser.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/indexadvisor/AdvisorConditionParser.java
index 3e0e5c4..d7404e0 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/indexadvisor/AdvisorConditionParser.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/indexadvisor/AdvisorConditionParser.java
@@ -191,7 +191,7 @@
return unnestFilterConditions;
}
- private static List<ScanFilterCondition>
extractPrimitiveConditions(ILogicalOperator op,
+ public static List<ScanFilterCondition>
extractPrimitiveConditions(ILogicalOperator op,
IOptimizationContext context) throws AlgebricksException {
List<ExprRef> filterExprRefs = new ArrayList<>();
ILogicalOperator tempOp = op;
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20475?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ifc835f8c4f8386e7b290c68891b4274f1ae87cd5
Gerrit-Change-Number: 20475
Gerrit-PatchSet: 1
Gerrit-Owner: Preetham Poluparthi <[email protected]>