>From <murali.kris...@couchbase.com>: murali.kris...@couchbase.com has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20213 )
Change subject: [ASTERIXDB-3635][COMP] Tune single dataset index costing ...................................................................... [ASTERIXDB-3635][COMP] Tune single dataset index costing Change-Id: Id538efc7b67fca686fd04294ae79a1be9c7a709e --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java 1 file changed, 57 insertions(+), 74 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/13/20213/1 diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java index 5cbb6f8..ed9f0f3 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java @@ -730,101 +730,78 @@ private void buildIndexPlans() { List<PlanNode> allPlans = joinEnum.getAllPlans(); PlanNode pn; - ICost opCost, totalCost; + double ic, dc, tc, oc; // for debugging + List<Triple<Index, Double, AbstractFunctionCallExpression>> mandatoryIndexesInfo = new ArrayList<>(); List<Triple<Index, Double, AbstractFunctionCallExpression>> optionalIndexesInfo = new ArrayList<>(); double sel = 1.0; - boolean mandatoryArrayIndexUsed = false; - boolean optionalArrayIndexUsed = false; - opCost = this.joinEnum.getCostHandle().zeroCost(); + for (int i = 0; i < IndexCostInfo.size(); i++) { if (joinEnum.findUseIndexHint(IndexCostInfo.get(i).third)) { mandatoryIndexesInfo.add(IndexCostInfo.get(i)); - mandatoryArrayIndexUsed = mandatoryArrayIndexUsed - || (mandatoryIndexesInfo.get(i).first.getIndexType() == DatasetConfig.IndexType.ARRAY); } else { optionalIndexesInfo.add(IndexCostInfo.get(i)); - optionalArrayIndexUsed = optionalArrayIndexUsed - || (optionalIndexesInfo.get(i).first.getIndexType() == DatasetConfig.IndexType.ARRAY); } } - List<ICost> indexCosts = new ArrayList<>(); // these are the costs associated with the index only - // First cost all the mandatory indexes. These will be in the plan regardless of the cost + ICost indexCosts = this.joinEnum.getCostHandle().zeroCost(); + ICost totalCost = this.joinEnum.getCostHandle().zeroCost(); + ICost dataScanCost; + if (mandatoryIndexesInfo.size() > 0) { for (int i = 0; i < mandatoryIndexesInfo.size(); i++) { - indexCosts.add(joinEnum.getCostMethodsHandle().costIndexScan(this, mandatoryIndexesInfo.get(i).second)); + ICost cost = joinEnum.getCostMethodsHandle().costIndexScan(this, mandatoryIndexesInfo.get(i).second); + ic = cost.computeTotalCost(); // for debugging + if ((mandatoryIndexesInfo.get(i).first.getIndexType() == DatasetConfig.IndexType.ARRAY)) { + cost = cost.costAdd(cost); // double the cost for arrays. + } + indexCosts = indexCosts.costAdd(cost); // a running tally + ic = indexCosts.computeTotalCost(); // for debugging + sel *= mandatoryIndexesInfo.get(i).second; } - - opCost = this.joinEnum.getCostHandle().zeroCost(); - - for (int i = 0; i < mandatoryIndexesInfo.size(); i++) { - opCost = opCost.costAdd(indexCosts.get(i)); // opCost will have all the index scan costs - sel *= mandatoryIndexesInfo.get(i).second; // assuming selectivities are independent for now - } - - // Now add the data Scan cost. - ICost dataScanCost = joinEnum.getCostMethodsHandle().costIndexDataScan(this, sel); - opCost = opCost.costAdd(dataScanCost); // opCost now has the total cost of all the mandatory indexes + data costs. - if (mandatoryArrayIndexUsed) { - opCost = opCost.costAdd(opCost); - } + dataScanCost = joinEnum.getCostMethodsHandle().costIndexDataScan(this, sel); + dc = dataScanCost.computeTotalCost(); + totalCost = indexCosts.costAdd(dataScanCost); // this is the total cost of using the mandatory costs. This cost cannot be skipped. + tc = totalCost.computeTotalCost(); // for debugging + int j = 1; } - ICost mandatoryIndexesCost = opCost; // This will be added at the end to the total cost irrespective of optimality. - //opCost = this.joinEnum.getCostHandle().zeroCost(); // compute cost for optional indexes and store in opCost. - // Now lets deal with the optional indexes. These are the ones without any hints on them. - List<ICost> dataCosts = new ArrayList<>(); // these are the costs associated with accessing the data records - indexCosts.clear(); + ICost opCost = totalCost; if (optionalIndexesInfo.size() > 0) { - optionalIndexesInfo.sort(Comparator.comparingDouble(o -> o.second)); // sort on selectivity. + optionalIndexesInfo.sort(Comparator.comparingDouble(o -> o.second)); - // find the costs using one index at a time first. - - // sel is now the selectivity of all the previous mandatory indexes. for (int i = 0; i < optionalIndexesInfo.size(); i++) { - indexCosts.add(joinEnum.getCostMethodsHandle().costIndexScan(this, optionalIndexesInfo.get(i).second)); // I0; I1; I2; ... - // Now get the cost of the datascans involved with the multiplied selectivity - // dataCost (0) will contain the dataScan cost with the first index - //dataCost (1) will contain the dataScan cost with the first index and the 2nd index and so on. - sel *= optionalIndexesInfo.get(i).second; // assuming selectivities are independent for now - if (optionalIndexesInfo.get(i).first.isPrimaryIndex()) { - dataCosts.add(joinEnum.getCostHandle().zeroCost()); - } else { - dataCosts.add(joinEnum.getCostMethodsHandle().costIndexDataScan(this, sel)); // D0; D01; D012; ... + ICost cost = joinEnum.getCostMethodsHandle().costIndexScan(this, optionalIndexesInfo.get(i).second); + ic = cost.computeTotalCost(); // for debugging + if ((optionalIndexesInfo.get(i).first.getIndexType() == DatasetConfig.IndexType.ARRAY)) { + cost = cost.costAdd(cost); // double the cost for arrays. } - } - - // At the of of the above loop, I0, I1, I2 ... have been computed - // Also D0, D01, D012 ... have been computed. - - opCost = indexCosts.get(0).costAdd(dataCosts.get(0)); - //opCost is now the cost of the first (and cheapest) optional index plus the corresponding data scan - - //Intersect the first two and then the first three and so on. - //If the cost does not decrease, then stop - - ICost newIdxCost = indexCosts.get(0); // I0 - ICost currentCost; - for (int i = 1; i < optionalIndexesInfo.size(); i++) { - newIdxCost = newIdxCost.costAdd(indexCosts.get(i)); // I0 + I1; I0 + I1 + I2 - currentCost = newIdxCost.costAdd(dataCosts.get(i)); // I0 + I1 + D01; I0 + I1 + I2 + D012 - if (currentCost.costLT(opCost) || level <= joinEnum.cboFullEnumLevel) { // save this cost and try adding one more index - opCost = currentCost; - } else { - // set the selectivites of the indexes not picked to be -1.0, so we can set - // the skp index annotations correctly - for (int j = i; j < optionalIndexesInfo.size(); j++) { - optionalIndexesInfo.get(j).second = -1.0; + indexCosts = indexCosts.costAdd(cost); + ic = cost.computeTotalCost(); // for debugging + sel *= optionalIndexesInfo.get(i).second; + dataScanCost = joinEnum.getCostMethodsHandle().costIndexDataScan(this, sel); + dc = dataScanCost.computeTotalCost(); + opCost = indexCosts.costAdd(dataScanCost); + oc = opCost.computeTotalCost(); // for debugging + tc = totalCost.computeTotalCost(); + if (tc > 0.0) { + if (opCost.costGT(totalCost)) { // we can stop here since additional indexes are not useful + for (int j = i; j < optionalIndexesInfo.size(); j++) { + optionalIndexesInfo.get(j).second = -1.0; + } + opCost = totalCost; + oc = opCost.computeTotalCost(); // for debugging + break; } - break; // can't get any cheaper. + } else { + totalCost = indexCosts.costAdd(dataScanCost); + tc = totalCost.computeTotalCost(); } } - if (optionalArrayIndexUsed) { - opCost = opCost.costAdd(opCost); - } } - + + oc = opCost.computeTotalCost(); + double thisc = this.cheapestPlanCost.computeTotalCost(); // opCost is now the total cost of the indexes chosen along with the associated data scan cost. if (opCost.costGT(this.cheapestPlanCost) && level > joinEnum.cboFullEnumLevel) { // cheapest plan cost is the data scan cost. for (int j = 0; j < optionalIndexesInfo.size(); j++) { @@ -832,9 +809,6 @@ } } - totalCost = opCost.costAdd(mandatoryIndexesCost); // cost of all the indexes chosen - // Now check if any of the indexes were array indexes. If so double the cost - boolean forceEnum = mandatoryIndexesInfo.size() > 0 || level <= joinEnum.cboFullEnumLevel; if (opCost.costLT(this.cheapestPlanCost) || forceEnum) { pn = new PlanNode(allPlans.size(), joinEnum, this, datasetNames.get(0), leafInput); -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20213 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: Id538efc7b67fca686fd04294ae79a1be9c7a709e Gerrit-Change-Number: 20213 Gerrit-PatchSet: 1 Gerrit-Owner: murali.kris...@couchbase.com Gerrit-MessageType: newchange