>From Vijay Sarathy <[email protected]>:
Vijay Sarathy has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17765 )
Change subject: [ASTERIXDB-3246][COMP]: CBO costing of all physical operators
in a query plan
......................................................................
[ASTERIXDB-3246][COMP]: CBO costing of all physical operators in a query plan
Change-Id: I3196f664d716bb5b3806ec9a5a0dd5c1ea51ff62
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/SetAsterixPhysicalOperatorsRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/CostMethods.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EstimatedCostComputationVisitor.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M
hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/ICostMethods.java
8 files changed, 340 insertions(+), 24 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/65/17765/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/CostMethods.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/CostMethods.java
index 887cc94..48f44af 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/CostMethods.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/CostMethods.java
@@ -22,6 +22,9 @@
import org.apache.asterix.metadata.declared.MetadataProvider;
import org.apache.asterix.optimizer.rules.cbo.JoinNode;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.LimitOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
import
org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
public class CostMethods implements ICostMethods {
@@ -30,14 +33,18 @@
protected PhysicalOptimizationConfig physOptConfig;
protected long blockSize;
protected long DOP;
- protected double maxMemorySize;
+ protected double maxMemorySizeForJoin;
+ protected double maxMemorySizeForGroup;
+ protected double maxMemorySizeForSort;
public CostMethods(IOptimizationContext context) {
optCtx = context;
physOptConfig = context.getPhysicalOptimizationConfig();
blockSize = getBufferCachePageSize();
DOP = getDOP();
- maxMemorySize = getMaxMemorySize();
+ maxMemorySizeForJoin = getMaxMemorySizeForJoin();
+ maxMemorySizeForGroup = getMaxMemorySizeForGroup();
+ maxMemorySizeForJoin = getMaxMemorySizeForSort();
}
private long getBufferCacheSize() {
@@ -54,10 +61,18 @@
return optCtx.getComputationNodeDomain().cardinality();
}
- public double getMaxMemorySize() {
+ public double getMaxMemorySizeForJoin() {
return physOptConfig.getMaxFramesForJoin() *
physOptConfig.getFrameSize();
}
+ public double getMaxMemorySizeForGroup() {
+ return physOptConfig.getMaxFramesForGroupBy() *
physOptConfig.getFrameSize();
+ }
+
+ public double getMaxMemorySizeForSort() {
+ return physOptConfig.getMaxFramesExternalSort() *
physOptConfig.getFrameSize();
+ }
+
// These cost methods are very simple and rudimentary for now. These can
be improved by asterixdb developers as needed.
public Cost costFullScan(JoinNode jn) {
return new Cost(jn.computeJoinCardinality());
@@ -119,4 +134,20 @@
JoinNode rightJn = jn.getRightJn();
return new Cost(DOP * rightJn.computeJoinCardinality());
}
+
+ public Cost costHashGroupBy(GroupByOperator groupByOperator) {
+ return new Cost(100.0);
+ }
+
+ public Cost costSortGroupBy(GroupByOperator groupByOperator) {
+ return new Cost(200.0);
+ }
+
+ public Cost costOrderBy(OrderOperator orderOp) {
+ return null;
+ }
+
+ public Cost costLimit(LimitOperator limitOp) {
+ return null;
+ }
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/ICostMethods.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/ICostMethods.java
index ef4af41..2431dc1 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/ICostMethods.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/cost/ICostMethods.java
@@ -20,6 +20,9 @@
package org.apache.asterix.optimizer.cost;
import org.apache.asterix.optimizer.rules.cbo.JoinNode;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.LimitOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
public interface ICostMethods {
Cost costFullScan(JoinNode jn);
@@ -33,4 +36,12 @@
Cost costIndexNLJoin(JoinNode currentJn);
Cost costCartesianProductJoin(JoinNode currentJn);
+
+ Cost costHashGroupBy(GroupByOperator groupByOperator);
+
+ Cost costSortGroupBy(GroupByOperator groupByOperator);
+
+ Cost costOrderBy(OrderOperator orderOp);
+
+ Cost costLimit(LimitOperator limitOp);
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/SetAsterixPhysicalOperatorsRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/SetAsterixPhysicalOperatorsRule.java
index 5ec1a7f..2377c6e 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/SetAsterixPhysicalOperatorsRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/SetAsterixPhysicalOperatorsRule.java
@@ -36,6 +36,10 @@
import org.apache.asterix.metadata.functions.ExternalFunctionCompilerUtil;
import org.apache.asterix.om.functions.BuiltinFunctions;
import org.apache.asterix.optimizer.base.AnalysisUtil;
+import org.apache.asterix.optimizer.cost.Cost;
+import org.apache.asterix.optimizer.cost.CostMethods;
+import org.apache.asterix.optimizer.cost.ICost;
+import org.apache.asterix.optimizer.cost.ICostMethods;
import org.apache.asterix.optimizer.rules.am.AccessMethodJobGenParams;
import org.apache.asterix.optimizer.rules.am.BTreeJobGenParams;
import org.apache.asterix.optimizer.rules.util.AsterixJoinUtils;
@@ -50,6 +54,7 @@
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import
org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
import
org.apache.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
@@ -73,7 +78,7 @@
import
org.apache.hyracks.algebricks.core.algebra.visitors.ILogicalOperatorVisitor;
import
org.apache.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
-public final class SetAsterixPhysicalOperatorsRule extends
SetAlgebricksPhysicalOperatorsRule {
+public class SetAsterixPhysicalOperatorsRule extends
SetAlgebricksPhysicalOperatorsRule {
// Disable ASSIGN_BATCH physical operator if this option is set to 'false'
public static final String REWRITE_ATTEMPT_BATCH_ASSIGN =
"rewrite_attempt_batch_assign";
@@ -90,13 +95,32 @@
return
metadataProvider.getBooleanProperty(REWRITE_ATTEMPT_BATCH_ASSIGN,
REWRITE_ATTEMPT_BATCH_ASSIGN_DEFAULT);
}
- private static class AsterixPhysicalOperatorFactoryVisitor extends
AlgebricksPhysicalOperatorFactoryVisitor {
+ protected static class AsterixPhysicalOperatorFactoryVisitor extends
AlgebricksPhysicalOperatorFactoryVisitor {
private final boolean isBatchAssignEnabled;
+ protected ICost cost;
+ protected ICostMethods costMethods;
- private AsterixPhysicalOperatorFactoryVisitor(IOptimizationContext
context) {
+ protected AsterixPhysicalOperatorFactoryVisitor(IOptimizationContext
context) {
super(context);
isBatchAssignEnabled = isBatchAssignEnabled(context);
+ cost = new Cost();
+ costMethods = new CostMethods(context);
+ }
+
+ protected Enum groupByAlgorithm(GroupByOperator gby, Boolean
topLevelOp) {
+ ICost costHashGroupBy = costMethods.costHashGroupBy(gby);
+ ICost costSortGroupBy = costMethods.costSortGroupBy(gby);
+ if (gby.getNestedPlans().size() == 1 &&
gby.getNestedPlans().get(0).getRoots().size() == 1) {
+ if (costHashGroupBy.costLE(costSortGroupBy)) {
+ gby.getAnnotations().put(OperatorAnnotations.OP_COST_LOCAL,
+ (double)
Math.round(costHashGroupBy.computeTotalCost() * 100) / 100);
+ return GroupByAlgorithm.HASH_GROUP_BY;
+ }
+ }
+ gby.getAnnotations().put(OperatorAnnotations.OP_COST_LOCAL,
+ (double) Math.round(costSortGroupBy.computeTotalCost() *
100) / 100);
+ return GroupByAlgorithm.SORT_GROUP_BY;
}
@Override
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
index 4c9a10e..4989f53 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
@@ -49,13 +49,16 @@
import
org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import
org.apache.hyracks.algebricks.core.algebra.expressions.HashJoinExpressionAnnotation;
import
org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation;
+import
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
@@ -64,6 +67,7 @@
import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
import
org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.SourceLocation;
import org.apache.hyracks.api.exceptions.Warning;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -84,6 +88,9 @@
List<AssignOperator> assignOps;
List<ILogicalExpression> assignJoinExprs; // These are the join
expressions below the assign operator.
+ // The Distinct operators for each Select or DataSourceScan operator (if
applicable)
+ HashMap<DataSourceScanOperator, ILogicalOperator>
dataScanOrSelectAndDistinctOps;
+
public EnumerateJoinsRule(JoinEnum joinEnum) {
this.joinEnum = joinEnum;
}
@@ -118,6 +125,13 @@
return false;
}
+ // The Distinct operators for each Select or DataSourceScan operator
(if applicable)
+ dataScanOrSelectAndDistinctOps = new HashMap<>();
+ // If cboMode or cboTestMode is true, identify each DistinctOp or
GroupByOp for the corresponding DataScanOp
+ if (op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT) {
+ getDistinctOpsForJoinNodes(op);
+ }
+
// if this join has already been seen before, no need to apply the
rule again
if (context.checkIfInDontApplySet(this, op)) {
return false;
@@ -132,7 +146,6 @@
assignOps = new ArrayList<>();
assignJoinExprs = new ArrayList<>();
buildSets = new ArrayList<>();
-
IPlanPrettyPrinter pp = context.getPrettyPrinter();
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
leafInputNumber = 0;
@@ -158,7 +171,8 @@
// we need to build the smaller sets first. So we need to find
these first.
}
joinEnum.initEnum((AbstractLogicalOperator) op, cboMode, cboTestMode,
numberOfFromTerms, leafInputs, allJoinOps,
- assignOps, outerJoinsDependencyList, buildSets,
varLeafInputIds, context);
+ assignOps, outerJoinsDependencyList, buildSets,
varLeafInputIds, dataScanOrSelectAndDistinctOps,
+ context);
if (cboMode) {
if (!doAllDataSourcesHaveSamples(leafInputs, context)) {
@@ -388,6 +402,147 @@
}
}
+ private List<LogicalVariable> getGroupByDistinctVarList(ILogicalOperator
grpByDistinctOp) {
+ List<LogicalVariable> distinctVars = new ArrayList<>();
+ ILogicalExpression varRef;
+ ILogicalOperator nextOp;
+ if (grpByDistinctOp.getOperatorTag() == LogicalOperatorTag.DISTINCT) {
+ nextOp = grpByDistinctOp.getInputs().get(0).getValue();
+ if (nextOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+ List<Mutable<ILogicalExpression>> argList =
+ ((AbstractFunctionCallExpression) ((AssignOperator)
nextOp).getExpressions().get(0).getValue())
+ .getArguments();
+ for (int i = 0; i < argList.size(); i += 2) {
+ varRef = argList.get(i + 1).getValue();
+ if (varRef.getExpressionTag() ==
LogicalExpressionTag.VARIABLE) {
+ distinctVars.add(((VariableReferenceExpression)
varRef).getVariableReference());
+ }
+ }
+ }
+ } else if (grpByDistinctOp.getOperatorTag() ==
LogicalOperatorTag.GROUP) {
+ distinctVars = ((GroupByOperator)
grpByDistinctOp).getGroupByVarList();
+ }
+ return distinctVars;
+ }
+
+ private boolean
containsAllGroupByDistinctVarsInScanOp(DataSourceScanOperator scanOp,
+ ILogicalOperator grpByDistinctOp) {
+ LogicalOperatorTag tag = grpByDistinctOp.getOperatorTag();
+ if (tag == LogicalOperatorTag.GROUP || tag ==
LogicalOperatorTag.DISTINCT) {
+ List<LogicalVariable> distinctVars =
getGroupByDistinctVarList(grpByDistinctOp);
+ if (distinctVars.size() == 0) {
+ return false; // no Variable expressions in DistinctOp or
GroupByOp
+ }
+ List<LogicalVariable> scanVars = scanOp.getVariables();
+ List<LogicalVariable> foundDistinctVars = new ArrayList<>();
+ for (LogicalVariable scanVar : scanVars) {
+ if (distinctVars.contains(scanVar)) {
+ foundDistinctVars.add(scanVar);
+ }
+ }
+ // discarding the variable for Dataset name or alias from scanOp
+ return ((scanVars.size() - 1) == foundDistinctVars.size());
+ }
+ return false;
+ }
+
+ private void getDistinctOpsForJoinNodes(ILogicalOperator op) {
+ if (op.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT) {
+ return;
+ }
+ ILogicalOperator grpByDistinctOp = null; // null indicates no
DistinctOp or GroupByOp
+ DataSourceScanOperator scanOp;
+ while (true) {
+ LogicalOperatorTag tag = op.getOperatorTag();
+ if (tag == LogicalOperatorTag.DISTINCT || tag ==
LogicalOperatorTag.GROUP) {
+ grpByDistinctOp = op; // GroupByOp Variable expressions (if
any) take over DistinctOp ones
+ } else if (tag == LogicalOperatorTag.INNERJOIN || tag ==
LogicalOperatorTag.LEFTOUTERJOIN) {
+ if (grpByDistinctOp != null) {
+ for (int i = 0; i < op.getInputs().size(); i++) {
+ ILogicalOperator nextOp =
op.getInputs().get(i).getValue();
+ getDistinctOpsForJoinNodes(nextOp, grpByDistinctOp);
+ }
+ }
+ return;
+ } else if (tag == LogicalOperatorTag.DATASOURCESCAN) { // single
table queries
+ scanOp = (DataSourceScanOperator) op;
+ if (grpByDistinctOp != null) {
+ if (!containsAllGroupByDistinctVarsInScanOp(scanOp,
grpByDistinctOp)) {
+ // doesn't contain all PK attribute(s), so CBO can get
estimated cardinality from samples
+ dataScanOrSelectAndDistinctOps.put(scanOp,
grpByDistinctOp);
+ // String viewInPlan = new
ALogicalPlanImpl(new MutableObject<>(grpByDistinctOp)).toString(); //useful
when debugging
+ }
+ }
+ return;
+ }
+ op = op.getInputs().get(0).getValue();
+ if (op.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) {
+ return; // if this happens, there is nothing we can do in CBO
code since there is no DataSourceScan
+ }
+ }
+ }
+
+ private void getDistinctOpsForJoinNodes(ILogicalOperator op,
ILogicalOperator grpByDistinctOp) {
+ List<LogicalVariable> foundDistinctVars = new ArrayList<>();
+ ILogicalOperator selOp = null, assignOp = null;
+
+ LogicalOperatorTag tag = op.getOperatorTag();
+ // add DistinctOp to count distinct values in an attribute (except PK
attribute(s))
+ if (tag == LogicalOperatorTag.ASSIGN || tag ==
LogicalOperatorTag.SELECT) {
+ List<LogicalVariable> distinctVars =
getGroupByDistinctVarList(grpByDistinctOp);
+ if (distinctVars.size() == 0) {
+ return; // no Variable expressions in DistinctOp or GroupByOp
+ }
+
+ DataSourceScanOperator scanOp = null;
+ LogicalVariable assignVar;
+ while (tag != LogicalOperatorTag.EMPTYTUPLESOURCE) {
+ if (tag == LogicalOperatorTag.SELECT) {
+ selOp = op;
+ } else if (tag == LogicalOperatorTag.ASSIGN) {
+ assignVar = ((AssignOperator) op).getVariables().get(0);
+ int idx = distinctVars.indexOf(assignVar);
+ if (idx != -1 && assignOp == null) { // first
corresponding AssignOp found
+ assignOp = op;
+ }
+ if (idx != -1) { // add all Variable expressions of the
DataSourceScanOp
+ foundDistinctVars.add(assignVar);
+ }
+ } else if (tag == LogicalOperatorTag.DATASOURCESCAN) {
+ scanOp = (DataSourceScanOperator) op;
+ }
+ op = op.getInputs().get(0).getValue();
+ tag = op.getOperatorTag();
+ }
+
+ if (assignOp != null && scanOp != null) {
+ SourceLocation sourceLocation =
+ (selOp == null) ? assignOp.getSourceLocation() :
selOp.getSourceLocation();
+ ILogicalOperator inputOp = (selOp == null) ? assignOp : selOp;
+ List<Mutable<ILogicalExpression>> distinctExpr = new
ArrayList<>();
+ for (LogicalVariable var : foundDistinctVars) {
+ VariableReferenceExpression varExpr = new
VariableReferenceExpression(var);
+ varExpr.setSourceLocation(sourceLocation);
+ Mutable<ILogicalExpression> vRef = new
MutableObject<>(varExpr);
+ distinctExpr.add(vRef);
+ }
+
+ // create a Distinct operator
+ DistinctOperator distinctOp = new
DistinctOperator(distinctExpr);
+ distinctOp.setSourceLocation(sourceLocation);
+ distinctOp.getInputs().add(new MutableObject<>(inputOp));
+ distinctOp.setExecutionMode(inputOp.getExecutionMode());
+ dataScanOrSelectAndDistinctOps.put(scanOp, distinctOp);
+ // String viewInPlan = new ALogicalPlanImpl(new
MutableObject<>(distinctOp)).toString(); //useful when debugging
+ }
+ } else if (tag == LogicalOperatorTag.INNERJOIN || tag ==
LogicalOperatorTag.LEFTOUTERJOIN) {
+ for (int i = 0; i < op.getInputs().size(); i++) {
+ ILogicalOperator nextOp = op.getInputs().get(i).getValue();
+ getDistinctOpsForJoinNodes(nextOp, grpByDistinctOp);
+ }
+ }
+ }
+
private int getLeafInputId(LogicalVariable lv) {
if (varLeafInputIds.containsKey(lv))
return varLeafInputIds.get(lv);
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EstimatedCostComputationVisitor.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EstimatedCostComputationVisitor.java
index 4d1e0fb..57abe54 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EstimatedCostComputationVisitor.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EstimatedCostComputationVisitor.java
@@ -91,8 +91,23 @@
@Override
public Pair<Double, Double> visitGroupByOperator(GroupByOperator op,
Double arg) throws AlgebricksException {
- // Needs more work in the cardinality estimation code to estimate
group by cardinality and cost.
- return annotate(this, op, arg);
+
+ double grpByCost = 0.0;
+ Pair<Double, Double> cardCost =
op.getInputs().get(0).getValue().accept(this, arg);
+
+ for (Map.Entry<String, Object> anno : op.getAnnotations().entrySet()) {
+ if (anno.getValue() != null &&
anno.getKey().equals(OperatorAnnotations.OP_OUTPUT_CARDINALITY)) {
+ cardCost.setFirst((Double) anno.getValue());
+ }
+ if (anno.getValue() != null &&
anno.getKey().equals(OperatorAnnotations.OP_COST_LOCAL)) {
+ grpByCost = (double) anno.getValue();
+ }
+ }
+ double totalCost = cardCost.getSecond() + grpByCost;
+ op.getAnnotations().put(OperatorAnnotations.OP_COST_TOTAL, totalCost);
+ cardCost.setSecond(totalCost);
+
+ return cardCost;
}
@Override
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 3ce4e9f..bfbe66f 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -60,6 +60,7 @@
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
import
org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import
org.apache.hyracks.algebricks.core.algebra.expressions.BroadcastExpressionAnnotation;
import
org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
@@ -103,6 +104,7 @@
protected JoinNode[] jnArray; // array of all join nodes
protected int jnArraySize;
protected List<ILogicalOperator> leafInputs;
+ protected HashMap<DataSourceScanOperator, ILogicalOperator>
dataScanOrSelectAndDistinctOps;
protected List<ILogicalExpression> singleDatasetPreds;
protected List<AssignOperator> assignOps;
List<Quadruple<Integer, Integer, JoinOperator, Integer>>
outerJoinsDependencyList;
@@ -135,6 +137,7 @@
List<ILogicalOperator> leafInputs, List<JoinOperator> allJoinOps,
List<AssignOperator> assignOps,
List<Quadruple<Integer, Integer, JoinOperator, Integer>>
outerJoinsDependencyList,
List<Triple<Integer, Integer, Boolean>> buildSets,
HashMap<LogicalVariable, Integer> varLeafInputIds,
+ HashMap<DataSourceScanOperator, ILogicalOperator>
dataScanOrSelectAndDistinctOps,
IOptimizationContext context) throws AsterixException {
this.singleDatasetPreds = new ArrayList<>();
this.joinConditions = new ArrayList<>();
@@ -154,6 +157,7 @@
this.allJoinOps = allJoinOps;
this.buildSets = buildSets;
this.varLeafInputIds = varLeafInputIds;
+ this.dataScanOrSelectAndDistinctOps = dataScanOrSelectAndDistinctOps;
this.op = op;
this.forceJoinOrderMode = getForceJoinOrderMode(context);
this.queryPlanShape = getQueryPlanShape(context);
@@ -874,6 +878,16 @@
// now switch the input back.
parent.getInputs().get(0).setValue(scanOp);
jn.setCardinality(finalDatasetCard);
+
+ ILogicalOperator grpByDistinctOp =
this.dataScanOrSelectAndDistinctOps.get(scanOp);
+ // This needs to be scaled up, Mehnaz will fill in correct
value.
+ double distinctCardinality =
+ (grpByDistinctOp != null) ?
stats.findDistinctValuesCardinality(grpByDistinctOp) : 0.0;
+
+
grpByDistinctOp.getAnnotations().put(OperatorAnnotations.OP_OUTPUT_CARDINALITY,
+ (double) Math.round(distinctCardinality * 100) /
100);
+
grpByDistinctOp.getAnnotations().put(OperatorAnnotations.OP_INPUT_CARDINALITY,
+ (double) Math.round(finalDatasetCard * 100) / 100);
}
}
dataScanPlan = jn.addSingleDatasetPlans();
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index d09783d..b0a9e71 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -523,6 +523,52 @@
private void transformtoAnyInPlan(SelectOperator newSelOp) {
}
+ protected double findDistinctValuesCardinality(ILogicalOperator
grpByDistinctOp) throws AlgebricksException {
+ ILogicalOperator parent =
joinEnum.findDataSourceScanOperatorParent(grpByDistinctOp);
+ DataSourceScanOperator scanOp = (DataSourceScanOperator)
parent.getInputs().get(0).getValue();
+
+ if (scanOp == null) {
+ return 1.0; // what happens to the cards and sizes then? this may
happen in case of in lists
+ }
+
+ Index index = findSampleIndex(scanOp, optCtx);
+ if (index == null) {
+ return 1.0;
+ }
+
+ Index.SampleIndexDetails idxDetails = (Index.SampleIndexDetails)
index.getIndexDetails();
+ double origDatasetCard = idxDetails.getSourceCardinality();
+ // origDatasetCard must be equal to datasetCard. So we do not need
datasetCard passed in here. VIJAY check if
+ // this parameter can be removed.
+ double sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(),
origDatasetCard);
+ if (sampleCard == 0) {
+ sampleCard = 1;
+ IWarningCollector warningCollector = optCtx.getWarningCollector();
+ if (warningCollector.shouldWarn()) {
+ warningCollector.warn(Warning.of(scanOp.getSourceLocation(),
+
org.apache.asterix.common.exceptions.ErrorCode.SAMPLE_HAS_ZERO_ROWS));
+ }
+ }
+
+ // replace the DataSourceScanOp with the sampling source
+ SampleDataSource sampledatasource =
joinEnum.getSampleDataSource(scanOp);
+ DataSourceScanOperator deepCopyOfScan =
+ (DataSourceScanOperator)
OperatorManipulationUtil.bottomUpCopyOperators(scanOp);
+ deepCopyOfScan.setDataSource(sampledatasource);
+
+ // insert this in place of the DataSourceScanOp.
+ parent.getInputs().get(0).setValue(deepCopyOfScan);
+
+ List<List<IAObject>> result = runSamplingQuery(optCtx,
grpByDistinctOp);
+ double cardinality = ((double) ((AInt64)
result.get(0).get(0)).getLongValue());
+ // TODO: Do cardinality estimation (Jackknife maybe?)
+
+ // switch the scanOp back
+ parent.getInputs().get(0).setValue(scanOp);
+
+ return cardinality;
+ }
+
protected List<List<IAObject>> runSamplingQuery(IOptimizationContext ctx,
ILogicalOperator logOp)
throws AlgebricksException {
LOGGER.info("***running sample query***");
diff --git
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
index 2784a6a..a81527f 100644
---
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
+++
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
@@ -127,6 +127,11 @@
public class SetAlgebricksPhysicalOperatorsRule implements
IAlgebraicRewriteRule {
+ protected enum GroupByAlgorithm {
+ HASH_GROUP_BY,
+ SORT_GROUP_BY
+ }
+
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context)
throws AlgebricksException {
@@ -144,7 +149,7 @@
return true;
}
- private static void computeDefaultPhysicalOp(AbstractLogicalOperator op,
boolean topLevelOp,
+ protected static void computeDefaultPhysicalOp(AbstractLogicalOperator op,
boolean topLevelOp,
ILogicalOperatorVisitor<IPhysicalOperator, Boolean> physOpFactory)
throws AlgebricksException {
if (op.getPhysicalOperator() == null) {
IPhysicalOperator physOp = op.accept(physOpFactory, topLevelOp);
@@ -213,19 +218,15 @@
throws AlgebricksException {
ensureAllVariables(gby.getGroupByList(), Pair::getSecond);
-
- if (gby.getNestedPlans().size() == 1 &&
gby.getNestedPlans().get(0).getRoots().size() == 1) {
- if (topLevelOp &&
((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) ==
Boolean.TRUE)
- ||
(gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) ==
Boolean.TRUE))) {
- ExternalGroupByPOperator extGby =
createExternalGroupByPOperator(gby);
- if (extGby != null) {
- return extGby;
- } else if (gby.getSourceLocation() != null) {
- IWarningCollector warningCollector =
context.getWarningCollector();
- if (warningCollector.shouldWarn()) {
-
warningCollector.warn(Warning.of(gby.getSourceLocation(),
ErrorCode.INAPPLICABLE_HINT,
- "Group By", "hash"));
- }
+ if (groupByAlgorithm(gby, topLevelOp) ==
GroupByAlgorithm.HASH_GROUP_BY) {
+ ExternalGroupByPOperator extGby =
createExternalGroupByPOperator(gby);
+ if (extGby != null) {
+ return extGby;
+ } else if (gby.getSourceLocation() != null) {
+ IWarningCollector warningCollector =
context.getWarningCollector();
+ if (warningCollector.shouldWarn()) {
+ warningCollector.warn(
+ Warning.of(gby.getSourceLocation(),
ErrorCode.INAPPLICABLE_HINT, "Group By", "hash"));
}
}
}
@@ -237,6 +238,16 @@
}
}
+ protected Enum groupByAlgorithm(GroupByOperator gby, Boolean
topLevelOp) {
+ if (gby.getNestedPlans().size() == 1 &&
gby.getNestedPlans().get(0).getRoots().size() == 1) {
+ if (topLevelOp &&
((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) ==
Boolean.TRUE)
+ ||
(gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) ==
Boolean.TRUE))) {
+ return GroupByAlgorithm.HASH_GROUP_BY;
+ }
+ }
+ return GroupByAlgorithm.SORT_GROUP_BY;
+ }
+
protected ExternalGroupByPOperator
createExternalGroupByPOperator(GroupByOperator gby)
throws AlgebricksException {
boolean hasIntermediateAgg =
generateMergeAggregationExpressions(gby);
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17765
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I3196f664d716bb5b3806ec9a5a0dd5c1ea51ff62
Gerrit-Change-Number: 17765
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <[email protected]>
Gerrit-MessageType: newchange