>From <[email protected]>:
[email protected] has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17967 )
Change subject: [ASTERIXDB-3303][COMP] Projection Sizes continued
......................................................................
[ASTERIXDB-3303][COMP] Projection Sizes continued
Change-Id: Iba8bf1171750994195a3426f22c25a99720f0983
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M
asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
M
asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
5 files changed, 100 insertions(+), 40 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/67/17967/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
index 82e7b32..442b9e9 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
@@ -94,6 +94,8 @@
// The OrderBy operator at root of the query tree (if exists)
private ILogicalOperator rootOrderByOp;
+    // Variables referenced by the topmost assign and by the join conditions; handed to JoinEnum.initEnum.
+    private List<LogicalVariable> resultAndJoinVars = new ArrayList<>();
+
public EnumerateJoinsRule(JoinEnum joinEnum) {
this.joinEnum = joinEnum;
dataScanAndGroupByDistinctOps = new HashMap<>(); // initialized only
once at the beginning of the rule
@@ -138,6 +140,17 @@
// Find the order by op, so we can annotate cost/cards
findOrderByOp(op);
+
+ // Find the topmost assign, so we can find all the final projected
variables.
+ ILogicalOperator tmp = op;
+
+ while (tmp.getOperatorTag() !=
LogicalOperatorTag.EMPTYTUPLESOURCE) {
+ if (tmp.getOperatorTag().equals(LogicalOperatorTag.ASSIGN)) {
+ addAllAssignExprVars(resultAndJoinVars, (AssignOperator)
tmp);
+ break;
+ }
+ tmp = tmp.getInputs().get(0).getValue();
+ }
}
// if this join has already been seen before, no need to apply the
rule again
@@ -158,6 +171,7 @@
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
leafInputNumber = 0;
boolean canTransform = getJoinOpsAndLeafInputs(op);
+ collectJoinConditionsVariables();
if (!canTransform) {
return false;
@@ -180,7 +194,7 @@
}
joinEnum.initEnum((AbstractLogicalOperator) op, cboMode, cboTestMode,
numberOfFromTerms, leafInputs, allJoinOps,
assignOps, outerJoinsDependencyList, buildSets,
varLeafInputIds, dataScanAndGroupByDistinctOps,
- rootGroupByDistinctOp, rootOrderByOp, context);
+ rootGroupByDistinctOp, rootOrderByOp, resultAndJoinVars,
context);
if (cboMode) {
if (!doAllDataSourcesHaveSamples(leafInputs, context)) {
@@ -265,6 +279,24 @@
return true;
}
+    /**
+     * Appends the variables used by the condition of every join operator in
+     * {@code allJoinOps} to {@code resultAndJoinVars}.
+     */
+    private void collectJoinConditionsVariables() {
+        for (JoinOperator joinEntry : allJoinOps) {
+            // collect all the variables used in join expressions. These will be projected from the base level
+            List<LogicalVariable> conditionVars = new ArrayList<>();
+            joinEntry.getAbstractJoinOp().getCondition().getValue().getUsedVariables(conditionVars);
+            resultAndJoinVars.addAll(conditionVars);
+        }
+    }
+
+    /**
+     * Appends to {@code resultAndJoinVars} the variables used by each expression of the given assign operator.
+     *
+     * @param resultAndJoinVars list that receives the used variables
+     * @param op assign operator whose expressions are inspected
+     */
+    private void addAllAssignExprVars(List<LogicalVariable> resultAndJoinVars, AssignOperator op) {
+        for (Mutable<ILogicalExpression> exprRef : op.getExpressions()) {
+            List<LogicalVariable> usedVars = new ArrayList<>();
+            ILogicalExpression assignExpr = exprRef.getValue();
+            assignExpr.getUsedVariables(usedVars);
+            resultAndJoinVars.addAll(usedVars);
+        }
+    }
+
private void pushAssignsAboveJoins(ILogicalOperator op, AssignOperator
aOp, ILogicalExpression jexpr,
MutableBoolean removed) {
System.out.println("op " + op.toString());
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 3a5dda7..846b521 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -138,6 +138,7 @@
protected String queryPlanShape;
protected ICost cost;
protected ICostMethods costMethods;
+ List<LogicalVariable> resultAndJoinVars;
public JoinEnum() {
}
@@ -147,8 +148,8 @@
List<Quadruple<Integer, Integer, JoinOperator, Integer>>
outerJoinsDependencyList,
List<Triple<Integer, Integer, Boolean>> buildSets,
HashMap<LogicalVariable, Integer> varLeafInputIds,
HashMap<DataSourceScanOperator, ILogicalOperator>
dataScanAndGroupByDistinctOps,
- ILogicalOperator grpByDistinctOp, ILogicalOperator orderByOp,
IOptimizationContext context)
- throws AsterixException {
+ ILogicalOperator grpByDistinctOp, ILogicalOperator orderByOp,
List<LogicalVariable> resultAndJoinVars,
+ IOptimizationContext context) throws AsterixException {
this.singleDatasetPreds = new ArrayList<>();
this.joinConditions = new ArrayList<>();
this.joinHints = new HashMap<>();
@@ -170,6 +171,7 @@
this.dataScanAndGroupByDistinctOps = dataScanAndGroupByDistinctOps;
this.rootGroupByDistinctOp = grpByDistinctOp;
this.rootOrderByOp = orderByOp;
+ this.resultAndJoinVars = resultAndJoinVars;
this.op = op;
this.forceJoinOrderMode = getForceJoinOrderMode(context);
this.queryPlanShape = getQueryPlanShape(context);
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 95f2da6..530a482 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -42,6 +42,7 @@
import org.apache.asterix.optimizer.base.AnalysisUtil;
import
org.apache.asterix.optimizer.rules.am.array.AbstractOperatorFromSubplanRewrite;
import org.apache.asterix.translator.ConstantHelper;
+import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -577,8 +578,12 @@
// assign [$$68, $$69, $$70, $$71, $$72] <- [serialized-size($$60),
serialized-size($$str), serialized-size($$61), serialized-size($$65),
serialized-size($$67)]
// add the assign [$$56, ..., ] <- [encoded-size($$67), ..., ] on top
of newAggOp
- List<LogicalVariable> vars = new ArrayList<>();
- VariableUtilities.getLiveVariables(logOp, vars);
+ List<LogicalVariable> vars1 = new ArrayList<>();
+ VariableUtilities.getLiveVariables(logOp, vars1);
+ List<LogicalVariable> vars3 =
+ new ArrayList<>(CollectionUtils.subtract(vars1,
joinEnum.resultAndJoinVars /* vars2 */));
+ List<LogicalVariable> vars = new
ArrayList<>(CollectionUtils.subtract(vars1, vars3));
+
LogicalVariable newVar;
// array to keep track of the assigns
List<LogicalVariable> newVars = new ArrayList<>();
@@ -659,6 +664,18 @@
}
+    /**
+     * Walks down the first-input chain starting at {@code op} and collects the first
+     * variable produced by every ASSIGN operator encountered. The walk stops at an
+     * EMPTYTUPLESOURCE operator, at a {@code null} operator, or at an operator without inputs.
+     *
+     * @param op operator at which the walk starts
+     * @return the collected variables, in the order their assigns were visited
+     */
+    private List<LogicalVariable> projectedFields(ILogicalOperator op) {
+        List<LogicalVariable> vars = new ArrayList<>();
+        while (op != null && op.getOperatorTag() != LogicalOperatorTag.EMPTYTUPLESOURCE) {
+            if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+                // NOTE(review): only the first assigned variable is taken;
+                // confirm that multi-variable assigns are not expected here.
+                vars.add(((AssignOperator) op).getVariables().get(0));
+            }
+            // An operator without inputs ends the chain; stop instead of failing on get(0).
+            if (op.getInputs().isEmpty()) {
+                break;
+            }
+            op = op.getInputs().get(0).getValue();
+        }
+        return vars;
+    }
+
private List<MutableObject> createMutableObjectArray(List<LogicalVariable>
vars) {
List<MutableObject> arr = new ArrayList<>();
for (int i = 0; i < vars.size(); i++) {
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
index 7db992d..aa774b6 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
@@ -46,12 +46,30 @@
-- BTREE_SEARCH |PARTITIONED|
exchange [cardinality: 0.0, op-cost: 0.0,
total-cost: 0.0]
-- BROADCAST_EXCHANGE |PARTITIONED|
- project ([$$120, $$128, $$124])
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ project ([$$124, $$120, $$128])
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-- STREAM_PROJECT |PARTITIONED|
exchange [cardinality: 0.0, op-cost:
0.0, total-cost: 0.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (eq($$123, $$136))
[cardinality: 248.35, op-cost: 398.35, total-cost: 2821.71]
- -- HYBRID_HASH_JOIN [$$123][$$136]
|PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$136][$$123]
|PARTITIONED|
+ exchange [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]
+ -- HASH_PARTITION_EXCHANGE [$$136]
|PARTITIONED|
+ project ([$$124, $$136])
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ select (and(lt($$121,
"1994-01-01"), ge($$121, "1993-01-01"))) [cardinality: 248.35, op-cost: 0.0,
total-cost: 1500.0]
+ -- STREAM_SELECT |PARTITIONED|
+ project ([$$124, $$136,
$$121]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- STREAM_PROJECT
|PARTITIONED|
+ assign [$$136, $$121] <-
[$$o.getField(1), $$o.getField(4)] [cardinality: 0.0, op-cost: 0.0, total-cost:
0.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality:
0.0, op-cost: 0.0, total-cost: 0.0]
+ -- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
+ data-scan []<-[$$124,
$$o] <- tpch.Orders [cardinality: 1500.0, op-cost: 1500.0, total-cost: 1500.0]
+ -- DATASOURCE_SCAN
|PARTITIONED|
+ exchange
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ --
ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ --
EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]
-- HASH_PARTITION_EXCHANGE [$$123]
|PARTITIONED|
project ([$$120, $$128, $$123])
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
@@ -88,24 +106,6 @@
--
ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
--
EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]
- -- HASH_PARTITION_EXCHANGE [$$136]
|PARTITIONED|
- project ([$$124, $$136])
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
- -- STREAM_PROJECT |PARTITIONED|
- select (and(lt($$121,
"1994-01-01"), ge($$121, "1993-01-01"))) [cardinality: 248.35, op-cost: 0.0,
total-cost: 1500.0]
- -- STREAM_SELECT |PARTITIONED|
- project ([$$124, $$136,
$$121]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
- -- STREAM_PROJECT
|PARTITIONED|
- assign [$$136, $$121] <-
[$$o.getField(1), $$o.getField(4)] [cardinality: 0.0, op-cost: 0.0, total-cost:
0.0]
- -- ASSIGN |PARTITIONED|
- exchange [cardinality:
0.0, op-cost: 0.0, total-cost: 0.0]
- -- ONE_TO_ONE_EXCHANGE
|PARTITIONED|
- data-scan []<-[$$124,
$$o] <- tpch.Orders [cardinality: 1500.0, op-cost: 1500.0, total-cost: 1500.0]
- -- DATASOURCE_SCAN
|PARTITIONED|
- exchange
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
- --
ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
- --
EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange [cardinality: 10.0, op-cost: 40.0,
total-cost: 50.0]
-- BROADCAST_EXCHANGE |PARTITIONED|
project ([$$130, $$127]) [cardinality: 10.0,
op-cost: 0.0, total-cost: 10.0]
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
index d1f713d..640f9b7 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
@@ -19,21 +19,7 @@
exchange [cardinality: 8.0, op-cost: 0.0, total-cost: 45.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (eq($$33, $$34)) [cardinality: 8.0, op-cost: 15.0,
total-cost: 45.0]
- -- HYBRID_HASH_JOIN [$$34][$$33] |PARTITIONED|
- exchange [cardinality: 8.0, op-cost: 8.0, total-cost:
16.0]
- -- HASH_PARTITION_EXCHANGE [$$34] |PARTITIONED|
- project ([$$39, $$34]) [cardinality: 8.0, op-cost:
0.0, total-cost: 8.0]
- -- STREAM_PROJECT |PARTITIONED|
- assign [$$39] <- [$$p2.getField("name")]
[cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
- -- ASSIGN |PARTITIONED|
- exchange [cardinality: 8.0, op-cost: 8.0,
total-cost: 16.0]
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan []<-[$$34, $$p2] <-
test.ColumnDataset3 project ({name:any}) [cardinality: 8.0, op-cost: 8.0,
total-cost: 8.0]
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange [cardinality: 0.0, op-cost: 0.0,
total-cost: 0.0]
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- HYBRID_HASH_JOIN [$$33][$$34] |PARTITIONED|
exchange [cardinality: 7.0, op-cost: 7.0, total-cost:
14.0]
-- HASH_PARTITION_EXCHANGE [$$33] |PARTITIONED|
project ([$$38, $$33]) [cardinality: 7.0, op-cost:
0.0, total-cost: 7.0]
@@ -48,3 +34,17 @@
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange [cardinality: 8.0, op-cost: 8.0, total-cost:
16.0]
+ -- HASH_PARTITION_EXCHANGE [$$34] |PARTITIONED|
+ project ([$$39, $$34]) [cardinality: 8.0, op-cost:
0.0, total-cost: 8.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$39] <- [$$p2.getField("name")]
[cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality: 8.0, op-cost: 8.0,
total-cost: 16.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$34, $$p2] <-
test.ColumnDataset3 project ({name:any}) [cardinality: 8.0, op-cost: 8.0,
total-cost: 8.0]
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange [cardinality: 0.0, op-cost: 0.0,
total-cost: 0.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source [cardinality: 0.0,
op-cost: 0.0, total-cost: 0.0]
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17967
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Iba8bf1171750994195a3426f22c25a99720f0983
Gerrit-Change-Number: 17967
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange