>From <[email protected]>:

[email protected] has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17967 )


Change subject: [ASTERIXDB-3303][COMP] Projection Sizes continued
......................................................................

[ASTERIXDB-3303][COMP] Projection Sizes continued

Change-Id: Iba8bf1171750994195a3426f22c25a99720f0983
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
M asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
5 files changed, 100 insertions(+), 40 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/67/17967/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
index 82e7b32..442b9e9 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/EnumerateJoinsRule.java
@@ -94,6 +94,8 @@
     // The OrderBy operator at root of the query tree (if exists)
     private ILogicalOperator rootOrderByOp;

+    private List<LogicalVariable> resultAndJoinVars = new ArrayList();
+
     public EnumerateJoinsRule(JoinEnum joinEnum) {
         this.joinEnum = joinEnum;
         dataScanAndGroupByDistinctOps = new HashMap<>(); // initialized only 
once at the beginning of the rule
@@ -138,6 +140,17 @@

             // Find the order by op, so we can annotate cost/cards
             findOrderByOp(op);
+
+            // Find the topmost assign, so we can find all the final projected 
variables.
+            ILogicalOperator tmp = op;
+
+            while (tmp.getOperatorTag() != 
LogicalOperatorTag.EMPTYTUPLESOURCE) {
+                if (tmp.getOperatorTag().equals(LogicalOperatorTag.ASSIGN)) {
+                    addAllAssignExprVars(resultAndJoinVars, (AssignOperator) 
tmp);
+                    break;
+                }
+                tmp = tmp.getInputs().get(0).getValue();
+            }
         }

         // if this join has already been seen before, no need to apply the 
rule again
@@ -158,6 +171,7 @@
         printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
         leafInputNumber = 0;
         boolean canTransform = getJoinOpsAndLeafInputs(op);
+        collectJoinConditionsVariables();

         if (!canTransform) {
             return false;
@@ -180,7 +194,7 @@
         }
         joinEnum.initEnum((AbstractLogicalOperator) op, cboMode, cboTestMode, 
numberOfFromTerms, leafInputs, allJoinOps,
                 assignOps, outerJoinsDependencyList, buildSets, 
varLeafInputIds, dataScanAndGroupByDistinctOps,
-                rootGroupByDistinctOp, rootOrderByOp, context);
+                rootGroupByDistinctOp, rootOrderByOp, resultAndJoinVars, 
context);

         if (cboMode) {
             if (!doAllDataSourcesHaveSamples(leafInputs, context)) {
@@ -265,6 +279,24 @@
         return true;
     }

+    private void collectJoinConditionsVariables() {
+        for (JoinOperator jOp : allJoinOps) {
+            AbstractBinaryJoinOperator joinOp = jOp.getAbstractJoinOp();
+            ILogicalExpression expr = joinOp.getCondition().getValue();
+            List<LogicalVariable> vars = new ArrayList<>();
+            expr.getUsedVariables(vars);
+            resultAndJoinVars.addAll(vars); // collect all the variables used 
in join expressions. These will be projected from the base level
+        }
+    }
+
+    private void addAllAssignExprVars(List<LogicalVariable> resultAndJoinVars, 
AssignOperator op) {
+        for (Mutable<ILogicalExpression> exp : op.getExpressions()) {
+            List<LogicalVariable> vars = new ArrayList<>();
+            exp.getValue().getUsedVariables(vars);
+            resultAndJoinVars.addAll(vars);
+        }
+    }
+
     private void pushAssignsAboveJoins(ILogicalOperator op, AssignOperator 
aOp, ILogicalExpression jexpr,
             MutableBoolean removed) {
         System.out.println("op " + op.toString());
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 3a5dda7..846b521 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -138,6 +138,7 @@
     protected String queryPlanShape;
     protected ICost cost;
     protected ICostMethods costMethods;
+    List<LogicalVariable> resultAndJoinVars;

     public JoinEnum() {
     }
@@ -147,8 +148,8 @@
             List<Quadruple<Integer, Integer, JoinOperator, Integer>> 
outerJoinsDependencyList,
             List<Triple<Integer, Integer, Boolean>> buildSets, 
HashMap<LogicalVariable, Integer> varLeafInputIds,
             HashMap<DataSourceScanOperator, ILogicalOperator> 
dataScanAndGroupByDistinctOps,
-            ILogicalOperator grpByDistinctOp, ILogicalOperator orderByOp, 
IOptimizationContext context)
-            throws AsterixException {
+            ILogicalOperator grpByDistinctOp, ILogicalOperator orderByOp, 
List<LogicalVariable> resultAndJoinVars,
+            IOptimizationContext context) throws AsterixException {
         this.singleDatasetPreds = new ArrayList<>();
         this.joinConditions = new ArrayList<>();
         this.joinHints = new HashMap<>();
@@ -170,6 +171,7 @@
         this.dataScanAndGroupByDistinctOps = dataScanAndGroupByDistinctOps;
         this.rootGroupByDistinctOp = grpByDistinctOp;
         this.rootOrderByOp = orderByOp;
+        this.resultAndJoinVars = resultAndJoinVars;
         this.op = op;
         this.forceJoinOrderMode = getForceJoinOrderMode(context);
         this.queryPlanShape = getQueryPlanShape(context);
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 95f2da6..530a482 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -42,6 +42,7 @@
 import org.apache.asterix.optimizer.base.AnalysisUtil;
 import 
org.apache.asterix.optimizer.rules.am.array.AbstractOperatorFromSubplanRewrite;
 import org.apache.asterix.translator.ConstantHelper;
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -577,8 +578,12 @@
         // assign [$$68, $$69, $$70, $$71, $$72] <- [serialized-size($$60), 
serialized-size($$str), serialized-size($$61), serialized-size($$65), 
serialized-size($$67)]

         // add the assign [$$56, ..., ] <- [encoded-size($$67), ..., ] on top 
of newAggOp
-        List<LogicalVariable> vars = new ArrayList<>();
-        VariableUtilities.getLiveVariables(logOp, vars);
+        List<LogicalVariable> vars1 = new ArrayList<>();
+        VariableUtilities.getLiveVariables(logOp, vars1);
+        List<LogicalVariable> vars3 =
+                new ArrayList<>(CollectionUtils.subtract(vars1, 
joinEnum.resultAndJoinVars /* vars2 */));
+        List<LogicalVariable> vars = new 
ArrayList<>(CollectionUtils.subtract(vars1, vars3));
+
         LogicalVariable newVar;
         // array to keep track of the assigns
         List<LogicalVariable> newVars = new ArrayList<>();
@@ -659,6 +664,18 @@

     }

+    private List<LogicalVariable> projectedFields(ILogicalOperator op) {
+        List<LogicalVariable> vars = new ArrayList<>();
+        while (op != null && op.getOperatorTag() != 
LogicalOperatorTag.EMPTYTUPLESOURCE) {
+            if (op.getOperatorTag().equals(LogicalOperatorTag.ASSIGN)) {
+                AssignOperator aOp = (AssignOperator) op;
+                vars.add(aOp.getVariables().get(0));
+            }
+            op = op.getInputs().get(0).getValue();
+        }
+        return vars;
+    }
+
     private List<MutableObject> createMutableObjectArray(List<LogicalVariable> 
vars) {
         List<MutableObject> arr = new ArrayList<>();
         for (int i = 0; i < vars.size(); i++) {
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
index 7db992d..aa774b6 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/cardinality-estimation/join-queries/join-queries.8.plan
@@ -46,12 +46,30 @@
                                   -- BTREE_SEARCH  |PARTITIONED|
                                     exchange [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
                                     -- BROADCAST_EXCHANGE  |PARTITIONED|
-                                      project ([$$120, $$128, $$124]) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                      project ([$$124, $$120, $$128]) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                       -- STREAM_PROJECT  |PARTITIONED|
                                         exchange [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0]
                                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                           join (eq($$123, $$136)) 
[cardinality: 248.35, op-cost: 398.35, total-cost: 2821.71]
-                                          -- HYBRID_HASH_JOIN [$$123][$$136]  
|PARTITIONED|
+                                          -- HYBRID_HASH_JOIN [$$136][$$123]  
|PARTITIONED|
+                                            exchange [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
+                                            -- HASH_PARTITION_EXCHANGE [$$136] 
 |PARTITIONED|
+                                              project ([$$124, $$136]) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                              -- STREAM_PROJECT  |PARTITIONED|
+                                                select (and(lt($$121, 
"1994-01-01"), ge($$121, "1993-01-01"))) [cardinality: 248.35, op-cost: 0.0, 
total-cost: 1500.0]
+                                                -- STREAM_SELECT  |PARTITIONED|
+                                                  project ([$$124, $$136, 
$$121]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                  -- STREAM_PROJECT  
|PARTITIONED|
+                                                    assign [$$136, $$121] <- 
[$$o.getField(1), $$o.getField(4)] [cardinality: 0.0, op-cost: 0.0, total-cost: 
0.0]
+                                                    -- ASSIGN  |PARTITIONED|
+                                                      exchange [cardinality: 
0.0, op-cost: 0.0, total-cost: 0.0]
+                                                      -- ONE_TO_ONE_EXCHANGE  
|PARTITIONED|
+                                                        data-scan []<-[$$124, 
$$o] <- tpch.Orders [cardinality: 1500.0, op-cost: 1500.0, total-cost: 1500.0]
+                                                        -- DATASOURCE_SCAN  
|PARTITIONED|
+                                                          exchange 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                          -- 
ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                                            empty-tuple-source 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+                                                            -- 
EMPTY_TUPLE_SOURCE  |PARTITIONED|
                                             exchange [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
                                             -- HASH_PARTITION_EXCHANGE [$$123] 
 |PARTITIONED|
                                               project ([$$120, $$128, $$123]) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
@@ -88,24 +106,6 @@
                                                               -- 
ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                                 
empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
                                                                 -- 
EMPTY_TUPLE_SOURCE  |PARTITIONED|
-                                            exchange [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
-                                            -- HASH_PARTITION_EXCHANGE [$$136] 
 |PARTITIONED|
-                                              project ([$$124, $$136]) 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                              -- STREAM_PROJECT  |PARTITIONED|
-                                                select (and(lt($$121, 
"1994-01-01"), ge($$121, "1993-01-01"))) [cardinality: 248.35, op-cost: 0.0, 
total-cost: 1500.0]
-                                                -- STREAM_SELECT  |PARTITIONED|
-                                                  project ([$$124, $$136, 
$$121]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                                  -- STREAM_PROJECT  
|PARTITIONED|
-                                                    assign [$$136, $$121] <- 
[$$o.getField(1), $$o.getField(4)] [cardinality: 0.0, op-cost: 0.0, total-cost: 
0.0]
-                                                    -- ASSIGN  |PARTITIONED|
-                                                      exchange [cardinality: 
0.0, op-cost: 0.0, total-cost: 0.0]
-                                                      -- ONE_TO_ONE_EXCHANGE  
|PARTITIONED|
-                                                        data-scan []<-[$$124, 
$$o] <- tpch.Orders [cardinality: 1500.0, op-cost: 1500.0, total-cost: 1500.0]
-                                                        -- DATASOURCE_SCAN  
|PARTITIONED|
-                                                          exchange 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                                          -- 
ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                                            empty-tuple-source 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
-                                                            -- 
EMPTY_TUPLE_SOURCE  |PARTITIONED|
                         exchange [cardinality: 10.0, op-cost: 40.0, 
total-cost: 50.0]
                         -- BROADCAST_EXCHANGE  |PARTITIONED|
                           project ([$$130, $$127]) [cardinality: 10.0, 
op-cost: 0.0, total-cost: 10.0]
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
index d1f713d..640f9b7 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/field-access-pushdown/field-access-pushdown.008.plan
@@ -19,21 +19,7 @@
                   exchange [cardinality: 8.0, op-cost: 0.0, total-cost: 45.0]
                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                     join (eq($$33, $$34)) [cardinality: 8.0, op-cost: 15.0, 
total-cost: 45.0]
-                    -- HYBRID_HASH_JOIN [$$34][$$33]  |PARTITIONED|
-                      exchange [cardinality: 8.0, op-cost: 8.0, total-cost: 
16.0]
-                      -- HASH_PARTITION_EXCHANGE [$$34]  |PARTITIONED|
-                        project ([$$39, $$34]) [cardinality: 8.0, op-cost: 
0.0, total-cost: 8.0]
-                        -- STREAM_PROJECT  |PARTITIONED|
-                          assign [$$39] <- [$$p2.getField("name")] 
[cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
-                          -- ASSIGN  |PARTITIONED|
-                            exchange [cardinality: 8.0, op-cost: 8.0, 
total-cost: 16.0]
-                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                              data-scan []<-[$$34, $$p2] <- 
test.ColumnDataset3 project ({name:any}) [cardinality: 8.0, op-cost: 8.0, 
total-cost: 8.0]
-                              -- DATASOURCE_SCAN  |PARTITIONED|
-                                exchange [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
-                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                                  empty-tuple-source [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
-                                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                    -- HYBRID_HASH_JOIN [$$33][$$34]  |PARTITIONED|
                       exchange [cardinality: 7.0, op-cost: 7.0, total-cost: 
14.0]
                       -- HASH_PARTITION_EXCHANGE [$$33]  |PARTITIONED|
                         project ([$$38, $$33]) [cardinality: 7.0, op-cost: 
0.0, total-cost: 7.0]
@@ -48,3 +34,17 @@
                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                   empty-tuple-source [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
                                   -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                      exchange [cardinality: 8.0, op-cost: 8.0, total-cost: 
16.0]
+                      -- HASH_PARTITION_EXCHANGE [$$34]  |PARTITIONED|
+                        project ([$$39, $$34]) [cardinality: 8.0, op-cost: 
0.0, total-cost: 8.0]
+                        -- STREAM_PROJECT  |PARTITIONED|
+                          assign [$$39] <- [$$p2.getField("name")] 
[cardinality: 8.0, op-cost: 0.0, total-cost: 8.0]
+                          -- ASSIGN  |PARTITIONED|
+                            exchange [cardinality: 8.0, op-cost: 8.0, 
total-cost: 16.0]
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              data-scan []<-[$$34, $$p2] <- 
test.ColumnDataset3 project ({name:any}) [cardinality: 8.0, op-cost: 8.0, 
total-cost: 8.0]
+                              -- DATASOURCE_SCAN  |PARTITIONED|
+                                exchange [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0]
+                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                  empty-tuple-source [cardinality: 0.0, 
op-cost: 0.0, total-cost: 0.0]
+                                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17967
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Iba8bf1171750994195a3426f22c25a99720f0983
Gerrit-Change-Number: 17967
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange

Reply via email to