>From Vijay Sarathy <[email protected]>:

Vijay Sarathy has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18365 )


Change subject: [ASTERIXDB-3427][COMP] CBO not picking array index
......................................................................

[ASTERIXDB-3427][COMP] CBO not picking array index

Change-Id: Ic47a22556a6fb9468e46244062d281bea7eb5fa1
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
2 files changed, 57 insertions(+), 27 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/65/18365/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index e868fa3..61f4d22 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -92,6 +92,7 @@
     protected double origCardinality; // without any selections
     protected double cardinality;
     protected double size; // avg size of whole document; available from the 
sample
+    protected double unnestFactor;
     protected double diskProjectionSize; // what is coming out of the disk; in 
case of row format, it is the entire document
                                          // in case of columnar we need to add 
sizes of individual fields.
     protected double projectionSizeAfterScan; // excludes fields only used for 
selections
@@ -168,6 +169,10 @@
         return size;
     }

+    public double getUnnestFactor() {
+        return unnestFactor;
+    }
+
     public void setLimitVal(int val) {
         limitVal = val;
     }
@@ -247,33 +252,23 @@
     public void setCardsAndSizes(Index.SampleIndexDetails idxDetails, 
ILogicalOperator leafInput)
             throws AlgebricksException {

-        double origDatasetCard, finalDatasetCard;
-        finalDatasetCard = origDatasetCard = idxDetails.getSourceCardinality();
+        double origDatasetCard, finalDatasetCard, sampleCard;
+        unnestFactor = 1.0;

         DataSourceScanOperator scanOp = 
joinEnum.findDataSourceScanOperator(leafInput);
         if (scanOp == null) {
             return; // what happens to the cards and sizes then? this may 
happen in case of in lists
         }

-        double sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(), 
origDatasetCard);
-        if (sampleCard == 0) { // should not happen unless the original 
dataset is empty
-            sampleCard = 1; // we may have to make some adjustments to costs 
when the sample returns very rows.
-
-            IWarningCollector warningCollector = 
joinEnum.optCtx.getWarningCollector();
-            if (warningCollector.shouldWarn()) {
-                warningCollector.warn(Warning.of(scanOp.getSourceLocation(),
-                        
org.apache.asterix.common.exceptions.ErrorCode.SAMPLE_HAS_ZERO_ROWS));
-            }
-        }
-
         List<List<IAObject>> result;
-        SelectOperator selop = (SelectOperator) 
joinEnum.findASelectOp(leafInput);
-        if (selop == null) { // add a SelectOperator with TRUE condition. The 
code below becomes simpler with a select operator.
-            selop = new SelectOperator(new 
MutableObject<>(ConstantExpression.TRUE));
-            ILogicalOperator op = selop;
+        SelectOperator selOp = (SelectOperator) 
joinEnum.findASelectOp(leafInput);
+        if (selOp == null) { // add a SelectOperator with TRUE condition. The 
code below becomes simpler with a select operator.
+            selOp = new SelectOperator(new 
MutableObject<>(ConstantExpression.TRUE));
+            ILogicalOperator op = selOp;
             op.getInputs().add(new MutableObject<>(leafInput));
             leafInput = op;
         }
+
         ILogicalOperator parent = 
joinEnum.findDataSourceScanOperatorParent(leafInput);
         Mutable<ILogicalOperator> ref = new MutableObject<>(leafInput);

@@ -303,6 +298,29 @@
         // if there is only one conjunct, I do not have to call the sampling 
query during index selection!
         // insert this in place of the scandatasourceOp.
         parent.getInputs().get(0).setValue(deepCopyofScan);
+
+        boolean unnest = joinEnum.findUnnestOp(selOp);
+        if (unnest) {
+            ILogicalExpression saveExpr = selOp.getCondition().getValue();
+            sampleCard = 
joinEnum.stats.computeUnnestedOriginalCardinality(selOp);
+            selOp.getCondition().setValue(saveExpr); // restore the expression
+            unnestFactor = sampleCard / 
idxDetails.getSampleCardinalityTarget();
+            finalDatasetCard = origDatasetCard = 
idxDetails.getSourceCardinality() * unnestFactor;
+        } else {
+            finalDatasetCard = origDatasetCard = 
idxDetails.getSourceCardinality();
+            sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(), 
origDatasetCard);
+        }
+
+        if (sampleCard == 0) { // should not happen unless the original 
dataset is empty
+            sampleCard = 1; // we may have to make some adjustments to costs 
when the sample returns very rows.
+
+            IWarningCollector warningCollector = 
joinEnum.optCtx.getWarningCollector();
+            if (warningCollector.shouldWarn()) {
+                warningCollector.warn(Warning.of(scanOp.getSourceLocation(),
+                        
org.apache.asterix.common.exceptions.ErrorCode.SAMPLE_HAS_ZERO_ROWS));
+            }
+        }
+
         // There are predicates here. So skip the predicates and get the 
original dataset card.
         // Now apply all the predicates and get the card after all predicates 
are applied.
         result = 
joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx, 
leafInput, jnArrayIndex,
@@ -316,8 +334,8 @@
             sizeVarsFromDisk = 
joinEnum.getStatsHandle().findSizeVarsFromDisk(result, getNumVarsFromDisk());
             sizeVarsAfterScan = 
joinEnum.getStatsHandle().findSizeVarsAfterScan(result, getNumVarsFromDisk());
         } else { // in case we did not get any tuples from the sample, get the 
size by setting the predicate to true.
-            ILogicalExpression saveExpr = selop.getCondition().getValue();
-            selop.getCondition().setValue(ConstantExpression.TRUE);
+            ILogicalExpression saveExpr = selOp.getCondition().getValue();
+            selOp.getCondition().setValue(ConstantExpression.TRUE);
             result = 
joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx, 
leafInput, jnArrayIndex,
                     primaryKey);
             double x = 
joinEnum.getStatsHandle().findPredicateCardinality(result, true);
@@ -329,7 +347,7 @@
                 sizeVarsFromDisk = 
joinEnum.getStatsHandle().findSizeVarsFromDisk(result, getNumVarsFromDisk());
                 sizeVarsAfterScan = 
joinEnum.getStatsHandle().findSizeVarsAfterScan(result, getNumVarsFromDisk());
             }
-            selop.getCondition().setValue(saveExpr); // restore the expression
+            selOp.getCondition().setValue(saveExpr); // restore the expression
         }

         // Adjust for zero predicate cardinality from the sample.
@@ -350,6 +368,7 @@
             // is small), no need to assign any artificial min. cardinality as 
the sample is accurate.
             setCardinality(finalDatasetCard, scaleUp);
         }
+        setOrigCardinality(origDatasetCard, false);

         setSizeVarsFromDisk(sizeVarsFromDisk);
         setSizeVarsAfterScan(sizeVarsAfterScan);
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 4a57a00..aa2f40f 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -512,13 +512,7 @@
             // SELECT count(*) as revenue
             // FROM   orders o, o.o_orderline ol
             // WHERE  TRUE;
-
-            // Replace ALL SELECTS with TRUE
-            List<ILogicalExpression> selExprs;
-            selExprs = storeSelectConditionsAndMakeThemTrue(selOp, null); // 
all these will be marked true and will be resorted later.
-            result = runSamplingQuery(optCtx, selOp);
-            restoreAllSelectConditions(selOp, selExprs, null);
-            sampleCard = findPredicateCardinality(result, false);
+            sampleCard = computeUnnestedOriginalCardinality(selOp);
         }
         // switch  the scanOp back
         parent.getInputs().get(0).setValue(scanOp);
@@ -543,6 +537,14 @@
         return record.numberOfFields();
     }

+    public double computeUnnestedOriginalCardinality(SelectOperator selOp) 
throws AlgebricksException {
+        // Replace ALL SELECTS with TRUE, restore them after running the 
sampling query.
+        List<ILogicalExpression> selExprs = 
storeSelectConditionsAndMakeThemTrue(selOp, null);
+        List<List<IAObject>> result = runSamplingQuery(optCtx, selOp);
+        restoreAllSelectConditions(selOp, selExprs, null);
+        return findPredicateCardinality(result, false);
+    }
+
     public double findSizeVarsFromDisk(List<List<IAObject>> result, int 
numDiskVars) {
         ARecord record = (ARecord) (((IAObject) ((List<IAObject>) 
(result.get(0))).get(0)));
         // Now figure out the projected size

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18365
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ic47a22556a6fb9468e46244062d281bea7eb5fa1
Gerrit-Change-Number: 18365
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <[email protected]>
Gerrit-MessageType: newchange

Reply via email to