>From Vijay Sarathy <[email protected]>:
Vijay Sarathy has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18365 )
Change subject: [ASTERIXDB-3427][COMP] CBO not picking array index
......................................................................
[ASTERIXDB-3427][COMP] CBO not picking array index
Change-Id: Ic47a22556a6fb9468e46244062d281bea7eb5fa1
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
2 files changed, 57 insertions(+), 27 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/65/18365/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index e868fa3..61f4d22 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -92,6 +92,7 @@
protected double origCardinality; // without any selections
protected double cardinality;
protected double size; // avg size of whole document; available from the
sample
+ protected double unnestFactor;
protected double diskProjectionSize; // what is coming out of the disk; in
case of row format, it is the entire document
// in case of columnar we need to add
sizes of individual fields.
protected double projectionSizeAfterScan; // excludes fields only used for
selections
@@ -168,6 +169,10 @@
return size;
}
+ public double getUnnestFactor() {
+ return unnestFactor;
+ }
+
public void setLimitVal(int val) {
limitVal = val;
}
@@ -247,33 +252,23 @@
public void setCardsAndSizes(Index.SampleIndexDetails idxDetails,
ILogicalOperator leafInput)
throws AlgebricksException {
- double origDatasetCard, finalDatasetCard;
- finalDatasetCard = origDatasetCard = idxDetails.getSourceCardinality();
+ double origDatasetCard, finalDatasetCard, sampleCard;
+ unnestFactor = 1.0;
DataSourceScanOperator scanOp =
joinEnum.findDataSourceScanOperator(leafInput);
if (scanOp == null) {
return; // what happens to the cards and sizes then? this may
happen in case of in lists
}
- double sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(),
origDatasetCard);
- if (sampleCard == 0) { // should not happen unless the original
dataset is empty
- sampleCard = 1; // we may have to make some adjustments to costs
when the sample returns very rows.
-
- IWarningCollector warningCollector =
joinEnum.optCtx.getWarningCollector();
- if (warningCollector.shouldWarn()) {
- warningCollector.warn(Warning.of(scanOp.getSourceLocation(),
-
org.apache.asterix.common.exceptions.ErrorCode.SAMPLE_HAS_ZERO_ROWS));
- }
- }
-
List<List<IAObject>> result;
- SelectOperator selop = (SelectOperator)
joinEnum.findASelectOp(leafInput);
- if (selop == null) { // add a SelectOperator with TRUE condition. The
code below becomes simpler with a select operator.
- selop = new SelectOperator(new
MutableObject<>(ConstantExpression.TRUE));
- ILogicalOperator op = selop;
+ SelectOperator selOp = (SelectOperator)
joinEnum.findASelectOp(leafInput);
+ if (selOp == null) { // add a SelectOperator with TRUE condition. The
code below becomes simpler with a select operator.
+ selOp = new SelectOperator(new
MutableObject<>(ConstantExpression.TRUE));
+ ILogicalOperator op = selOp;
op.getInputs().add(new MutableObject<>(leafInput));
leafInput = op;
}
+
ILogicalOperator parent =
joinEnum.findDataSourceScanOperatorParent(leafInput);
Mutable<ILogicalOperator> ref = new MutableObject<>(leafInput);
@@ -303,6 +298,29 @@
// if there is only one conjunct, I do not have to call the sampling
query during index selection!
// insert this in place of the scandatasourceOp.
parent.getInputs().get(0).setValue(deepCopyofScan);
+
+ boolean unnest = joinEnum.findUnnestOp(selOp);
+ if (unnest) {
+ ILogicalExpression saveExpr = selOp.getCondition().getValue();
+ sampleCard =
joinEnum.stats.computeUnnestedOriginalCardinality(selOp);
+ selOp.getCondition().setValue(saveExpr); // restore the expression
+ unnestFactor = sampleCard /
idxDetails.getSampleCardinalityTarget();
+ finalDatasetCard = origDatasetCard =
idxDetails.getSourceCardinality() * unnestFactor;
+ } else {
+ finalDatasetCard = origDatasetCard =
idxDetails.getSourceCardinality();
+ sampleCard = Math.min(idxDetails.getSampleCardinalityTarget(),
origDatasetCard);
+ }
+
+ if (sampleCard == 0) { // should not happen unless the original
dataset is empty
+ sampleCard = 1; // we may have to make some adjustments to costs
when the sample returns very rows.
+
+ IWarningCollector warningCollector =
joinEnum.optCtx.getWarningCollector();
+ if (warningCollector.shouldWarn()) {
+ warningCollector.warn(Warning.of(scanOp.getSourceLocation(),
+
org.apache.asterix.common.exceptions.ErrorCode.SAMPLE_HAS_ZERO_ROWS));
+ }
+ }
+
// There are predicates here. So skip the predicates and get the
original dataset card.
// Now apply all the predicates and get the card after all predicates
are applied.
result =
joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx,
leafInput, jnArrayIndex,
@@ -316,8 +334,8 @@
sizeVarsFromDisk =
joinEnum.getStatsHandle().findSizeVarsFromDisk(result, getNumVarsFromDisk());
sizeVarsAfterScan =
joinEnum.getStatsHandle().findSizeVarsAfterScan(result, getNumVarsFromDisk());
} else { // in case we did not get any tuples from the sample, get the
size by setting the predicate to true.
- ILogicalExpression saveExpr = selop.getCondition().getValue();
- selop.getCondition().setValue(ConstantExpression.TRUE);
+ ILogicalExpression saveExpr = selOp.getCondition().getValue();
+ selOp.getCondition().setValue(ConstantExpression.TRUE);
result =
joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx,
leafInput, jnArrayIndex,
primaryKey);
double x =
joinEnum.getStatsHandle().findPredicateCardinality(result, true);
@@ -329,7 +347,7 @@
sizeVarsFromDisk =
joinEnum.getStatsHandle().findSizeVarsFromDisk(result, getNumVarsFromDisk());
sizeVarsAfterScan =
joinEnum.getStatsHandle().findSizeVarsAfterScan(result, getNumVarsFromDisk());
}
- selop.getCondition().setValue(saveExpr); // restore the expression
+ selOp.getCondition().setValue(saveExpr); // restore the expression
}
// Adjust for zero predicate cardinality from the sample.
@@ -350,6 +368,7 @@
// is small), no need to assign any artificial min. cardinality as
the sample is accurate.
setCardinality(finalDatasetCard, scaleUp);
}
+ setOrigCardinality(origDatasetCard, false);
setSizeVarsFromDisk(sizeVarsFromDisk);
setSizeVarsAfterScan(sizeVarsAfterScan);
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 4a57a00..aa2f40f 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -512,13 +512,7 @@
// SELECT count(*) as revenue
// FROM orders o, o.o_orderline ol
// WHERE TRUE;
-
- // Replace ALL SELECTS with TRUE
- List<ILogicalExpression> selExprs;
- selExprs = storeSelectConditionsAndMakeThemTrue(selOp, null); //
all these will be marked true and will be resorted later.
- result = runSamplingQuery(optCtx, selOp);
- restoreAllSelectConditions(selOp, selExprs, null);
- sampleCard = findPredicateCardinality(result, false);
+ sampleCard = computeUnnestedOriginalCardinality(selOp);
}
// switch the scanOp back
parent.getInputs().get(0).setValue(scanOp);
@@ -543,6 +537,14 @@
return record.numberOfFields();
}
+ public double computeUnnestedOriginalCardinality(SelectOperator selOp)
throws AlgebricksException {
+ // Replace ALL SELECTS with TRUE, restore them after running the
sampling query.
+ List<ILogicalExpression> selExprs =
storeSelectConditionsAndMakeThemTrue(selOp, null);
+ List<List<IAObject>> result = runSamplingQuery(optCtx, selOp);
+ restoreAllSelectConditions(selOp, selExprs, null);
+ return findPredicateCardinality(result, false);
+ }
+
public double findSizeVarsFromDisk(List<List<IAObject>> result, int
numDiskVars) {
ARecord record = (ARecord) (((IAObject) ((List<IAObject>)
(result.get(0))).get(0)));
// Now figure out the projected size
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18365
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ic47a22556a6fb9468e46244062d281bea7eb5fa1
Gerrit-Change-Number: 18365
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <[email protected]>
Gerrit-MessageType: newchange