This is an automated email from the ASF dual-hosted git repository.
alsuliman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 1a7282afbe [ASTERIXDB-3555][COMP] Use Join Samples to get Join
Selectivity
1a7282afbe is described below
commit 1a7282afbe617ab4544e42fc5fc3da9a060caff3
Author: murali4104 <[email protected]>
AuthorDate: Sun Mar 9 20:11:41 2025 -0700
[ASTERIXDB-3555][COMP] Use Join Samples to get Join Selectivity
Ext-ref: MB-65101
Change-Id: Iacf111e2d6ed5307b747045a57107eccb872d3f0
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19496
Reviewed-by: <[email protected]>
Reviewed-by: Peeyush Gupta <[email protected]>
Tested-by: Jenkins <[email protected]>
---
.../org/apache/asterix/optimizer/rules/cbo/Stats.java | 10 +++++++---
.../pushdown/other-pushdowns/other-pushdowns.015.plan | 18 +++++++++---------
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 52566efc9b..e1aab846bd 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -187,17 +187,21 @@ public class Stats {
return 0.5; // this may not be accurate obviously!
} // we could handle all relops here and other joins such as interval
joins and spatial joins, but the compile time might increase a lot
+ // If one of the tables is smaller than the target sample size, we
can join the samples directly
+ // to get a good estimate of the join selectivity.
Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails)
index1.getIndexDetails();
Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails)
index2.getIndexDetails();
if ((idxDetails1.getSourceCardinality() <
idxDetails1.getSampleCardinalityTarget())
|| (idxDetails2.getSourceCardinality() <
idxDetails2.getSampleCardinalityTarget())) {
double sel =
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
joinEnum.leafInputs.get(idx2 - 1), index1, index2,
joinExpr, jOp);
- if (sel > 0.0) { // if sel is 0.0 we call naiveJoinSelectivity
- return sel;
+
+ if (sel == 0.0) {
+ sel = 1.0 / Math.max(card1, card2); // R.uniq = S.uniq is
nicely modelled here. A good heuristic; the best we can do so far.
}
+ return sel;
}
- // Now we can handle only equi joins. We make all the uniform and
independence assumptions here.
+ // Now we can handle only equi joins. We make all the uniform and
independence assumptions here. Works well for PK-FK joins.
double sel = naiveJoinSelectivity(exprUsedVars, card1, card2,
idx1, idx2);
return sel;
}
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
index 8aae0e397d..29e8edcc12 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results_cbo/column/pushdown/other-pushdowns/other-pushdowns.015.plan
@@ -1,20 +1,20 @@
-distribute result [$$52] [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0,
total-cost: 12.0]
+distribute result [$$52] [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0,
total-cost: 12.0]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
- exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
- assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 4.0,
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ assign [$$52] <- [{"$1": $$57}] project: [$$52] [cardinality: 2.1,
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- ASSIGN |UNPARTITIONED|
- aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 4.0, doc-size:
2.0, op-cost: 0.0, total-cost: 12.0]
+ aggregate [$$57] <- [agg-sql-sum($$60)] [cardinality: 2.1, doc-size:
2.0, op-cost: 0.0, total-cost: 12.0]
-- AGGREGATE |UNPARTITIONED|
- exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0, total-cost:
12.0]
+ exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0, total-cost:
12.0]
-- RANDOM_MERGE_EXCHANGE |PARTITIONED|
- aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 4.0,
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
+ aggregate [$$60] <- [agg-sql-count($$50)] [cardinality: 2.1,
doc-size: 2.0, op-cost: 0.0, total-cost: 12.0]
-- AGGREGATE |PARTITIONED|
- project ([$$50]) [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0,
total-cost: 12.0]
+ project ([$$50]) [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0,
total-cost: 12.0]
-- STREAM_PROJECT |PARTITIONED|
- exchange [cardinality: 4.0, doc-size: 2.0, op-cost: 0.0,
total-cost: 12.0]
+ exchange [cardinality: 2.1, doc-size: 2.0, op-cost: 0.0,
total-cost: 12.0]
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- join (eq($$55, $$56)) [cardinality: 4.0, doc-size: 2.0,
op-cost: 4.0, total-cost: 12.0]
+ join (eq($$55, $$56)) [cardinality: 2.1, doc-size: 2.0,
op-cost: 4.0, total-cost: 12.0]
-- HYBRID_HASH_JOIN [$$55][$$56] |PARTITIONED|
exchange [cardinality: 2.0, doc-size: 1.0, op-cost: 0.0,
total-cost: 2.0]
-- HASH_PARTITION_EXCHANGE [$$55] |PARTITIONED|