This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit af89a7c9b8e52cea2804370e7af7ecf2d781d2af Author: Peeyush Gupta <peeyush.gu...@couchbase.com> AuthorDate: Mon Sep 8 10:54:55 2025 -0700 [ASTERIXDB-3641] Sampling query taking long time to run - user model changes: no - storage format changes: no - interface changes: no Ext-ref: MB-68268 Change-Id: I8ad01e32fafdb68aab738cf49070646a4c1ddfe9 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20366 Reviewed-by: Michael Blow <mb...@apache.org> Tested-by: Michael Blow <mb...@apache.org> --- .../apache/asterix/optimizer/rules/cbo/Stats.java | 51 +++++++++++++++++++--- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java index 0d0558e1a7..78cf365daa 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java @@ -177,11 +177,13 @@ public class Stats { boolean unnestOp1 = joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx1 - 1)); boolean unnestOp2 = joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx2 - 1)); boolean unnestOp = unnestOp1 || unnestOp2; - Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) index1.getIndexDetails(); - Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) index2.getIndexDetails(); - if (((idxDetails1.getSourceCardinality() < idxDetails1.getSampleCardinalityTarget()) - || (idxDetails2.getSourceCardinality() < idxDetails2.getSampleCardinalityTarget()) - || exprUsedVars.size() > 2) && !unnestOp) { //* if there are more than 2 variables, it is not a simple join like r.a op s.a + ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1); + ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1); + LogicalVariable var1 = exprUsedVars.get(0); + LogicalVariable var2 = exprUsedVars.get(1); + // If there are more than 2 variables, it is not a simple join like r.a op s.a + if (!unnestOp && (exprUsedVars.size() > 2 + || isJoinSelFromSamplesApplicable(leafInput1, leafInput2, index1, index2, var1, var2))) { double sels = findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1), joinEnum.leafInputs.get(idx2 - 1), index1, index2, joinExpr, jOp); if (sels == 0.0) { @@ -195,6 +197,45 @@ public class Stats { } } + private boolean isJoinSelFromSamplesApplicable(ILogicalOperator leafInput1, ILogicalOperator leafInput2, + Index index1, Index index2, LogicalVariable var1, LogicalVariable var2) throws AlgebricksException { + Index.SampleIndexDetails details1 = (Index.SampleIndexDetails) index1.getIndexDetails(); + Index.SampleIndexDetails details2 = (Index.SampleIndexDetails) index2.getIndexDetails(); + if (details1.getSourceCardinality() >= details1.getSampleCardinalityTarget() + && details2.getSourceCardinality() >= details2.getSampleCardinalityTarget()) { + return false; + } + double numDistinct1 = computeNumDistinct(leafInput1, var1, index1); + if (numDistinct1 < 0) { + return false; + } + double avgNumRowsPerValue1 = details1.getSourceCardinality() / numDistinct1; + double numDistinct2 = computeNumDistinct(leafInput2, var2, index2); + if (numDistinct2 < 0) { + return false; + } + double avgNumRowsPerValue2 = details2.getSourceCardinality() / numDistinct2; + return avgNumRowsPerValue1 * avgNumRowsPerValue2 * Math.min(numDistinct1, numDistinct2) <= Math + .max(Math.max(details1.getSourceCardinality(), details2.getSourceCardinality()), 750000); + } + + private double computeNumDistinct(ILogicalOperator leafInput, LogicalVariable var, Index index) + throws AlgebricksException { + List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx, leafInput, var, index); + if (result == null) { + return -1; // Negative value indicates failure + } + double numDistincts = findPredicateCardinality(result, true); + Index.SampleIndexDetails details = (Index.SampleIndexDetails) index.getIndexDetails(); + if (numDistincts == 0) { + numDistincts = details.getSourceCardinality(); // All values are nulls + } + if (numDistincts == 0) { + numDistincts = 1; // Sample is empty + } + return numDistincts; + } + private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars, double card1, double card2, int idx1, int idx2, boolean unnestOp1, boolean unnestOp2) throws AlgebricksException { ILogicalOperator leafInput;