This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit af89a7c9b8e52cea2804370e7af7ecf2d781d2af
Author: Peeyush Gupta <peeyush.gu...@couchbase.com>
AuthorDate: Mon Sep 8 10:54:55 2025 -0700

    [ASTERIXDB-3641] Sampling query taking long time to run
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Ext-ref: MB-68268
    Change-Id: I8ad01e32fafdb68aab738cf49070646a4c1ddfe9
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20366
    Reviewed-by: Michael Blow <mb...@apache.org>
    Tested-by: Michael Blow <mb...@apache.org>
---
 .../apache/asterix/optimizer/rules/cbo/Stats.java  | 51 +++++++++++++++++++---
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 0d0558e1a7..78cf365daa 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -177,11 +177,13 @@ public class Stats {
             boolean unnestOp1 = 
joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx1 - 1));
             boolean unnestOp2 = 
joinEnum.findUnnestOp(joinEnum.leafInputs.get(idx2 - 1));
             boolean unnestOp = unnestOp1 || unnestOp2;
-            Index.SampleIndexDetails idxDetails1 = (Index.SampleIndexDetails) 
index1.getIndexDetails();
-            Index.SampleIndexDetails idxDetails2 = (Index.SampleIndexDetails) 
index2.getIndexDetails();
-            if (((idxDetails1.getSourceCardinality() < 
idxDetails1.getSampleCardinalityTarget())
-                    || (idxDetails2.getSourceCardinality() < 
idxDetails2.getSampleCardinalityTarget())
-                    || exprUsedVars.size() > 2) && !unnestOp) { //* if there 
are more than 2 variables, it is not a simple join like r.a op s.a
+            ILogicalOperator leafInput1 = joinEnum.leafInputs.get(idx1 - 1);
+            ILogicalOperator leafInput2 = joinEnum.leafInputs.get(idx2 - 1);
+            LogicalVariable var1 = exprUsedVars.get(0);
+            LogicalVariable var2 = exprUsedVars.get(1);
+            // If there are more than 2 variables, it is not a simple join 
like r.a op s.a
+            if (!unnestOp && (exprUsedVars.size() > 2
+                    || isJoinSelFromSamplesApplicable(leafInput1, leafInput2, 
index1, index2, var1, var2))) {
                 double sels = 
findJoinSelFromSamples(joinEnum.leafInputs.get(idx1 - 1),
                         joinEnum.leafInputs.get(idx2 - 1), index1, index2, 
joinExpr, jOp);
                 if (sels == 0.0) {
@@ -195,6 +197,45 @@ public class Stats {
         }
     }
 
+    private boolean isJoinSelFromSamplesApplicable(ILogicalOperator 
leafInput1, ILogicalOperator leafInput2,
+            Index index1, Index index2, LogicalVariable var1, LogicalVariable 
var2) throws AlgebricksException { // Tells the caller whether findJoinSelFromSamples should be used for this pair of leaf inputs.
+        Index.SampleIndexDetails details1 = (Index.SampleIndexDetails) 
index1.getIndexDetails();
+        Index.SampleIndexDetails details2 = (Index.SampleIndexDetails) 
index2.getIndexDetails();
+        if (details1.getSourceCardinality() >= 
details1.getSampleCardinalityTarget()
+                && details2.getSourceCardinality() >= 
details2.getSampleCardinalityTarget()) {
+            return false; // Neither source has fewer rows than its sample cardinality target.
+        }
+        double numDistinct1 = computeNumDistinct(leafInput1, var1, index1);
+        if (numDistinct1 < 0) {
+            return false; // computeNumDistinct signals failure with a negative value.
+        }
+        double avgNumRowsPerValue1 = details1.getSourceCardinality() / 
numDistinct1; // Source rows per distinct value of var1; computeNumDistinct never returns 0, so no division by zero.
+        double numDistinct2 = computeNumDistinct(leafInput2, var2, index2);
+        if (numDistinct2 < 0) {
+            return false; // Same failure handling for the second input.
+        }
+        double avgNumRowsPerValue2 = details2.getSourceCardinality() / 
numDistinct2; // Source rows per distinct value of var2.
+        return avgNumRowsPerValue1 * avgNumRowsPerValue2 * 
Math.min(numDistinct1, numDistinct2) <= Math
+                .max(Math.max(details1.getSourceCardinality(), 
details2.getSourceCardinality()), 750000); // True when the product is at most max(larger source cardinality, 750000). NOTE(review): the product looks like an estimated join output size and 750000 a fixed cap - confirm.
+    }
+
+    private double computeNumDistinct(ILogicalOperator leafInput, 
LogicalVariable var, Index index)
+            throws AlgebricksException { // Returns the findPredicateCardinality value for the runSamplingQueryDistinct result, or -1 if that result is null; never returns 0.
+        List<List<IAObject>> result = runSamplingQueryDistinct(this.optCtx, 
leafInput, var, index);
+        if (result == null) {
+            return -1; // Negative value indicates failure
+        }
+        double numDistincts = findPredicateCardinality(result, true);
+        Index.SampleIndexDetails details = (Index.SampleIndexDetails) 
index.getIndexDetails();
+        if (numDistincts == 0) {
+            numDistincts = details.getSourceCardinality(); // All values are 
nulls
+        }
+        if (numDistincts == 0) {
+            numDistincts = 1; // Source cardinality is 0 as well (sample is empty); use 1 so callers can divide by the result
+        }
+        return numDistincts;
+    }
+
     private double naiveJoinSelectivity(List<LogicalVariable> exprUsedVars, 
double card1, double card2, int idx1,
             int idx2, boolean unnestOp1, boolean unnestOp2) throws 
AlgebricksException {
         ILogicalOperator leafInput;

Reply via email to