This is an automated email from the ASF dual-hosted git repository.

vsarathy1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new fa8f423473 [ASTERIXDB-3544][COMP] Large difference between estimated 
cardinality and actual cardinality of query
fa8f423473 is described below

commit fa8f42347378a98960a51ca1ef4571cbe0aece74
Author: murali4104 <[email protected]>
AuthorDate: Thu Jan 9 15:40:14 2025 -0800

    [ASTERIXDB-3544][COMP] Large difference between estimated cardinality and 
actual cardinality of query
    
    Change-Id: Ia16d4e9bf92d31242b22ce8f3ab0c30f7777d8d5
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19290
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: <[email protected]>
    Reviewed-by: Vijay Sarathy <[email protected]>
---
 .../apache/asterix/optimizer/rules/cbo/Stats.java  | 68 ++++++++++++++++++----
 1 file changed, 56 insertions(+), 12 deletions(-)

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index ffaf952f1a..66f0ad596a 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -61,9 +61,11 @@ import 
org.apache.hyracks.algebricks.core.algebra.expressions.PredicateCardinali
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
@@ -192,7 +194,7 @@ public class Stats {
                 return 1.0;
             }
 
-            double estDistinctCardinalityFromSample = 
findPredicateCardinality(result, false);
+            double estDistinctCardinalityFromSample = 
findPredicateCardinality(result, true);
             double numDistincts = 
distinctEstimator2(estDistinctCardinalityFromSample, index);
             return 1.0 / numDistincts; // this is the expected selectivity for 
joins.
         }
@@ -653,6 +655,16 @@ public class Stats {
         }
         return index;
     }
+    // plan we need to generate in this routine.
+
+    //  project ([$$36])                                 add here
+    //    assign [$$36] <- [{"$1": $$39}]                add here
+    //      aggregate [$$39] <- [agg-sql-count($$34)]    add here
+    //        distinct ([$$34])                          add here
+    //          project ([$$34])                         add here
+    //            assign [$$34] <- [$$ar.getField("country")]      part of 
leaf input
+    //              data-scan []<-[$$37, $$ar, $$38] <- 
`travel-sample`.inventory.airport
+    //                empty-tuple-source
 
     protected List<List<IAObject>> 
runSamplingQueryDistinct(IOptimizationContext ctx, ILogicalOperator logOp,
             LogicalVariable var, Index index) throws AlgebricksException {
@@ -661,9 +673,10 @@ public class Stats {
         IOptimizationContext newCtx = 
ctx.getOptimizationContextFactory().cloneOptimizationContext(ctx);
 
         ILogicalOperator newLogOp = 
OperatorManipulationUtil.bottomUpCopyOperators(logOp);
-        storeSelectConditionsAndMakeThemTrue(newLogOp, null);
+
         // by passing in null, all select expressions will become true.
         // no need to restore them either as this is done on a copy of the 
logOp.
+        storeSelectConditionsAndMakeThemTrue(newLogOp, null);
 
         ILogicalOperator parent = 
joinEnum.findDataSourceScanOperatorParent(newLogOp);
         DataSourceScanOperator scanOp;
@@ -696,14 +709,25 @@ public class Stats {
             scanOp.setDataSource(sampledatasource);
         }
 
-        List<Mutable<ILogicalExpression>> aggFunArgs = new ArrayList<>(1);
-        aggFunArgs.add(new MutableObject<>(ConstantExpression.TRUE));
-
         AbstractLogicalExpression inputVarRef = new 
VariableReferenceExpression(var, newLogOp.getSourceLocation());
+        // add a project operator on top of newLogOp
+        ProjectOperator projOp = new ProjectOperator(var);
+        projOp.getInputs().add(new MutableObject<>(null)); //add an input
+        projOp.getInputs().get(0).setValue(newLogOp);
+        // add a distinct operator on top of the proj.
+        List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
+        VariableReferenceExpression e1 = new VariableReferenceExpression(var);
+        arguments.add(new MutableObject<>(e1));
+        DistinctOperator distOp = new DistinctOperator(arguments);
+        distOp.getInputs().add(new MutableObject<>(null)); //add an input
+        distOp.getInputs().get(0).setValue(projOp);
+        
distOp.setExecutionMode(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
+
+        // now add aggregate [$$39] <- [agg-sql-count($$34)] on top of distOp
         List<Mutable<ILogicalExpression>> fields = new ArrayList<>(1);
         fields.add(new MutableObject<>(inputVarRef));
 
-        BuiltinFunctionInfo countFn = 
BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.SQL_COUNT_DISTINCT);
+        BuiltinFunctionInfo countFn = 
BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.SQL_COUNT);
         AggregateFunctionCallExpression aggExpr = new 
AggregateFunctionCallExpression(countFn, false, fields);
 
         List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<>(1);
@@ -714,17 +738,37 @@ public class Stats {
         aggVarList.add(aggVar);
 
         AggregateOperator newAggOp = new AggregateOperator(aggVarList, 
aggExprList);
-        newAggOp.getInputs().add(new MutableObject<>(newLogOp));
+        newAggOp.getInputs().add(new MutableObject<>(distOp));
 
+        // now add assign [$$36] <- [{"$1": $$39}]   on top of newAggOp
         Mutable<ILogicalOperator> newAggOpRef = new MutableObject<>(newAggOp);
+        OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx); // is this 
really needed??
+
+        List<MutableObject> arr = 
createMutableObjectArray(newAggOp.getVariables());
+        AbstractFunctionCallExpression f = new ScalarFunctionCallExpression(
+                
FunctionUtil.getFunctionInfo(BuiltinFunctions.OPEN_RECORD_CONSTRUCTOR));
+        for (int i = 0; i < arr.size(); i++) {
+            f.getArguments().add(arr.get(i));
+        }
+
+        LogicalVariable newVar = newCtx.newVar();
+        AssignOperator assignOp = new AssignOperator(newVar, new 
MutableObject<>(f));
+        assignOp.getInputs().add(new MutableObject<>(newAggOp));
+        ProjectOperator pOp = new ProjectOperator(newVar);
+        pOp.getInputs().add(new MutableObject<>(assignOp));
+
+        Mutable<ILogicalOperator> newpOpRef = new MutableObject<>(pOp);
+
+        OperatorPropertiesUtil.typeOpRec(newpOpRef, newCtx);
 
-        OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx);
         LOGGER.info("***returning from sample query***");
 
-        String viewInPlan = new ALogicalPlanImpl(newAggOpRef).toString(); 
//useful when debugging
-        LOGGER.trace("viewInPlan");
-        LOGGER.trace(viewInPlan);
-        return AnalysisUtil.runQuery(newAggOpRef, Arrays.asList(aggVar), 
newCtx, IRuleSetFactory.RuleSetKind.SAMPLING);
+        if (LOGGER.isTraceEnabled()) {
+            String viewInPlan = new ALogicalPlanImpl(newpOpRef).toString(); 
//useful when debugging
+            LOGGER.trace("viewInPlan");
+            LOGGER.trace(viewInPlan);
+        }
+        return AnalysisUtil.runQuery(newpOpRef, Arrays.asList(newVar), newCtx, 
IRuleSetFactory.RuleSetKind.SAMPLING);
     }
 
     // This one gets the cardinality and also projection sizes

Reply via email to