From Vijay Sarathy <[email protected]>:

Vijay Sarathy has submitted this change.
( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19290 )

(Patch-set 2 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the submitted
one.)

Change subject: [ASTERIXDB-3544][COMP] Large difference between estimated
cardinality and actual cardinality of query
......................................................................

[ASTERIXDB-3544][COMP] Large difference between estimated cardinality and 
actual cardinality of query

Change-Id: Ia16d4e9bf92d31242b22ce8f3ab0c30f7777d8d5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19290
Tested-by: Jenkins <[email protected]>
Reviewed-by: <[email protected]>
Reviewed-by: Vijay Sarathy <[email protected]>
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
1 file changed, 69 insertions(+), 12 deletions(-)

Approvals:
  [email protected]: Looks good to me, but someone else must approve
  Vijay Sarathy: Looks good to me, approved
  Jenkins: Verified




diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index ffaf952..66f0ad5 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -61,9 +61,11 @@
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
@@ -192,7 +194,7 @@
                 return 1.0;
             }

-            double estDistinctCardinalityFromSample = 
findPredicateCardinality(result, false);
+            double estDistinctCardinalityFromSample = 
findPredicateCardinality(result, true);
             double numDistincts = 
distinctEstimator2(estDistinctCardinalityFromSample, index);
             return 1.0 / numDistincts; // this is the expected selectivity for 
joins.
         }
@@ -653,6 +655,16 @@
         }
         return index;
     }
+    // plan we need to generate in this routine.
+
+    //  project ([$$36])                                 add here
+    //    assign [$$36] <- [{"$1": $$39}]                add here
+    //      aggregate [$$39] <- [agg-sql-count($$34)]    add here
+    //        distinct ([$$34])                          add here
+    //          project ([$$34])                         add here
+    //            assign [$$34] <- [$$ar.getField("country")]      part of 
leaf input
+    //              data-scan []<-[$$37, $$ar, $$38] <- 
`travel-sample`.inventory.airport
+    //                empty-tuple-source

     protected List<List<IAObject>> 
runSamplingQueryDistinct(IOptimizationContext ctx, ILogicalOperator logOp,
             LogicalVariable var, Index index) throws AlgebricksException {
@@ -661,9 +673,10 @@
         IOptimizationContext newCtx = 
ctx.getOptimizationContextFactory().cloneOptimizationContext(ctx);

         ILogicalOperator newLogOp = 
OperatorManipulationUtil.bottomUpCopyOperators(logOp);
-        storeSelectConditionsAndMakeThemTrue(newLogOp, null);
+
         // by passing in null, all select expressions will become true.
         // no need to restore them either as this is done on a copy of the 
logOp.
+        storeSelectConditionsAndMakeThemTrue(newLogOp, null);

         ILogicalOperator parent = 
joinEnum.findDataSourceScanOperatorParent(newLogOp);
         DataSourceScanOperator scanOp;
@@ -696,14 +709,25 @@
             scanOp.setDataSource(sampledatasource);
         }

-        List<Mutable<ILogicalExpression>> aggFunArgs = new ArrayList<>(1);
-        aggFunArgs.add(new MutableObject<>(ConstantExpression.TRUE));
-
         AbstractLogicalExpression inputVarRef = new 
VariableReferenceExpression(var, newLogOp.getSourceLocation());
+        // add a project operator on top of newLogOp
+        ProjectOperator projOp = new ProjectOperator(var);
+        projOp.getInputs().add(new MutableObject<>(null)); //add an input
+        projOp.getInputs().get(0).setValue(newLogOp);
+        // add a distinct operator on top of the proj.
+        List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
+        VariableReferenceExpression e1 = new VariableReferenceExpression(var);
+        arguments.add(new MutableObject<>(e1));
+        DistinctOperator distOp = new DistinctOperator(arguments);
+        distOp.getInputs().add(new MutableObject<>(null)); //add an input
+        distOp.getInputs().get(0).setValue(projOp);
+        
distOp.setExecutionMode(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
+
+        // now add aggregate [$$39] <- [agg-sql-count($$34)] on top of distop
         List<Mutable<ILogicalExpression>> fields = new ArrayList<>(1);
         fields.add(new MutableObject<>(inputVarRef));

-        BuiltinFunctionInfo countFn = 
BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.SQL_COUNT_DISTINCT);
+        BuiltinFunctionInfo countFn = 
BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.SQL_COUNT);
         AggregateFunctionCallExpression aggExpr = new 
AggregateFunctionCallExpression(countFn, false, fields);

         List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<>(1);
@@ -714,17 +738,37 @@
         aggVarList.add(aggVar);

         AggregateOperator newAggOp = new AggregateOperator(aggVarList, 
aggExprList);
-        newAggOp.getInputs().add(new MutableObject<>(newLogOp));
+        newAggOp.getInputs().add(new MutableObject<>(distOp));

+        // now add assign [$$36] <- [{"$1": $$39}]   on top of newAggOp
         Mutable<ILogicalOperator> newAggOpRef = new MutableObject<>(newAggOp);
+        OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx); // is this 
really needed??

-        OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx);
+        List<MutableObject> arr = 
createMutableObjectArray(newAggOp.getVariables());
+        AbstractFunctionCallExpression f = new ScalarFunctionCallExpression(
+                
FunctionUtil.getFunctionInfo(BuiltinFunctions.OPEN_RECORD_CONSTRUCTOR));
+        for (int i = 0; i < arr.size(); i++) {
+            f.getArguments().add(arr.get(i));
+        }
+
+        LogicalVariable newVar = newCtx.newVar();
+        AssignOperator assignOp = new AssignOperator(newVar, new 
MutableObject<>(f));
+        assignOp.getInputs().add(new MutableObject<>(newAggOp));
+        ProjectOperator pOp = new ProjectOperator(newVar);
+        pOp.getInputs().add(new MutableObject<>(assignOp));
+
+        Mutable<ILogicalOperator> newpOpRef = new MutableObject<>(pOp);
+
+        OperatorPropertiesUtil.typeOpRec(newpOpRef, newCtx);
+
         LOGGER.info("***returning from sample query***");

-        String viewInPlan = new ALogicalPlanImpl(newAggOpRef).toString(); 
//useful when debugging
-        LOGGER.trace("viewInPlan");
-        LOGGER.trace(viewInPlan);
-        return AnalysisUtil.runQuery(newAggOpRef, Arrays.asList(aggVar), 
newCtx, IRuleSetFactory.RuleSetKind.SAMPLING);
+        if (LOGGER.isTraceEnabled()) {
+            String viewInPlan = new ALogicalPlanImpl(newpOpRef).toString(); 
//useful when debugging
+            LOGGER.trace("viewInPlan");
+            LOGGER.trace(viewInPlan);
+        }
+        return AnalysisUtil.runQuery(newpOpRef, Arrays.asList(newVar), newCtx, 
IRuleSetFactory.RuleSetKind.SAMPLING);
     }

     // This one gets the cardinality and also projection sizes

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19290
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Ia16d4e9bf92d31242b22ce8f3ab0c30f7777d8d5
Gerrit-Change-Number: 19290
Gerrit-PatchSet: 4
Gerrit-Owner: [email protected]
Gerrit-Reviewer: Anon. E. Moose #1000171
Gerrit-Reviewer: Jenkins <[email protected]>
Gerrit-Reviewer: Vijay Sarathy <[email protected]>
Gerrit-Reviewer: [email protected]
Gerrit-MessageType: merged

Reply via email to