This is an automated email from the ASF dual-hosted git repository.

htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 21dee76386 [ASTERIXDB-3303][COMP] projection sizes
21dee76386 is described below

commit 21dee763867b490bddeedddcb1e01384a84c4ec2
Author: murali4104 <[email protected]>
AuthorDate: Tue Nov 14 10:06:03 2023 -0800

    [ASTERIXDB-3303][COMP] projection sizes
    
    Change-Id: I0f9df7b7937e0248c6fdae84a527680fac2fa915
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17946
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Vijay Sarathy <[email protected]>
---
 .../asterix/optimizer/rules/cbo/JoinEnum.java      |  20 ++-
 .../apache/asterix/optimizer/rules/cbo/Stats.java  | 141 ++++++++++++++++++---
 .../asterix/om/functions/BuiltinFunctions.java     |   3 +-
 3 files changed, 144 insertions(+), 20 deletions(-)

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index d33f8ce45b..244279dd2c 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -38,8 +38,8 @@ import org.apache.asterix.metadata.declared.DatasetDataSource;
 import org.apache.asterix.metadata.declared.MetadataProvider;
 import org.apache.asterix.metadata.declared.SampleDataSource;
 import org.apache.asterix.metadata.entities.Index;
-import org.apache.asterix.om.base.AInt64;
 import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.ARecord;
 import org.apache.asterix.om.base.IAObject;
 import org.apache.asterix.om.constants.AsterixConstantValue;
 import org.apache.asterix.om.functions.BuiltinFunctions;
@@ -898,7 +898,12 @@ public class JoinEnum {
                     // There are predicates here. So skip the predicates and 
get the original dataset card.
                     // Now apply all the predicates and get the card after all 
predicates are applied.
                     result = stats.runSamplingQuery(this.optCtx, leafInput);
-                    double predicateCardinality = ((double) ((AInt64) 
result.get(0).get(0)).getLongValue());
+                    double predicateCardinality = 
stats.findPredicateCardinality(result);
+
+                    double projectedSize = -1.0;
+                    if (predicateCardinality > 0.0) { // otherwise, we get 
nulls for the averages
+                        projectedSize = stats.findProjectedSize(result);
+                    }
                     if (predicateCardinality == 0.0) {
                         predicateCardinality = 0.0001 * 
idxDetails.getSampleCardinalityTarget();
                     }
@@ -913,11 +918,17 @@ public class JoinEnum {
                                     
.warn(Warning.of(scanOp.getSourceLocation(), ErrorCode.SAMPLE_HAS_ZERO_ROWS));
                         }
                     }
-
                     finalDatasetCard *= predicateCardinality / sampleCard;
                     // now switch the input back.
                     parent.getInputs().get(0).setValue(scanOp);
                     jn.setCardinality(finalDatasetCard);
+                    if (projectedSize > 0.0) {
+                        jn.setAvgDocSize(projectedSize);
+                    } else {
+                        ARecord record = (ARecord) (((IAObject) 
((List<IAObject>) (result.get(0))).get(0)));
+                        int fields = record.numberOfFields();
+                        jn.setAvgDocSize(fields * 100); // cant think of 
anything better... cards are more important anyway
+                    }
                 }
             }
             dataScanPlan = jn.addSingleDatasetPlans();
@@ -1052,7 +1063,8 @@ public class JoinEnum {
                 SelectOperator selOp = new SelectOperator(new 
MutableObject<>(exp));
                 selOp.getInputs().add(new MutableObject<>(leafInput));
                 result = stats.runSamplingQuery(this.optCtx, selOp);
-                predicateCardinality = ((double) ((AInt64) 
result.get(0).get(0)).getLongValue());
+                predicateCardinality = stats.findPredicateCardinality(result);
+
                 if (predicateCardinality == 0.0) {
                     predicateCardinality = 0.0001 * 
idxDetails.getSampleCardinalityTarget();
                 }
diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index d59fd73e2a..364289c1e6 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -24,17 +24,24 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.asterix.compiler.provider.IRuleSetFactory;
+import org.apache.asterix.lang.common.expression.LiteralExpr;
+import org.apache.asterix.lang.common.literal.StringLiteral;
+import org.apache.asterix.lang.common.util.FunctionUtil;
 import org.apache.asterix.metadata.declared.DataSource;
 import org.apache.asterix.metadata.declared.DataSourceId;
 import org.apache.asterix.metadata.declared.MetadataProvider;
 import org.apache.asterix.metadata.declared.SampleDataSource;
 import org.apache.asterix.metadata.entities.Index;
+import org.apache.asterix.om.base.ADouble;
 import org.apache.asterix.om.base.AInt64;
+import org.apache.asterix.om.base.ARecord;
 import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.constants.AsterixConstantValue;
 import org.apache.asterix.om.functions.BuiltinFunctionInfo;
 import org.apache.asterix.om.functions.BuiltinFunctions;
 import org.apache.asterix.optimizer.base.AnalysisUtil;
 import 
org.apache.asterix.optimizer.rules.am.array.AbstractOperatorFromSubplanRewrite;
+import org.apache.asterix.translator.ConstantHelper;
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -49,11 +56,17 @@ import 
org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionC
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.JoinProductivityAnnotation;
 import 
org.apache.hyracks.algebricks.core.algebra.expressions.PredicateCardinalityAnnotation;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import 
org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
 import 
org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
+import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import org.apache.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
 import 
org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
 import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil;
 import org.apache.hyracks.api.exceptions.ErrorCode;
@@ -489,7 +502,7 @@ public class Stats {
             }
         }
 
-        double predicateCardinality = (double) ((AInt64) 
result.get(0).get(0)).getLongValue();
+        double predicateCardinality = findPredicateCardinality(result); // 
this routine knows how to look into the record inside result
         if (predicateCardinality == 0.0) {
             predicateCardinality = 0.0001 * 
idxDetails.getSampleCardinalityTarget();
         }
@@ -512,7 +525,7 @@ public class Stats {
             selOp.getCondition().setValue(ConstantExpression.TRUE);
             result = runSamplingQuery(optCtx, selOp);
             selOp.getCondition().setValue(saveExprs);
-            sampleCard = (double) ((AInt64) 
result.get(0).get(0)).getLongValue();
+            sampleCard = findPredicateCardinality(result);
         }
         // switch  the scanOp back
         parent.getInputs().get(0).setValue(scanOp);
@@ -521,7 +534,26 @@ public class Stats {
         return sel;
     }
 
-    private void transformtoAnyInPlan(SelectOperator newSelOp) {
+    public double findPredicateCardinality(List<List<IAObject>> result) {
+        ARecord record = (ARecord) (((IAObject) ((List<IAObject>) 
(result.get(0))).get(0)));
+        int fields = record.numberOfFields();
+        IAObject first = record.getValueByPos(0);
+        double predicateCardinality = ((double) ((AInt64) 
first).getLongValue());
+        return predicateCardinality;
+    }
+
+    // Can have null returned, so this routine should only be called if at 
least tuple is returned by the sample
+    public double findProjectedSize(List<List<IAObject>> result) {
+        ARecord record = (ARecord) (((IAObject) ((List<IAObject>) 
(result.get(0))).get(0)));
+        // Now figure out the projected size
+        double projectedSize = 0.0;
+        int fields = record.numberOfFields();
+        for (int j = 1; j < fields; j++) {
+            IAObject field = record.getValueByPos(j);
+            double size = ((double) ((ADouble) field).getDoubleValue());
+            projectedSize += size;
+        }
+        return projectedSize;
     }
 
     protected List<List<IAObject>> runSamplingQuery(IOptimizationContext ctx, 
ILogicalOperator logOp)
@@ -531,28 +563,107 @@ public class Stats {
         IOptimizationContext newCtx = 
ctx.getOptimizationContextFactory().cloneOptimizationContext(ctx);
 
         ILogicalOperator newScanOp = 
OperatorManipulationUtil.bottomUpCopyOperators(logOp);
-
+        // Now we have to generate plans like this on top of the scanOp (logOp)
+        // project ([$$79])
+        // assign [$$79] <- [{"$1": $$73, "$2": $$74, "$3": $$75, "$4": $$76, 
"$5": $$77, "$6": $$78}]
+        // aggregate [$$73, $$74, $$75, $$76, $$77, $$78] <- [agg-count(true), 
sql-avg($$68), sql-avg($$69), sql-avg($$70), sql-avg($$71), sql-avg($$72)]
+        // assign [$$68, $$69, $$70, $$71, $$72] <- [serialized-size($$60), 
serialized-size($$str), serialized-size($$61), serialized-size($$65), 
serialized-size($$67)]
+
+        // add the assign [$$56, ..., ] <- [encoded-size($$67), ..., ] on top 
of newAggOp
+        List<LogicalVariable> vars = new ArrayList<>();
+        VariableUtilities.getLiveVariables(logOp, vars);
+        LogicalVariable newVar;
+        // array to keep track of the assigns
+        List<LogicalVariable> newVars = new ArrayList<>();
+        List<Mutable<ILogicalExpression>> exprs = new ArrayList<>();
+
+        // create this assignOperator
+        // assign [$$68, $$69, $$70, $$71, $$72] <- [serialized-size($$60), 
serialized-size($$str), serialized-size($$61), serialized-size($$65), 
serialized-size($$67)]
+        int count = 0;
+        for (LogicalVariable lv : vars) {
+            count++;
+            VariableReferenceExpression varRefExpr = new 
VariableReferenceExpression(lv);
+            List<Mutable<ILogicalExpression>> vars2 = new ArrayList<>();
+            vars2.add(new MutableObject<>(varRefExpr));
+            ScalarFunctionCallExpression func = new 
ScalarFunctionCallExpression(
+                    
FunctionUtil.getFunctionInfo(BuiltinFunctions.SERIALIZED_SIZE), vars2);
+            exprs.add(new MutableObject<>(func));
+            newVar = newCtx.newVar();
+            newVars.add(newVar);
+        }
+
+        AssignOperator assignOp = new AssignOperator(newVars, exprs);
+        assignOp.getInputs().add(new MutableObject<>(newScanOp));
+        Mutable<ILogicalOperator> tmpRef = new MutableObject<>(assignOp);
+        String viewInPlan = new ALogicalPlanImpl(tmpRef).toString();
+
+        // aggregate [$$73, $$74, $$75, $$76, $$77, $$78] <- [agg-count(true), 
sql-avg($$68), sql-avg($$69), sql-avg($$70), sql-avg($$71), sql-avg($$72)]
+        // add the count-agg (true) first
+        List<LogicalVariable> newVars2 = new ArrayList<>();
+        List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<>();
         List<Mutable<ILogicalExpression>> aggFunArgs = new ArrayList<>(1);
         aggFunArgs.add(new MutableObject<>(ConstantExpression.TRUE));
         BuiltinFunctionInfo countFn = 
BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.COUNT);
         AggregateFunctionCallExpression aggExpr = new 
AggregateFunctionCallExpression(countFn, false, aggFunArgs);
-
-        List<Mutable<ILogicalExpression>> aggExprList = new ArrayList<>(1);
         aggExprList.add(new MutableObject<>(aggExpr));
-
-        List<LogicalVariable> aggVarList = new ArrayList<>(1);
         LogicalVariable aggVar = newCtx.newVar();
-        aggVarList.add(aggVar);
+        newVars2.add(aggVar);
+
+        // Now add the other aggs
+        for (int i = 0; i < count; i++) {
+            VariableReferenceExpression varRefExpr;
+            varRefExpr = new VariableReferenceExpression(newVars.get(i));
+            List<Mutable<ILogicalExpression>> vars2 = new ArrayList<>();
+            vars2.add(new MutableObject<>(varRefExpr));
+            BuiltinFunctionInfo avgFn = 
BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.SQL_AVG);
+            aggExpr = new AggregateFunctionCallExpression(avgFn, false, vars2);
+            newVar = newCtx.newVar();
 
-        AggregateOperator newAggOp = new AggregateOperator(aggVarList, 
aggExprList);
-        newAggOp.getInputs().add(new MutableObject<>(newScanOp));
+            newVars2.add(newVar);
+            aggExprList.add(new MutableObject<>(aggExpr));
+        }
 
+        // add assign [$$79] <- [{"$1": $$73, "$2": $$74, "$3": $$75, "$4": 
$$76, "$5": $$77, "$6": $$78}]
+        AggregateOperator newAggOp = new AggregateOperator(newVars2, 
aggExprList);
+        newAggOp.getInputs().add(new MutableObject<>(assignOp));
         Mutable<ILogicalOperator> newAggOpRef = new MutableObject<>(newAggOp);
+        OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx); // is this 
really needed??
+        List<MutableObject> arr = 
createMutableObjectArray(newAggOp.getVariables());
+        AbstractFunctionCallExpression f = new ScalarFunctionCallExpression(
+                
FunctionUtil.getFunctionInfo(BuiltinFunctions.OPEN_RECORD_CONSTRUCTOR));
+        for (int i = 0; i < arr.size(); i++) {
+            f.getArguments().add(arr.get(i));
+        }
+
+        newVar = newCtx.newVar();
+        assignOp = new AssignOperator(newVar, new MutableObject<>(f));
+        assignOp.getInputs().add(new MutableObject<>(newAggOp));
+        ProjectOperator pOp = new ProjectOperator(newVar);
+        pOp.getInputs().add(new MutableObject<>(assignOp));
 
-        OperatorPropertiesUtil.typeOpRec(newAggOpRef, newCtx);
+        Mutable<ILogicalOperator> Ref = new MutableObject<>(pOp);
         LOGGER.info("***returning from sample query***");
 
-        return AnalysisUtil.runQuery(newAggOpRef, Arrays.asList(aggVar), 
newCtx, IRuleSetFactory.RuleSetKind.SAMPLING);
+        OperatorPropertiesUtil.typeOpRec(Ref, newCtx);
+        String viewInPlan3 = new ALogicalPlanImpl(Ref).toString(); //useful 
when debugging
+        LOGGER.trace("viewInPlan3");
+        LOGGER.trace(viewInPlan3);
+        return AnalysisUtil.runQuery(Ref, Arrays.asList(newVar), newCtx, 
IRuleSetFactory.RuleSetKind.SAMPLING);
+
+    }
+
+    private List<MutableObject> createMutableObjectArray(List<LogicalVariable> 
vars) {
+        List<MutableObject> arr = new ArrayList<>();
+        for (int i = 0; i < vars.size(); i++) {
+            LiteralExpr le = new LiteralExpr();
+            StringLiteral value = new StringLiteral("$" + Integer.toString(i + 
1)); // these start from 1
+            le.setValue(value);
+            AsterixConstantValue cValue = new 
AsterixConstantValue(ConstantHelper.objectFromLiteral(le.getValue()));
+            ConstantExpression cExpr = new ConstantExpression(cValue);
+            arr.add(new MutableObject<>(cExpr));
+            arr.add(new MutableObject<>(new 
VariableReferenceExpression(vars.get(i))));
+        }
+        return arr;
     }
 
     public long findDistinctCardinality(ILogicalOperator grpByDistinctOp) 
throws AlgebricksException {
@@ -603,7 +714,7 @@ public class Stats {
         ILogicalOperator copyOfSelOp = 
OperatorManipulationUtil.bottomUpCopyOperators(selOp);
         if (setSampleDataSource(copyOfSelOp, sampleDataSource)) {
             List<List<IAObject>> result = runSamplingQuery(optCtx, 
copyOfSelOp);
-            sampleSize = ((AInt64) result.get(0).get(0)).getLongValue();
+            sampleSize = (long) findPredicateCardinality(result);
         }
         return sampleSize;
     }
@@ -619,7 +730,7 @@ public class Stats {
             if (setSampleDataSource(copyOfGrpByDistinctOp, sampleDataSource)) {
                 // get distinct cardinality from the sampling source
                 List<List<IAObject>> result = runSamplingQuery(optCtx, 
copyOfGrpByDistinctOp);
-                estDistCardinalityFromSample = ((double) ((AInt64) 
result.get(0).get(0)).getLongValue());
+                estDistCardinalityFromSample = 
findPredicateCardinality(result);
             }
         }
         if (estDistCardinalityFromSample != -1.0) { // estimate distinct 
cardinality for the dataset from the sampled cardinality
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index 6c621e04c3..0659d001f2 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -2108,7 +2108,8 @@ public class BuiltinFunctions {
         addFunction(DECODE_DATAVERSE_NAME, 
OrderedListOfAStringTypeComputer.INSTANCE_NULLABLE, true);
 
         addPrivateFunction(COLLECTION_TO_SEQUENCE, 
CollectionToSequenceTypeComputer.INSTANCE, true);
-        addPrivateFunction(SERIALIZED_SIZE, AInt64TypeComputer.INSTANCE, true);
+        addFunction(SERIALIZED_SIZE, AInt64TypeComputer.INSTANCE, true);
+        // used by CBO's internal sampling queries for determining projection 
sizes
 
         // external lookup
         addPrivateFunction(EXTERNAL_LOOKUP, AnyTypeComputer.INSTANCE, false);

Reply via email to