This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit eede5bf7d1553b3275cf06d4f41572a8215368eb Author: Wail Alkowaileet <[email protected]> AuthorDate: Sat Feb 4 08:43:56 2023 -0800 [ASTERIXDB-3103][COMP] Extract non-pure functions - user model changes: no - storage format changes: no - interface changes: no Details: Currently, the SQL++ translator aggressively inlines all functions, including non-pure ones. This inlining could lead to producing multiple calls to non-pure functions where it should be a single call, and its value should be referenced instead. Change-Id: I59c524a109ef8e18a11844a96b690eac115b27f0 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17300 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- .../translator/LangExpressionToPlanTranslator.java | 51 +++-- .../optimizerts/results/ASTERIXDB-2402.plan | 223 ++++++++++----------- .../results/joins/nested_query_with_bcast.plan | 40 ++-- .../insert-primary-key-index-with-auto-gen-pk.plan | 6 +- .../limit-non-pure-function.1.query.sqlpp} | 11 +- .../limit-non-pure-function.2.query.sqlpp} | 10 +- .../constant_folding.7.query.sqlpp | 4 + .../load-record-fields.3.query.sqlpp | 4 +- .../load-record-fields.4.query.sqlpp | 4 +- ...uery.sqlpp => load-record-fields.5.query.sqlpp} | 2 +- ...uery.sqlpp => load-record-fields.6.query.sqlpp} | 2 +- .../limit-non-pure-function.1.plan | 14 ++ .../limit-non-pure-function.2.plan | 18 ++ .../misc/constant_folding/constant_folding.7.plan | 10 +- .../load-record-fields/load-record-fields.4.plan | 30 ++- .../load-record-fields/load-record-fields.5.adm | 2 + .../load-record-fields/load-record-fields.6.plan | 22 ++ .../visitors/IsomorphismOperatorVisitor.java | 4 +- .../rules/InlineSingleReferenceVariablesRule.java | 3 +- .../rewriter/rules/InlineVariablesRule.java | 69 ++++++- 20 files changed, 325 insertions(+), 204 deletions(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java index 794a0b7db7..daa1d2f7fb 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java @@ -1716,8 +1716,7 @@ abstract class LangExpressionToPlanTranslator default: if (expressionNeedsNoNesting(expr)) { Pair<ILogicalOperator, LogicalVariable> p = expr.accept(this, topOpRef); - ILogicalExpression exp = ((AssignOperator) p.first).getExpressions().get(0).getValue(); - return new Pair<>(exp, p.first.getInputs().get(0)); + return inlineAssignIfPossible((AssignOperator) p.first); } else { Mutable<ILogicalOperator> srcRef = new MutableObject<>(); Pair<ILogicalOperator, LogicalVariable> p = expr.accept(this, srcRef); @@ -1749,6 +1748,32 @@ abstract class LangExpressionToPlanTranslator } } + /** + * TODO(wyk) I believe that inlining expressions should be done at the optimization level and not at the translation + * level. By inlining at the translation level, we could possibly miss optimizing inlined expressions in rules + * that do not inspect arguments of a function. I kept inlining all pure (a.k.a functional) functions for now to + * match the previous behavior. For non-pure functions, the assign should be kept as we do not inline them at + * first due to ASTERIXDB-3103 + * + * @see org.apache.hyracks.algebricks.rewriter.rules.InlineVariablesRule + */ + private Pair<ILogicalExpression, Mutable<ILogicalOperator>> inlineAssignIfPossible(AssignOperator assignOp) { + ILogicalExpression expr = assignOp.getExpressions().get(0).getValue(); + + if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { + return new Pair<>(expr, assignOp.getInputs().get(0)); + } + + AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr; + if (funcExpr.isFunctional()) { + return new Pair<>(expr, assignOp.getInputs().get(0)); + } + + //Do not inline non-functional expressions (e.g. uuid()) and keep the assign + return new Pair<>(new VariableReferenceExpression(assignOp.getVariables().get(0)), + new MutableObject<>(assignOp)); + } + protected Pair<ILogicalOperator, LogicalVariable> aggListifyForSubquery(LogicalVariable var, Mutable<ILogicalOperator> opRef, boolean bProject) { SourceLocation sourceLoc = opRef.getValue().getSourceLocation(); @@ -1902,8 +1927,7 @@ abstract class LangExpressionToPlanTranslator * Eliminate shared operator references in a query plan. Deep copy a new query * plan subtree whenever there is a shared operator reference. * - * @param plan, - * the query plan. + * @param plan, the query plan. * @throws CompilationException */ protected void eliminateSharedOperatorReferenceForPlan(ILogicalPlan plan) throws CompilationException { @@ -1918,12 +1942,10 @@ abstract class LangExpressionToPlanTranslator * <code>currentOpRef.getValue()</code>. Deep copy a new query plan subtree * whenever there is a shared operator reference. * - * @param currentOpRef, - * the operator reference to consider - * @param opRefSet, - * the set storing seen operator references so far. + * @param currentOpRef, the operator reference to consider + * @param opRefSet, the set storing seen operator references so far. * @return a mapping that maps old variables to new variables, for the ancestors - * of <code>currentOpRef</code> to replace variables properly. + * of <code>currentOpRef</code> to replace variables properly. * @throws CompilationException */ private LinkedHashMap<LogicalVariable, LogicalVariable> eliminateSharedOperatorReference( @@ -2005,14 +2027,11 @@ abstract class LangExpressionToPlanTranslator /** * Constructs a subplan operator for a branch in a if-else (or case) expression. * - * @param inputOp, - * the input operator. - * @param selectExpr, - * the expression to select tuples that are processed by this branch. - * @param branchExpression, - * the expression to be evaluated in this branch. + * @param inputOp, the input operator. + * @param selectExpr, the expression to select tuples that are processed by this branch. + * @param branchExpression, the expression to be evaluated in this branch. * @return a pair of the constructed subplan operator and the output variable - * for the branch. + * for the branch. * @throws CompilationException */ protected Pair<ILogicalOperator, LogicalVariable> constructSubplanOperatorForBranch(ILogicalOperator inputOp, diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan index 6b489417c6..efd14f7463 100644 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/ASTERIXDB-2402.plan @@ -34,98 +34,48 @@ -- STREAM_PROJECT |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- PRE_CLUSTERED_GROUP_BY[$$252] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- STREAM_SELECT |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- PRE_CLUSTERED_GROUP_BY[$$253] |PARTITIONED| - { - -- AGGREGATE |LOCAL| - -- STREAM_SELECT |LOCAL| - -- NESTED_TUPLE_SOURCE |LOCAL| - } - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$253(ASC)] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$253] |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| + -- STABLE_SORT [$$252(ASC)] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$252] |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- HYBRID_HASH_JOIN [$$251][$$222] |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$309][$$222] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$309] |PARTITIONED| - -- RUNNING_AGGREGATE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- UNNEST |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- PRE_CLUSTERED_GROUP_BY[$$317] |PARTITIONED| - { - -- AGGREGATE |LOCAL| - -- MICRO_PRE_CLUSTERED_GROUP_BY[$$319, $$321] |LOCAL| - { - -- AGGREGATE |LOCAL| - -- STREAM_SELECT |LOCAL| - -- NESTED_TUPLE_SOURCE |LOCAL| - } - -- STREAM_SELECT |LOCAL| - -- NESTED_TUPLE_SOURCE |LOCAL| - } - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$317(ASC), $$319(ASC), $$321(ASC)] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$317] |PARTITIONED| - -- UNION_ALL |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH (channels.Shelters.Shelters) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- SPLIT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- RTREE_SEARCH (channels.Shelters.s_location) |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- NESTED_LOOP |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- NESTED_LOOP |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- RUNNING_AGGREGATE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (channels.EmergenciesNearMeChannelChannelSubscriptions) |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |UNPARTITIONED| - -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH (channels.Reports.Reports) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$260(ASC)] |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- BTREE_SEARCH (channels.Reports.report_time) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (channels.UserLocations) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- RUNNING_AGGREGATE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- UNNEST |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- PRE_CLUSTERED_GROUP_BY[$$313] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- MICRO_PRE_CLUSTERED_GROUP_BY[$$315, $$316] |LOCAL| + { + -- AGGREGATE |LOCAL| + -- STREAM_SELECT |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- STREAM_SELECT |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$313(ASC), $$315(ASC), $$316(ASC)] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$313] |PARTITIONED| + -- UNION_ALL |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH (channels.Shelters.Shelters) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- SPLIT |PARTITIONED| @@ -140,13 +90,11 @@ -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- NESTED_LOOP |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- RUNNING_AGGREGATE |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (channels.EmergenciesNearMeChannelChannelSubscriptions) |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- ASSIGN |UNPARTITIONED| - -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (channels.EmergenciesNearMeChannelChannelSubscriptions) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| -- BROADCAST_EXCHANGE |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_SELECT |PARTITIONED| @@ -154,7 +102,7 @@ -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- BTREE_SEARCH (channels.Reports.Reports) |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$260(ASC)] |PARTITIONED| + -- STABLE_SORT [$$259(ASC)] |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| @@ -168,22 +116,67 @@ -- DATASOURCE_SCAN (channels.UserLocations) |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$222] |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$233, $$235][$$224, $$225] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$233, $$235] |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (channels.EmergenciesNearMeChannelBrokerSubscriptions) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- SPLIT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- RTREE_SEARCH (channels.Shelters.s_location) |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- NESTED_LOOP |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (channels.EmergenciesNearMeChannelChannelSubscriptions) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH (channels.Reports.Reports) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$259(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH (channels.Reports.report_time) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (channels.UserLocations) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$222] |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- HYBRID_HASH_JOIN [$$233, $$235][$$224, $$225] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$233, $$235] |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (channels.EmergenciesNearMeChannelBrokerSubscriptions) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (channels.Broker) |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (channels.Broker) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/joins/nested_query_with_bcast.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/joins/nested_query_with_bcast.plan index 075f7af501..15c163dec4 100644 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/results/joins/nested_query_with_bcast.plan +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/joins/nested_query_with_bcast.plan @@ -9,31 +9,29 @@ -- STREAM_PROJECT |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- PRE_CLUSTERED_GROUP_BY[$$38] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- STREAM_SELECT |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- PRE_CLUSTERED_GROUP_BY[$$38] |PARTITIONED| - { - -- AGGREGATE |LOCAL| - -- STREAM_SELECT |LOCAL| - -- NESTED_TUPLE_SOURCE |LOCAL| - } + -- STABLE_SORT [$$38(ASC)] |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STABLE_SORT [$$38(ASC)] |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| + -- HYBRID_HASH_JOIN [$$41][$$39] |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$41][$$39] |PARTITIONED| + -- ASSIGN |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- DATASOURCE_SCAN (test.tweetDataset) |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.tweetDataset) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.countryDataset) |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.countryDataset) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/primary-key-index/insert-primary-key-index-with-auto-gen-pk.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/primary-key-index/insert-primary-key-index-with-auto-gen-pk.plan index 792b79d1bd..9a296a5e1c 100644 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/results/primary-key-index/insert-primary-key-index-with-auto-gen-pk.plan +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/primary-key-index/insert-primary-key-index-with-auto-gen-pk.plan @@ -6,7 +6,5 @@ -- ASSIGN |UNPARTITIONED| -- STREAM_PROJECT |UNPARTITIONED| -- ASSIGN |UNPARTITIONED| - -- STREAM_PROJECT |UNPARTITIONED| - -- ASSIGN |UNPARTITIONED| - -- ASSIGN |UNPARTITIONED| - -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| + -- ASSIGN |UNPARTITIONED| + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/limit-non-pure-function/limit-non-pure-function.1.query.sqlpp similarity index 82% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/limit-non-pure-function/limit-non-pure-function.1.query.sqlpp index 517a996a2b..c23e98cb6d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/limit-non-pure-function/limit-non-pure-function.1.query.sqlpp @@ -17,10 +17,7 @@ * under the License. */ -/* - * Description: No constant folding of OR with a non functional argument - */ - -explain select value true or get_year(current_date()) < x -from range(1, 4) x -order by x; \ No newline at end of file +EXPLAIN +SELECT VALUE A +FROM [1, 2, 3] AS A +LIMIT random() diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/limit-non-pure-function/limit-non-pure-function.2.query.sqlpp similarity index 83% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/limit-non-pure-function/limit-non-pure-function.2.query.sqlpp index 517a996a2b..9c995e0597 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/limit-non-pure-function/limit-non-pure-function.2.query.sqlpp @@ -18,9 +18,11 @@ */ /* - * Description: No constant folding of OR with a non functional argument + * Description: Make sure that the call to random() (rand) is not inlined */ -explain select value true or get_year(current_date()) < x -from range(1, 4) x -order by x; \ No newline at end of file +EXPLAIN +WITH rand AS random() +SELECT A, rand +FROM [1, 2, 3] AS A +LIMIT rand \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp index 517a996a2b..dfb47e90e9 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/constant_folding/constant_folding.7.query.sqlpp @@ -19,6 +19,10 @@ /* * Description: No constant folding of OR with a non functional argument + * + * Update/note: this should be folded as the non functional argument would short-circuited in runtime. Due to + * ASTERIXDB-3103, the constant folding rule doesn't see a non functional argument, but a variable. Hence, + * the expression 'get_year(current_date()) < x' is eliminated */ explain select value true or get_year(current_date()) < x diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp index 278f5d2b1e..1ed0c05586 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp @@ -25,6 +25,6 @@ SET `compiler.sort.parallel` "false"; SELECT VALUE md.name FROM MyDataset md -LET currentData = {"myDate": current_date()} -WHERE currentData.myDate = current_date() +LET myObject = {"myUid": uuid()} +WHERE myObject.myUid != uuid() ORDER BY md.id diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp index f44921f99b..96095b4f69 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp @@ -25,6 +25,6 @@ SET `compiler.sort.parallel` "false"; EXPLAIN SELECT VALUE md.name FROM MyDataset md -LET currentData = {"myDate": current_date()} -WHERE currentData.myDate = current_date() +LET myObject = {"myUid": uuid()} +WHERE myObject.myUid != uuid() ORDER BY md.id diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.5.query.sqlpp similarity index 95% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.5.query.sqlpp index 278f5d2b1e..60f5d4b136 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.3.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.5.query.sqlpp @@ -26,5 +26,5 @@ SET `compiler.sort.parallel` "false"; SELECT VALUE md.name FROM MyDataset md LET currentData = {"myDate": current_date()} -WHERE currentData.myDate = current_date() +WHERE currentData.myDate != date("1980-09-10") ORDER BY md.id diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.6.query.sqlpp similarity index 95% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.6.query.sqlpp index f44921f99b..5dae0bc81c 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.4.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/objects/load-record-fields/load-record-fields.6.query.sqlpp @@ -26,5 +26,5 @@ EXPLAIN SELECT VALUE md.name FROM MyDataset md LET currentData = {"myDate": current_date()} -WHERE currentData.myDate = current_date() +WHERE currentData.myDate != date("1980-09-10") ORDER BY md.id diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/limit-non-pure-function/limit-non-pure-function.1.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/limit-non-pure-function/limit-non-pure-function.1.plan new file mode 100644 index 0000000000..d5f6401e58 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/limit-non-pure-function/limit-non-pure-function.1.plan @@ -0,0 +1,14 @@ +distribute result [$$A] +-- DISTRIBUTE_RESULT |UNPARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + project ([$$A]) + -- STREAM_PROJECT |UNPARTITIONED| + limit switch-case(gt($$13, 0), true, $$13, 0) + -- STREAM_LIMIT |UNPARTITIONED| + assign [$$13] <- [treat-as-integer(random())] + -- ASSIGN |UNPARTITIONED| + unnest $$A <- scan-collection(array: [ 1, 2, 3 ]) + -- UNNEST |UNPARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/limit-non-pure-function/limit-non-pure-function.2.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/limit-non-pure-function/limit-non-pure-function.2.plan new file mode 100644 index 0000000000..54efcb6035 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/limit-non-pure-function/limit-non-pure-function.2.plan @@ -0,0 +1,18 @@ +distribute result [$$24] +-- DISTRIBUTE_RESULT |UNPARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| + project ([$$24]) + -- STREAM_PROJECT |UNPARTITIONED| + limit switch-case(gt($$25, 0), true, $$25, 0) + -- STREAM_LIMIT |UNPARTITIONED| + project ([$$25, $$24]) + -- STREAM_PROJECT |UNPARTITIONED| + assign [$$25, $$24] <- [treat-as-integer($$21), {"A": $$A, "rand": $$21}] + -- ASSIGN |UNPARTITIONED| + unnest $$A <- scan-collection(array: [ 1, 2, 3 ]) + -- UNNEST |UNPARTITIONED| + assign [$$21] <- [random()] + -- ASSIGN |UNPARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/constant_folding/constant_folding.7.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/constant_folding/constant_folding.7.plan index 775cc2a6c6..52ccc4f648 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/constant_folding/constant_folding.7.plan +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/constant_folding/constant_folding.7.plan @@ -2,10 +2,10 @@ distribute result [$$16] -- DISTRIBUTE_RESULT |UNPARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| - project ([$$16]) - -- STREAM_PROJECT |UNPARTITIONED| - assign [$$16] <- [or(true, lt(get-year(current-date()), $$x))] - -- ASSIGN |UNPARTITIONED| + assign [$$16] <- [true] + -- ASSIGN |UNPARTITIONED| + project ([]) + -- STREAM_PROJECT |UNPARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED| order (ASC, $$x) @@ -15,4 +15,4 @@ distribute result [$$16] unnest $$x <- range(1, 4) -- UNNEST |UNPARTITIONED| empty-tuple-source - -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| \ No newline at end of file + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.4.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.4.plan index 5ac69f34ba..a4b2ce0e92 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.4.plan +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.4.plan @@ -4,23 +4,19 @@ distribute result [$$28] -- ONE_TO_ONE_EXCHANGE |PARTITIONED| project ([$$28]) -- STREAM_PROJECT |PARTITIONED| - exchange - -- SORT_MERGE_EXCHANGE [$$30(ASC) ] |PARTITIONED| - project ([$$28, $$30]) + assign [$$28] <- [$$md.getField("name")] + -- ASSIGN |PARTITIONED| + project ([$$md]) -- STREAM_PROJECT |PARTITIONED| - select (eq($$31, current-date())) - -- STREAM_SELECT |PARTITIONED| - assign [$$31] <- [current-date()] - -- ASSIGN |PARTITIONED| - project ([$$30, $$28]) - -- STREAM_PROJECT |PARTITIONED| - assign [$$28] <- [$$md.getField("name")] - -- ASSIGN |PARTITIONED| + exchange + -- SORT_MERGE_EXCHANGE [$$30(ASC) ] |PARTITIONED| + select (neq(uuid(), uuid())) + -- STREAM_SELECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$30, $$md] <- test.MyDataset + -- DATASOURCE_SCAN |PARTITIONED| exchange -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - data-scan []<-[$$30, $$md] <- test.MyDataset - -- DATASOURCE_SCAN |PARTITIONED| - exchange - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - empty-tuple-source - -- EMPTY_TUPLE_SOURCE |PARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.5.adm new file mode 100644 index 0000000000..ac2dc970eb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.5.adm @@ -0,0 +1,2 @@ +"Alice" +"Bob" diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.6.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.6.plan new file mode 100644 index 0000000000..600472738a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/objects/load-record-fields/load-record-fields.6.plan @@ -0,0 +1,22 @@ +distribute result [$$28] +-- DISTRIBUTE_RESULT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$28]) + -- STREAM_PROJECT |PARTITIONED| + assign [$$28] <- [$$md.getField("name")] + -- ASSIGN |PARTITIONED| + project ([$$md]) + -- STREAM_PROJECT |PARTITIONED| + exchange + -- SORT_MERGE_EXCHANGE [$$30(ASC) ] |PARTITIONED| + select (neq(current-date(), date: { 1980-09-10 })) + -- STREAM_SELECT |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$30, $$md] <- test.MyDataset + -- DATASOURCE_SCAN |PARTITIONED| + exchange + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java index e4df39724d..b44607513d 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java @@ -249,8 +249,8 @@ public class IsomorphismOperatorVisitor implements ILogicalOperatorVisitor<Boole return Boolean.FALSE; } OrderOperator orderOpArg = (OrderOperator) copyAndSubstituteVar(op, arg); - boolean isomorphic = compareIOrderAndExpressions(op.getOrderExpressions(), orderOpArg.getOrderExpressions()); - return isomorphic; + return op.getTopK() == orderOpArg.getTopK() + && compareIOrderAndExpressions(op.getOrderExpressions(), orderOpArg.getOrderExpressions()); } @Override diff --git a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java index f072312f54..be169f42d6 100644 --- a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java +++ b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java @@ -27,7 +27,6 @@ import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; /** @@ -83,7 +82,7 @@ public class InlineSingleReferenceVariablesRule extends InlineVariablesRule { } @Override - protected boolean performBottomUpAction(AbstractLogicalOperator op) throws AlgebricksException { + protected boolean performBottomUpAction(ILogicalOperator op) throws AlgebricksException { usedVars.clear(); VariableUtilities.getUsedVariables(op, usedVars); for (LogicalVariable var : usedVars) { diff --git a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java index cec830ed45..6ad50e6340 100644 --- a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java +++ b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.Set; import org.apache.commons.lang3.mutable.Mutable; +import org.apache.commons.lang3.mutable.MutableInt; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; @@ -37,7 +38,6 @@ import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans; import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; import org.apache.hyracks.algebricks.core.algebra.operators.logical.WindowOperator; @@ -80,6 +80,10 @@ public class InlineVariablesRule implements IAlgebraicRewriteRule { // set to prevent re-visiting a subtree from the other sides. Operators with multiple outputs are the ones that // could be re-visited twice or more (e.g. replicate and split operators) private final Map<ILogicalOperator, Boolean> subTreesDone = new HashMap<>(); + // temporary set to get used variables + private final List<LogicalVariable> usedVars = new ArrayList<>(); + // map of variables and the counts of how many times they were used + private final Map<LogicalVariable, MutableInt> usedVariableCounter = new HashMap<>(); @Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) { @@ -108,9 +112,10 @@ public class InlineVariablesRule implements IAlgebraicRewriteRule { varAssignRhs.clear(); inlineVisitor.setContext(context); subTreesDone.clear(); + usedVariableCounter.clear(); } - protected boolean performBottomUpAction(AbstractLogicalOperator op) throws AlgebricksException { + protected boolean performBottomUpAction(ILogicalOperator op) throws AlgebricksException { // Only inline variables in operators that can deal with arbitrary expressions. if (!op.requiresVariableReferenceExpressions()) { inlineVisitor.setOperator(op); @@ -125,27 +130,33 @@ public class InlineVariablesRule implements IAlgebraicRewriteRule { private boolean inlineVariables(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { - AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); + ILogicalOperator op = opRef.getValue(); // check if you have already visited the subtree rooted at this operator if (subTreesDone.containsKey(op)) { return subTreesDone.get(op); } + + // compute how many times a variable was used + computeUsedVariableCount(op); + // Update mapping from variables to expressions during top-down traversal. if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) { AssignOperator assignOp = (AssignOperator) op; List<LogicalVariable> vars = assignOp.getVariables(); List<Mutable<ILogicalExpression>> exprs = assignOp.getExpressions(); for (int i = 0; i < vars.size(); i++) { + LogicalVariable variable = vars.get(i); ILogicalExpression expr = exprs.get(i).getValue(); // Ignore functions that are either in the doNotInline set or are non-functional if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) { AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr; - if (doNotInlineFuncs.contains(funcExpr.getFunctionIdentifier()) || !funcExpr.isFunctional()) { + if (doNotInlineFuncs.contains(funcExpr.getFunctionIdentifier()) + || skipNonFunctional(variable, funcExpr)) { continue; } } - varAssignRhs.put(vars.get(i), exprs.get(i).getValue()); + varAssignRhs.put(variable, expr); } } @@ -200,6 +211,54 @@ public class InlineVariablesRule implements IAlgebraicRewriteRule { return modified; } + /** + * Skip inlining non-pure functions if they are referenced more than once. + * + * @param variable assigned to the function + * @param expr of the assigned variable (potentially a non-pure one) + * @return true if inlining should be skipped, false if the non-pure functions can be inlined + */ + private boolean skipNonFunctional(LogicalVariable variable, AbstractFunctionCallExpression expr) { + return !expr.isFunctional() && usedVariableCounter.containsKey(variable) + && usedVariableCounter.get(variable).getValue() > 1; + } + + /** + * Computes how many times the variables in the plan were used. The variable counts + * help to determine if whether we can inline non-pure functions (e.g., current_date()) or not. + * Inlining non-pure functions can help unlocking other optimizations. For instance, we can pushdown + * limits and selects into data-scans or avoid create_query_uuid(). + * <p> + * Non-pure functions can only be inlined if they were referenced once. For example, + * in the following plan, the function current_date() (or variable $$x) can be inlined as it was used + * once by select: + * <p> + * Before: + * select (get_year($$x) > 2000) + * * assign [$$x] <- [current_date()] + * After: + * select (get_year(current_date()) > 2000) + * <p> + * However, the following current_date() (or variable $$x) cannot be inlined as it referenced twice: + * select (get_year($$x) > 2000 && get_month($$x) == 11) + * * assign [$$x] <- [current_date()] + * + * @param op the logical operator that potentially uses one or more variables + */ + private void computeUsedVariableCount(ILogicalOperator op) throws AlgebricksException { + if (op.getOperatorTag() == LogicalOperatorTag.SUBPLAN) { + // to avoid count a variable twice as this routine traverses the subplan + return; + } + + usedVars.clear(); + VariableUtilities.getUsedVariables(op, usedVars); + for (LogicalVariable variable : usedVars) { + MutableInt counter = usedVariableCounter.computeIfAbsent(variable, k -> new MutableInt(0)); + counter.increment(); + } + } + public static class InlineVariablesVisitor extends LogicalExpressionReferenceTransformVisitor implements ILogicalExpressionReferenceTransform {
