This is an automated email from the ASF dual-hosted git repository.

vsarathy1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 61cd52011c [ASTERIXDB-3543][COMP] Do not inline expressions containing 
collection record
61cd52011c is described below

commit 61cd52011cb21713e4d0b79031d5e718d933d6c7
Author: Vijay Sarathy <[email protected]>
AuthorDate: Fri Jan 31 13:30:39 2025 -0800

    [ASTERIXDB-3543][COMP] Do not inline expressions containing collection 
record
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Inlining expressions involving a collection's record
    (e.g. field access $$col_rec.getField("x")) into the final
    assign op (SELECT's assign op) can make query execution take
    a long time, especially if the records are not small. This is
    because $$col_rec has to be carried along in the frames from
    the originating data-scan all the way through the operators
    to the final assign op. This can prevent early projections
    that aim to reduce the tuple size and increase the number
    of tuples in a given frame.
    
    This patch prevents inlining under certain conditions.
    If the path from the source operator producing the expression
    to the final assign op contains any of the following:
    1. Operators with >= 2 inputs (e.g. JOINS).
    2. ORDER-BY operator.
    3. UNNEST operator.
    
    then we do not inline those expressions involving the variables
    produced by the data-scan.
    
    Ext-ref: MB-64252
    
    Change-Id: Ia818f11e45bf66e3b4807938bfb635c70fa9c496
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19351
    Reviewed-by: Vijay Sarathy <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
---
 .../rules/PushLimitIntoPrimarySearchRule.java      |  2 +-
 .../inline-single-reference_1.sqlpp                | 42 +++++++++++
 .../inline-single-reference_2.sqlpp                | 39 +++++++++++
 .../inline-single-reference_3.sqlpp                | 39 +++++++++++
 .../inline-single-reference_4.sqlpp                | 41 +++++++++++
 .../inline-single-reference_1.plan                 | 12 ++++
 .../inline-single-reference_2.plan                 |  9 +++
 .../inline-single-reference_3.plan                 | 10 +++
 .../inline-single-reference_4.plan                 | 23 ++++++
 .../ngram-jaccard-check_04.plan                    |  6 +-
 .../inverted-index-join/ngram-jaccard_04.plan      |  6 +-
 .../inverted-index-join/word-jaccard-check_04.plan |  6 +-
 .../inverted-index-join/word-jaccard_04.plan       |  6 +-
 .../rules/InlineSingleReferenceVariablesRule.java  | 22 +++++-
 .../rewriter/rules/InlineVariablesRule.java        | 81 ++++++++++++++++++++--
 15 files changed, 328 insertions(+), 16 deletions(-)

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushLimitIntoPrimarySearchRule.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushLimitIntoPrimarySearchRule.java
index d95a95257a..525624b968 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushLimitIntoPrimarySearchRule.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/PushLimitIntoPrimarySearchRule.java
@@ -117,7 +117,7 @@ public class PushLimitIntoPrimarySearchRule implements 
IAlgebraicRewriteRule {
             extractInlinableVariablesFromAssign(assignOp, selectedVariables, 
varAssignRhs);
             if (!varAssignRhs.isEmpty()) {
                 if (inlineVisitor == null) {
-                    inlineVisitor = new 
InlineVariablesRule.InlineVariablesVisitor(varAssignRhs);
+                    inlineVisitor = new 
InlineVariablesRule.InlineVariablesVisitor(varAssignRhs, null);
                     inlineVisitor.setContext(context);
                     inlineVisitor.setOperator(select);
                 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_1.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_1.sqlpp
new file mode 100644
index 0000000000..ceeb15363f
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_1.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Test inline single reference.
+* Expected Res : Success
+* Date         : 01/31/2015
+*/
+
+drop  dataverse test if exists;
+create  dataverse test;
+
+use test;
+
+create type TT as closed {
+  id : integer
+};
+
+create  dataset t1(TT) primary key id;
+
+SELECT a.doc,
+       a.name,
+       CASE WHEN h.uname IS VALUED THEN h.uname ELSE NULL END AS uname
+FROM t1 a
+UNNEST a.history h
+WHERE h.ts >= "1999-03-02T08:04:57.750006+00:00";
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_2.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_2.sqlpp
new file mode 100644
index 0000000000..d5a6c8025c
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_2.sqlpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Test inline single reference.
+* Expected Res : Success
+* Date         : 01/31/2015
+*/
+
+drop  dataverse test if exists;
+create  dataverse test;
+
+use test;
+
+create type TT as closed {
+  id : integer
+};
+
+create  dataset t1(TT) primary key id;
+
+SELECT a.doc,
+       a.name
+FROM t1 a
+WHERE a.new = 1;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_3.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_3.sqlpp
new file mode 100644
index 0000000000..dcec0bcf9d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_3.sqlpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Test inline single reference.
+* Expected Res : Success
+* Date         : 01/31/2015
+*/
+
+drop  dataverse test if exists;
+create  dataverse test;
+
+use test;
+
+create type TT as closed {
+  id : integer
+};
+
+create  dataset t1(TT) primary key id;
+
+SELECT a.doc,
+       a.name
+FROM t1 a
+ORDER BY a.id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_4.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_4.sqlpp
new file mode 100644
index 0000000000..25bba2b6d5
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inline-single-reference/inline-single-reference_4.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Test inline single reference.
+* Expected Res : Success
+* Date         : 01/31/2015
+*/
+
+drop  dataverse test if exists;
+create  dataverse test;
+
+use test;
+
+create type TT as closed {
+  id : integer
+};
+
+create  dataset t1(TT) primary key id;
+create  dataset t2(TT) primary key id;
+
+SELECT a.doc,
+       b.name
+FROM t1 a, t2 b
+WHERE a.jn = b.jn
+ORDER BY a.id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_1.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_1.plan
new file mode 100644
index 0000000000..3745304675
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_1.plan
@@ -0,0 +1,12 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- ASSIGN  |PARTITIONED|
+      -- STREAM_SELECT  |PARTITIONED|
+        -- ASSIGN  |PARTITIONED|
+          -- UNNEST  |PARTITIONED|
+            -- ASSIGN  |PARTITIONED|
+              -- STREAM_PROJECT  |PARTITIONED|
+                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                  -- DATASOURCE_SCAN (test.t1)  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_2.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_2.plan
new file mode 100644
index 0000000000..3c9b8ec26d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_2.plan
@@ -0,0 +1,9 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- ASSIGN  |PARTITIONED|
+      -- STREAM_SELECT  |PARTITIONED|
+        -- STREAM_PROJECT  |PARTITIONED|
+          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+            -- DATASOURCE_SCAN (test.t1)  |PARTITIONED|
+              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_3.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_3.plan
new file mode 100644
index 0000000000..aa769d23a6
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_3.plan
@@ -0,0 +1,10 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- ASSIGN  |PARTITIONED|
+      -- STREAM_PROJECT  |PARTITIONED|
+        -- SORT_MERGE_EXCHANGE [$$18(ASC) ]  |PARTITIONED|
+          -- STABLE_SORT [$$18(ASC)]  |PARTITIONED|
+            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+              -- DATASOURCE_SCAN (test.t1)  |PARTITIONED|
+                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_4.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_4.plan
new file mode 100644
index 0000000000..30d22c2ab2
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inline-single-reference/inline-single-reference_4.plan
@@ -0,0 +1,23 @@
+-- DISTRIBUTE_RESULT  |PARTITIONED|
+  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+    -- ASSIGN  |PARTITIONED|
+      -- STREAM_PROJECT  |PARTITIONED|
+        -- SORT_MERGE_EXCHANGE [$$33(ASC) ]  |PARTITIONED|
+          -- STABLE_SORT [$$33(ASC)]  |PARTITIONED|
+            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+              -- STREAM_PROJECT  |PARTITIONED|
+                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                  -- HYBRID_HASH_JOIN [$$35][$$36]  |PARTITIONED|
+                    -- HASH_PARTITION_EXCHANGE [$$35]  |PARTITIONED|
+                      -- ASSIGN  |PARTITIONED|
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          -- DATASOURCE_SCAN (test.t1)  |PARTITIONED|
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                    -- HASH_PARTITION_EXCHANGE [$$36]  |PARTITIONED|
+                      -- ASSIGN  |PARTITIONED|
+                        -- STREAM_PROJECT  |PARTITIONED|
+                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                            -- DATASOURCE_SCAN (test.t2)  |PARTITIONED|
+                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard-check_04.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard-check_04.plan
index e06ae9f533..243faab6c3 100644
--- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard-check_04.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard-check_04.plan
@@ -5,9 +5,11 @@
         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
           -- HYBRID_HASH_JOIN [$$61][$$49]  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$61]  |PARTITIONED|
-              -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+              -- ASSIGN  |PARTITIONED|
                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                  -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$49]  |PARTITIONED|
               -- STREAM_SELECT  |PARTITIONED|
                 -- ASSIGN  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard_04.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard_04.plan
index a4041cd46f..23820140d1 100644
--- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard_04.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/ngram-jaccard_04.plan
@@ -5,9 +5,11 @@
         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
           -- HYBRID_HASH_JOIN [$$61][$$48]  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$61]  |PARTITIONED|
-              -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+              -- ASSIGN  |PARTITIONED|
                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                  -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$48]  |PARTITIONED|
               -- STREAM_SELECT  |PARTITIONED|
                 -- ASSIGN  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard-check_04.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard-check_04.plan
index c48868f3a4..623f0c5cff 100644
--- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard-check_04.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard-check_04.plan
@@ -5,9 +5,11 @@
         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
           -- HYBRID_HASH_JOIN [$$61][$$49]  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$61]  |PARTITIONED|
-              -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+              -- ASSIGN  |PARTITIONED|
                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                  -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$49]  |PARTITIONED|
               -- STREAM_SELECT  |PARTITIONED|
                 -- ASSIGN  |PARTITIONED|
diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard_04.plan
 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard_04.plan
index e56f7a6943..2616a991ef 100644
--- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard_04.plan
+++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/results/inverted-index-join/word-jaccard_04.plan
@@ -5,9 +5,11 @@
         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
           -- HYBRID_HASH_JOIN [$$61][$$48]  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$61]  |PARTITIONED|
-              -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+              -- ASSIGN  |PARTITIONED|
                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
-                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+                  -- DATASOURCE_SCAN (test.DBLP)  |PARTITIONED|
+                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                      -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
             -- HASH_PARTITION_EXCHANGE [$$48]  |PARTITIONED|
               -- STREAM_SELECT  |PARTITIONED|
                 -- ASSIGN  |PARTITIONED|
diff --git 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java
 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java
index be169f42d6..77a97c8456 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java
@@ -19,13 +19,16 @@
 package org.apache.hyracks.algebricks.rewriter.rules;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 
@@ -50,15 +53,18 @@ import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.Var
 public class InlineSingleReferenceVariablesRule extends InlineVariablesRule {
 
     // Maps from variable to a list of operators using that variable.
-    protected Map<LogicalVariable, List<ILogicalOperator>> usedVarsMap =
-            new LinkedHashMap<LogicalVariable, List<ILogicalOperator>>();
-    protected List<LogicalVariable> usedVars = new 
ArrayList<LogicalVariable>();
+    protected Map<LogicalVariable, List<ILogicalOperator>> usedVarsMap = new 
LinkedHashMap<>();
+    protected List<LogicalVariable> usedVars = new ArrayList<>();
+    protected Set<LogicalVariable> usedResultVars = new HashSet<>();
+    protected Set<LogicalVariable> docRefVars = new HashSet<>();
 
     @Override
     protected void prepare(IOptimizationContext context) {
         super.prepare(context);
         usedVarsMap.clear();
         usedVars.clear();
+        usedResultVars.clear();
+        docRefVars.clear();
     }
 
     @Override
@@ -71,6 +77,8 @@ public class InlineSingleReferenceVariablesRule extends 
InlineVariablesRule {
                 if (!op.requiresVariableReferenceExpressions()) {
                     inlineVisitor.setOperator(op);
                     inlineVisitor.setTargetVariable(entry.getKey());
+                    inlineVisitor.setUsedResultVars(usedResultVars);
+                    inlineVisitor.setDocRefVars(docRefVars);
                     if (op.accept(inlineVisitor, inlineVisitor)) {
                         modified = true;
                     }
@@ -83,6 +91,14 @@ public class InlineSingleReferenceVariablesRule extends 
InlineVariablesRule {
 
     @Override
     protected boolean performBottomUpAction(ILogicalOperator op) throws 
AlgebricksException {
+        if (op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT) {
+            VariableUtilities.getUsedVariables(op, usedResultVars);
+        }
+        if (op.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN
+                || op.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP) {
+            // TODO: We should find a way to exclude primary keys from the 
produced variables.
+            VariableUtilities.getProducedVariables(op, docRefVars);
+        }
         usedVars.clear();
         VariableUtilities.getUsedVariables(op, usedVars);
         for (LogicalVariable var : usedVars) {
diff --git 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
index 5970f4ff3e..f344343805 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
@@ -71,11 +71,14 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
 
     // map of variables that could be replaced by their producing expression.
     // populated during the top-down sweep of the plan.
-    private Map<LogicalVariable, ILogicalExpression> varAssignRhs = new 
HashMap<>();
+    private final Map<LogicalVariable, ILogicalExpression> varAssignRhs = new 
HashMap<>();
+    // map of variables to the operator that produces the expression.
+    // populated during the top-down sweep of the plan.
+    private final Map<LogicalVariable, ILogicalOperator> varAssignOp = new 
HashMap<>();
     // visitor for replacing variable reference expressions with their 
originating expression.
-    protected InlineVariablesVisitor inlineVisitor = new 
InlineVariablesVisitor(varAssignRhs);
+    protected final InlineVariablesVisitor inlineVisitor = new 
InlineVariablesVisitor(varAssignRhs, varAssignOp);
     // set of FunctionIdentifiers that we should not inline.
-    protected Set<FunctionIdentifier> doNotInlineFuncs = new HashSet<>();
+    protected final Set<FunctionIdentifier> doNotInlineFuncs = new HashSet<>();
     // indicates whether the rule has been run
     private boolean hasRun = false;
     // set to prevent re-visiting a subtree from the other sides. Operators 
with multiple outputs are the ones that
@@ -112,6 +115,7 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
 
     protected void prepare(IOptimizationContext context) {
         varAssignRhs.clear();
+        varAssignOp.clear();
         inlineVisitor.setContext(context);
         subTreesDone.clear();
         usedVariableCounter.clear();
@@ -160,6 +164,7 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
                     }
                 }
                 varAssignRhs.put(variable, expr);
+                varAssignOp.put(variable, assignOp);
             }
         }
 
@@ -181,6 +186,7 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
                 if (varMap != null && !varMap.isEmpty() && 
Collections.max(varMap.values()) > context
                         
.getPhysicalOptimizationConfig().getMaxVariableOccurrencesForInlining()) {
                     varAssignRhs.remove(variable);
+                    varAssignOp.remove(variable);
                 }
             }
         }
@@ -198,6 +204,7 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
                     Set<LogicalVariable> producedVars = new HashSet<>();
                     VariableUtilities.getProducedVariables(root.getValue(), 
producedVars);
                     varAssignRhs.keySet().removeAll(producedVars);
+                    varAssignOp.keySet().removeAll(producedVars);
                 }
             }
         }
@@ -208,6 +215,7 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
             Set<LogicalVariable> rightLiveVars = new HashSet<>();
             
VariableUtilities.getLiveVariables(op.getInputs().get(1).getValue(), 
rightLiveVars);
             varAssignRhs.keySet().removeAll(rightLiveVars);
+            varAssignOp.keySet().removeAll(rightLiveVars);
         }
 
         if (performBottomUpAction(op)) {
@@ -302,15 +310,21 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
             implements ILogicalExpressionReferenceTransform {
 
         private final Map<LogicalVariable, ILogicalExpression> varAssignRhs;
+        private final Map<LogicalVariable, ILogicalOperator> varAssignOp;
         private final Set<LogicalVariable> liveVars = new HashSet<>();
         private final List<LogicalVariable> rhsUsedVars = new ArrayList<>();
         private ILogicalOperator op;
         private IOptimizationContext context;
         // If set, only replace this variable reference.
         private LogicalVariable targetVar;
+        private Set<LogicalVariable> usedResultVars = new HashSet<>();
+        private Set<LogicalVariable> docRefVars = new HashSet<>();
+        private List<LogicalVariable> opProducedVars = new ArrayList<>();
 
-        public InlineVariablesVisitor(Map<LogicalVariable, ILogicalExpression> 
varAssignRhs) {
+        public InlineVariablesVisitor(Map<LogicalVariable, ILogicalExpression> 
varAssignRhs,
+                Map<LogicalVariable, ILogicalOperator> varAssignOp) {
             this.varAssignRhs = varAssignRhs;
+            this.varAssignOp = varAssignOp;
         }
 
         public void setTargetVariable(LogicalVariable targetVar) {
@@ -326,6 +340,14 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
             liveVars.clear();
         }
 
+        public void setUsedResultVars(Set<LogicalVariable> usedResultVars) {
+            this.usedResultVars = usedResultVars;
+        }
+
+        public void setDocRefVars(Set<LogicalVariable> docRefVars) {
+            this.docRefVars = docRefVars;
+        }
+
         @Override
         public Boolean visitWindowOperator(WindowOperator op, 
ILogicalExpressionReferenceTransform arg)
                 throws AlgebricksException {
@@ -369,6 +391,10 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
             }
 
             ILogicalExpression rhs = varAssignRhs.get(var);
+            ILogicalOperator rhsOp = null;
+            if (varAssignOp != null) {
+                rhsOp = varAssignOp.get(var);
+            }
             if (rhs == null) {
                 // Variable was not produced by an assign.
                 return false;
@@ -384,11 +410,58 @@ public class InlineVariablesRule implements 
IAlgebraicRewriteRule {
                 if (!liveVars.contains(rhsUsedVar)) {
                     return false;
                 }
+                // Do not inline an expression that contains the doc variable 
(e.g. $$abc) if there is a “sensitive”
+                // operator in between this op and the op that produced the 
expression. A "sensitive" operator is one
+                // that would be more expensive if the size of the data 
flowing through it is bigger.
+                if (docRefVars.contains(rhsUsedVar) && 
opProducedVarsInResultVars(op)
+                        && !safeToInlineVariables(op, rhsOp)) {
+                    return false;
+                }
             }
 
             // Replace variable reference with a clone of the rhs expr.
             exprRef.setValue(rhs.cloneExpression());
             return true;
         }
+
+        private boolean opProducedVarsInResultVars(ILogicalOperator op) throws 
AlgebricksException {
+            opProducedVars.clear();
+            VariableUtilities.getProducedVariables(op, opProducedVars);
+            for (LogicalVariable pVar : opProducedVars) {
+                if (usedResultVars.contains(pVar)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        private boolean safeToInlineVariables(ILogicalOperator op, 
ILogicalOperator rhsOp) throws AlgebricksException {
+            ILogicalOperator currentOp = op;
+            while (currentOp != null) {
+                if (currentOp.getOperatorTag() == 
LogicalOperatorTag.DATASOURCESCAN
+                        || currentOp.getOperatorTag() == 
LogicalOperatorTag.UNNEST_MAP) {
+                    break;
+                }
+                if (rhsOp != null && currentOp == rhsOp) {
+                    break;
+
+                }
+                if (sensitiveOpForInlining(currentOp)) {
+                    return false;
+                }
+                currentOp = currentOp.getInputs().get(0).getValue();
+            }
+            return true;
+        }
+
+        private boolean sensitiveOpForInlining(ILogicalOperator op) {
+            if (op.getInputs().size() != 1) {
+                return true;
+            }
+            return switch (op.getOperatorTag()) {
+                case ORDER, UNNEST -> true;
+                default -> false;
+            };
+        }
     }
 }

Reply via email to