Luo Chen has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/2043

Change subject: [ASTERIXDB-2119][COMP] Fix variable ordering of projects
......................................................................

[ASTERIXDB-2119][COMP] Fix variable ordering of projects

- user model changes: no
- storage format changes: no
- interface changes: no

Details:

The current IntroduceProjectsRule implementation uses HashSet to
calculate projected variables, which makes the ordering of output
variables unpreditable. This patch fixes this undesired behavior by
using LinkedHashSet to ensure the project variables have the same
ordering from the original variables.

Change-Id: Id96a5fe048dd11b7f2e97f4d4a802736ba5ba003
---
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
M 
hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
2 files changed, 12 insertions(+), 12 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/43/2043/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
index 6463d6d..daa29c4 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
@@ -30,7 +30,7 @@
                   -- STREAM_PROJECT  |PARTITIONED|
                     assign [$$18] <- 
[substring($$22.getField("department_id"), 1)]
                     -- ASSIGN  |PARTITIONED|
-                      project ([$$22, $$15])
+                      project ([$$15, $$22])
                       -- STREAM_PROJECT  |PARTITIONED|
                         assign [$$15, $$22] <- [$$e.getField("salary"), 
$$e.getField("dept")]
                         -- ASSIGN  |PARTITIONED|
diff --git 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
index d17e021..44a7b57 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
@@ -21,7 +21,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -48,14 +48,14 @@
  * that the overall cost of the plan is reduced since project operators also 
add a cost.
  */
 public class IntroduceProjectsRule implements IAlgebraicRewriteRule {
-
-    private final Set<LogicalVariable> usedVars = new HashSet<>();
-    private final Set<LogicalVariable> liveVars = new HashSet<>();
-    private final Set<LogicalVariable> producedVars = new HashSet<>();
+    // preserve the original variable order using linked hash set
+    private final Set<LogicalVariable> usedVars = new LinkedHashSet<>();
+    private final Set<LogicalVariable> liveVars = new LinkedHashSet<>();
+    private final Set<LogicalVariable> producedVars = new LinkedHashSet<>();
     private final List<LogicalVariable> projectVars = new ArrayList<>();
     protected boolean hasRun = false;
     // Keep track of used variables after the current operator, including used 
variables in itself.
-    private final Map<AbstractLogicalOperator, HashSet<LogicalVariable>> 
allUsedVarsAfterOpMap = new HashMap<>();
+    private final Map<AbstractLogicalOperator, Set<LogicalVariable>> 
allUsedVarsAfterOpMap = new HashMap<>();
 
     @Override
     public boolean rewritePost(Mutable<ILogicalOperator> opRef, 
IOptimizationContext context) {
@@ -74,7 +74,7 @@
         // This is necessary since introduceProjects() may generate a wrong 
project if it doesn't have the information
         // for all paths in the plan in case there are two or more branches 
since it can only deal one path at a time.
         // So, a variable used in one path might be removed while the method 
traverses another path.
-        Set<LogicalVariable> parentUsedVars = new HashSet<>();
+        Set<LogicalVariable> parentUsedVars = new LinkedHashSet<>();
         collectUsedVars(opRef, parentUsedVars);
 
         // Introduce projects
@@ -89,7 +89,7 @@
     protected void collectUsedVars(Mutable<ILogicalOperator> opRef, 
Set<LogicalVariable> parentUsedVars)
             throws AlgebricksException {
         AbstractLogicalOperator op = (AbstractLogicalOperator) 
opRef.getValue();
-        HashSet<LogicalVariable> usedVarsPerOp = new HashSet<>();
+        Set<LogicalVariable> usedVarsPerOp = new LinkedHashSet<>();
         VariableUtilities.getUsedVariables(op, usedVarsPerOp);
         usedVarsPerOp.addAll(parentUsedVars);
 
@@ -117,7 +117,7 @@
         // In the top-down pass, maintain a set of variables that are used in 
op and all its parents.
         // This is a necessary step for the newly created project operator 
during this optimization,
         // since we already have all information from collectUsedVars() method 
for the other operators.
-        HashSet<LogicalVariable> parentsUsedVars = new HashSet<>();
+        Set<LogicalVariable> parentsUsedVars = new LinkedHashSet<>();
         parentsUsedVars.addAll(parentUsedVars);
         parentsUsedVars.addAll(usedVars);
 
@@ -180,8 +180,8 @@
                 boolean eliminateProject = true;
                 // For UnionAll the variables must also be in exactly the 
correct order.
                 if (parentOp.getOperatorTag() == LogicalOperatorTag.UNIONALL) {
-                    eliminateProject = 
canEliminateProjectBelowUnion((UnionAllOperator) parentOp, projectOp,
-                            parentInputIndex);
+                    eliminateProject =
+                            canEliminateProjectBelowUnion((UnionAllOperator) 
parentOp, projectOp, parentInputIndex);
                 }
                 if (eliminateProject) {
                     // The existing project has become useless. Remove it.

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/2043
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id96a5fe048dd11b7f2e97f4d4a802736ba5ba003
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Luo Chen <[email protected]>

Reply via email to