Luo Chen has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/2043
Change subject: [ASTERIXDB-2119][COMP] Fix variable ordering of projects
......................................................................
[ASTERIXDB-2119][COMP] Fix variable ordering of projects
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
The current IntroduceProjectsRule implementation uses HashSet to
calculate projected variables, which makes the ordering of output
variables unpreditable. This patch fixes this undesired behavior by
using LinkedHashSet to ensure the project variables have the same
ordering from the original variables.
Change-Id: Id96a5fe048dd11b7f2e97f4d4a802736ba5ba003
---
M
asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
M
hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
2 files changed, 12 insertions(+), 12 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/43/2043/1
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
index 6463d6d..daa29c4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/explain/explain_field_access/explain_field_access.1.adm
@@ -30,7 +30,7 @@
-- STREAM_PROJECT |PARTITIONED|
assign [$$18] <-
[substring($$22.getField("department_id"), 1)]
-- ASSIGN |PARTITIONED|
- project ([$$22, $$15])
+ project ([$$15, $$22])
-- STREAM_PROJECT |PARTITIONED|
assign [$$15, $$22] <- [$$e.getField("salary"),
$$e.getField("dept")]
-- ASSIGN |PARTITIONED|
diff --git
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
index d17e021..44a7b57 100644
---
a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
+++
b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
@@ -21,7 +21,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -48,14 +48,14 @@
* that the overall cost of the plan is reduced since project operators also
add a cost.
*/
public class IntroduceProjectsRule implements IAlgebraicRewriteRule {
-
- private final Set<LogicalVariable> usedVars = new HashSet<>();
- private final Set<LogicalVariable> liveVars = new HashSet<>();
- private final Set<LogicalVariable> producedVars = new HashSet<>();
+ // preserve the original variable order using linked hash set
+ private final Set<LogicalVariable> usedVars = new LinkedHashSet<>();
+ private final Set<LogicalVariable> liveVars = new LinkedHashSet<>();
+ private final Set<LogicalVariable> producedVars = new LinkedHashSet<>();
private final List<LogicalVariable> projectVars = new ArrayList<>();
protected boolean hasRun = false;
// Keep track of used variables after the current operator, including used
variables in itself.
- private final Map<AbstractLogicalOperator, HashSet<LogicalVariable>>
allUsedVarsAfterOpMap = new HashMap<>();
+ private final Map<AbstractLogicalOperator, Set<LogicalVariable>>
allUsedVarsAfterOpMap = new HashMap<>();
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef,
IOptimizationContext context) {
@@ -74,7 +74,7 @@
// This is necessary since introduceProjects() may generate a wrong
project if it doesn't have the information
// for all paths in the plan in case there are two or more branches
since it can only deal one path at a time.
// So, a variable used in one path might be removed while the method
traverses another path.
- Set<LogicalVariable> parentUsedVars = new HashSet<>();
+ Set<LogicalVariable> parentUsedVars = new LinkedHashSet<>();
collectUsedVars(opRef, parentUsedVars);
// Introduce projects
@@ -89,7 +89,7 @@
protected void collectUsedVars(Mutable<ILogicalOperator> opRef,
Set<LogicalVariable> parentUsedVars)
throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator)
opRef.getValue();
- HashSet<LogicalVariable> usedVarsPerOp = new HashSet<>();
+ Set<LogicalVariable> usedVarsPerOp = new LinkedHashSet<>();
VariableUtilities.getUsedVariables(op, usedVarsPerOp);
usedVarsPerOp.addAll(parentUsedVars);
@@ -117,7 +117,7 @@
// In the top-down pass, maintain a set of variables that are used in
op and all its parents.
// This is a necessary step for the newly created project operator
during this optimization,
// since we already have all information from collectUsedVars() method
for the other operators.
- HashSet<LogicalVariable> parentsUsedVars = new HashSet<>();
+ Set<LogicalVariable> parentsUsedVars = new LinkedHashSet<>();
parentsUsedVars.addAll(parentUsedVars);
parentsUsedVars.addAll(usedVars);
@@ -180,8 +180,8 @@
boolean eliminateProject = true;
// For UnionAll the variables must also be in exactly the
correct order.
if (parentOp.getOperatorTag() == LogicalOperatorTag.UNIONALL) {
- eliminateProject =
canEliminateProjectBelowUnion((UnionAllOperator) parentOp, projectOp,
- parentInputIndex);
+ eliminateProject =
+ canEliminateProjectBelowUnion((UnionAllOperator)
parentOp, projectOp, parentInputIndex);
}
if (eliminateProject) {
// The existing project has become useless. Remove it.
--
To view, visit https://asterix-gerrit.ics.uci.edu/2043
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id96a5fe048dd11b7f2e97f4d4a802736ba5ba003
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Luo Chen <[email protected]>