This is an automated email from the ASF dual-hosted git repository. dlych pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 46e3ad2d88992aab046996cfa02bf8cdaf5c436f Author: Dmitry Lychagin <[email protected]> AuthorDate: Thu Jan 14 14:33:54 2021 -0800 [NO ISSUE][COMP] Improve variable substitution - user model changes: no - storage format changes: no - interface changes: no Details: - Rename fields in PropagatingTypeEnvironment to align with their semantics - PropagatingTypeEnvironment.substituteProducedVariable() should also substitute variables in 'nonMissableVariables' and 'correlatedMissableVariableLists' - Minor improvements in SubstituteVariableVisitor Change-Id: I8acafe7fae8fa53dc962fe260e48e8ff84dadb86 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/9603 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- .../algebra/operators/logical/AssignOperator.java | 11 ----- .../operators/logical/LeftOuterJoinOperator.java | 4 +- .../logical/LeftOuterUnnestMapOperator.java | 6 +-- .../operators/logical/LeftOuterUnnestOperator.java | 14 +----- .../algebra/operators/logical/SelectOperator.java | 2 +- .../visitors/SubstituteVariableVisitor.java | 16 +++++-- .../algebra/properties/TypePropagationPolicy.java | 24 +++++----- .../algebra/typing/PropagatingTypeEnvironment.java | 56 ++++++++++++++-------- 8 files changed, 67 insertions(+), 66 deletions(-) diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java index 202c291..ade9552 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java +++ 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java @@ -23,11 +23,9 @@ import java.util.List; import org.apache.commons.lang3.mutable.Mutable; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; -import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag; import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; import org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment; -import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; import org.apache.hyracks.algebricks.core.algebra.properties.LocalOrderProperty; import org.apache.hyracks.algebricks.core.algebra.properties.VariablePropagationPolicy; import org.apache.hyracks.algebricks.core.algebra.typing.ITypingContext; @@ -80,15 +78,6 @@ public class AssignOperator extends AbstractAssignOperator { for (int i = 0; i < n; i++) { env.setVarType(variables.get(i), ctx.getExpressionTypeComputer().getType(expressions.get(i).getValue(), ctx.getMetadataProvider(), env)); - if (expressions.get(i).getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) { - LogicalVariable var = - ((VariableReferenceExpression) expressions.get(i).getValue()).getVariableReference(); - for (List<LogicalVariable> list : env.getCorrelatedMissableVariableLists()) { - if (list.contains(var)) { - list.add(variables.get(i)); - } - } - } } return env; } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java index 797c5eb..4e382d2 100644 --- 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterJoinOperator.java @@ -67,8 +67,8 @@ public class LeftOuterJoinOperator extends AbstractBinaryJoinOperator { PropagatingTypeEnvironment env = new PropagatingTypeEnvironment(ctx.getExpressionTypeComputer(), ctx.getMissableTypeComputer(), ctx.getMetadataProvider(), TypePropagationPolicy.LEFT_OUTER, envPointers); - List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>(); - VariableUtilities.getLiveVariables(inputs.get(1).getValue(), liveVars); // live variables from outer branch can be null together + List<LogicalVariable> liveVars = new ArrayList<>(); + VariableUtilities.getLiveVariables(inputs.get(1).getValue(), liveVars); // live variables from right branch can be MISSING together env.getCorrelatedMissableVariableLists().add(liveVars); return env; } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java index 6bacdb4..cd009c0 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestMapOperator.java @@ -18,7 +18,6 @@ */ package org.apache.hyracks.algebricks.core.algebra.operators.logical; -import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.mutable.Mutable; @@ -61,10 +60,7 @@ public class LeftOuterUnnestMapOperator extends 
AbstractUnnestMapOperator { // Propagates all input variables that come from the outer branch. PropagatingTypeEnvironment env = createPropagatingAllInputsTypeEnvironment(ctx); - env.getCorrelatedMissableVariableLists().add(new ArrayList<>(variables)); - - // For the variables from the inner branch, the output type is the union - // of (original type + null). + // The produced variables of the this operator are missable because of the left outer semantics. for (int i = 0; i < variables.size(); i++) { env.setVarType(variables.get(i), ctx.getMissableTypeComputer().makeMissableType(variableTypes.get(i))); } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java index 8c95a3f..14996dd 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/LeftOuterUnnestOperator.java @@ -14,9 +14,6 @@ */ package org.apache.hyracks.algebricks.core.algebra.operators.logical; -import java.util.ArrayList; -import java.util.List; - import org.apache.commons.lang3.mutable.Mutable; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; @@ -46,21 +43,14 @@ public class LeftOuterUnnestOperator extends AbstractUnnestNonMapOperator { @Override public IVariableTypeEnvironment computeOutputTypeEnvironment(ITypingContext ctx) throws AlgebricksException { PropagatingTypeEnvironment env = createPropagatingAllInputsTypeEnvironment(ctx); + + // The produced variables of the this operator are missable because of the left outer 
semantics. Object t = env.getType(expression.getValue()); - // For the variables from the inner branch, the output type is the union - // of (original type + missing). env.setVarType(variables.get(0), ctx.getMissableTypeComputer().makeMissableType(t)); if (positionalVariable != null) { env.setVarType(positionalVariable, ctx.getMissableTypeComputer().makeMissableType(positionalVariableType)); } - // The produced variables of the this operator are missable because of the left outer semantics. - List<LogicalVariable> missableVars = new ArrayList<>(); - missableVars.add(variables.get(0)); - if (positionalVariable != null) { - missableVars.add(positionalVariable); - } - env.getCorrelatedMissableVariableLists().add(missableVars); return env; } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java index 26ce137..b2e2dfd 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java @@ -113,7 +113,7 @@ public class SelectOperator extends AbstractLogicalOperator { ILogicalExpression a2 = f2.getArguments().get(0).getValue(); if (a2.getExpressionTag() == LogicalExpressionTag.VARIABLE) { LogicalVariable var = ((VariableReferenceExpression) a2).getVariableReference(); - env.getNonNullVariables().add(var); + env.getNonMissableVariables().add(var); } } } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java index 0b1bf5b..d1a1c03 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java @@ -169,8 +169,9 @@ public class SubstituteVariableVisitor if (!producedVarFound) { substInNestedPlans(op, pair.first, pair.second); } - // always call substProducedVarInTypeEnvironment() because GroupByOperator.computeOutputTypeEnvironment() - // adds used vars into output type environment in some cases. + // GROUP BY operator may add its used variables + // to its own output type environment as produced variables + // therefore we need perform variable substitution in its own type environment // TODO (dmitry): this needs to be revisited substProducedVarInTypeEnvironment(op, pair); return null; @@ -187,6 +188,10 @@ public class SubstituteVariableVisitor public Void visitLeftOuterJoinOperator(LeftOuterJoinOperator op, Pair<LogicalVariable, LogicalVariable> pair) throws AlgebricksException { substUsedVariablesInExpr(op.getCondition(), pair.first, pair.second); + // LEFT OUTER JOIN operator adds its right branch variables + // to its own output type environment as 'correlatedMissableVariables' + // therefore we need perform variable substitution in its own type environment + substProducedVarInTypeEnvironment(op, pair); return null; } @@ -245,8 +250,13 @@ public class SubstituteVariableVisitor } @Override - public Void visitSelectOperator(SelectOperator op, Pair<LogicalVariable, LogicalVariable> pair) { + public Void visitSelectOperator(SelectOperator op, Pair<LogicalVariable, LogicalVariable> pair) + throws AlgebricksException { 
substUsedVariablesInExpr(op.getCondition(), pair.first, pair.second); + // SELECT operator may add its used variable + // to its own output type environment as 'nonMissableVariable' (not(is-missing($used_var)) + // therefore we need perform variable substitution in its own type environment + substProducedVarInTypeEnvironment(op, pair); return null; } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java index 9d60370..c37c674 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/properties/TypePropagationPolicy.java @@ -31,21 +31,22 @@ public abstract class TypePropagationPolicy { @Override public Object getVarType(LogicalVariable var, IMissableTypeComputer ntc, - List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists, - ITypeEnvPointer... typeEnvs) throws AlgebricksException { + List<LogicalVariable> nonMissableVariableList, + List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... 
typeEnvs) + throws AlgebricksException { for (ITypeEnvPointer p : typeEnvs) { IVariableTypeEnvironment env = p.getTypeEnv(); if (env == null) { throw new AlgebricksException( "Null environment for pointer " + p + " in getVarType for var=" + var); } - Object t = env.getVarType(var, nonNullVariableList, correlatedNullableVariableLists); + Object t = env.getVarType(var, nonMissableVariableList, correlatedMissableVariableLists); if (t != null) { if (ntc != null && ntc.canBeMissing(t)) { - for (List<LogicalVariable> list : correlatedNullableVariableLists) { + for (List<LogicalVariable> list : correlatedMissableVariableLists) { if (list.contains(var)) { for (LogicalVariable v : list) { - if (nonNullVariableList.contains(v)) { + if (nonMissableVariableList.contains(v)) { return ntc.getNonOptionalType(t); } } @@ -63,16 +64,17 @@ public abstract class TypePropagationPolicy { @Override public Object getVarType(LogicalVariable var, IMissableTypeComputer ntc, - List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists, - ITypeEnvPointer... typeEnvs) throws AlgebricksException { + List<LogicalVariable> nonMissableVariableList, + List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... typeEnvs) + throws AlgebricksException { int n = typeEnvs.length; // Searches from the inner branch to the outer branch. // TODO(buyingyi): A split operator could lead to the case that the type for a variable could be // found in both inner and outer branches. Fix computeOutputTypeEnvironment() in ProjectOperator // and investigate why many test queries fail if only live variables' types are propagated. 
for (int i = n - 1; i >= 0; i--) { - Object t = - typeEnvs[i].getTypeEnv().getVarType(var, nonNullVariableList, correlatedNullableVariableLists); + Object t = typeEnvs[i].getTypeEnv().getVarType(var, nonMissableVariableList, + correlatedMissableVariableLists); if (t == null) { continue; } @@ -82,7 +84,7 @@ public abstract class TypePropagationPolicy { // inner branch boolean nonMissingVarIsProduced = false; - for (LogicalVariable v : nonNullVariableList) { + for (LogicalVariable v : nonMissableVariableList) { boolean toBreak = false; if (v == var) { nonMissingVarIsProduced = true; @@ -106,6 +108,6 @@ public abstract class TypePropagationPolicy { }; public abstract Object getVarType(LogicalVariable var, IMissableTypeComputer ntc, - List<LogicalVariable> nonNullVariableList, List<List<LogicalVariable>> correlatedNullableVariableLists, + List<LogicalVariable> nonMissableVariableList, List<List<LogicalVariable>> correlatedMissableVariableLists, ITypeEnvPointer... typeEnvs) throws AlgebricksException; } diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java index 9d2a5da..27aa902 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/typing/PropagatingTypeEnvironment.java @@ -32,60 +32,74 @@ public class PropagatingTypeEnvironment extends AbstractTypeEnvironment { private final TypePropagationPolicy policy; - private final IMissableTypeComputer nullableTypeComputer; + private final IMissableTypeComputer missableTypeComputer; private final ITypeEnvPointer[] envPointers; - private final List<LogicalVariable> nonNullVariables = new 
ArrayList<>(); + private final List<LogicalVariable> nonMissableVariables = new ArrayList<>(); - private final List<List<LogicalVariable>> correlatedNullableVariableLists = new ArrayList<>(); + private final List<List<LogicalVariable>> correlatedMissableVariableLists = new ArrayList<>(); public PropagatingTypeEnvironment(IExpressionTypeComputer expressionTypeComputer, - IMissableTypeComputer nullableTypeComputer, IMetadataProvider<?, ?> metadataProvider, + IMissableTypeComputer missableTypeComputer, IMetadataProvider<?, ?> metadataProvider, TypePropagationPolicy policy, ITypeEnvPointer[] envPointers) { super(expressionTypeComputer, metadataProvider); - this.nullableTypeComputer = nullableTypeComputer; + this.missableTypeComputer = missableTypeComputer; this.policy = policy; this.envPointers = envPointers; } @Override public Object getVarType(LogicalVariable var) throws AlgebricksException { - return getVarTypeFullList(var, nonNullVariables, correlatedNullableVariableLists); + return getVarTypeFullList(var, nonMissableVariables, correlatedMissableVariableLists); } - public List<LogicalVariable> getNonNullVariables() { - return nonNullVariables; + public List<LogicalVariable> getNonMissableVariables() { + return nonMissableVariables; } public List<List<LogicalVariable>> getCorrelatedMissableVariableLists() { - return correlatedNullableVariableLists; + return correlatedMissableVariableLists; } @Override - public Object getVarType(LogicalVariable var, List<LogicalVariable> nonNullVariableList, - List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException { - for (LogicalVariable v : nonNullVariables) { - if (!nonNullVariableList.contains(v)) { - nonNullVariableList.add(v); + public Object getVarType(LogicalVariable var, List<LogicalVariable> nonMissableVariableList, + List<List<LogicalVariable>> correlatedMissableVariableLists) throws AlgebricksException { + for (LogicalVariable v : nonMissableVariables) { + if 
(!nonMissableVariableList.contains(v)) { + nonMissableVariableList.add(v); } } - Object t = getVarTypeFullList(var, nonNullVariableList, correlatedNullableVariableLists); - for (List<LogicalVariable> list : this.correlatedNullableVariableLists) { - if (!correlatedNullableVariableLists.contains(list)) { - correlatedNullableVariableLists.add(list); + Object t = getVarTypeFullList(var, nonMissableVariableList, correlatedMissableVariableLists); + for (List<LogicalVariable> list : this.correlatedMissableVariableLists) { + if (!correlatedMissableVariableLists.contains(list)) { + correlatedMissableVariableLists.add(list); } } return t; } - private Object getVarTypeFullList(LogicalVariable var, List<LogicalVariable> nonNullVariableList, - List<List<LogicalVariable>> correlatedNullableVariableLists) throws AlgebricksException { + private Object getVarTypeFullList(LogicalVariable var, List<LogicalVariable> nonMissableVariableList, + List<List<LogicalVariable>> correlatedMissableVariableLists) throws AlgebricksException { Object t = varTypeMap.get(var); if (t != null) { return t; } - return policy.getVarType(var, nullableTypeComputer, nonNullVariableList, correlatedNullableVariableLists, + return policy.getVarType(var, missableTypeComputer, nonMissableVariableList, correlatedMissableVariableLists, envPointers); } + + @Override + public boolean substituteProducedVariable(LogicalVariable v1, LogicalVariable v2) throws AlgebricksException { + boolean result = super.substituteProducedVariable(v1, v2); + if (nonMissableVariables.remove(v1)) { + nonMissableVariables.add(v2); + } + for (List<LogicalVariable> missableVarList : correlatedMissableVariableLists) { + if (missableVarList.remove(v1)) { + missableVarList.add(v2); + } + } + return result; + } }
