VXQUERY-105: Add group-by functionality, add scalability to JSON parser. 1) Adding the group-by feature according to the XQuery 3.0 specification; 2) Creating group-by-specific rewrite rules; 3) Adding rewrite rules to enable parallel access to JSON data; 4) Changing the JSON parser to enable JSONiq scalability.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/53b86c24 Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/53b86c24 Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/53b86c24 Branch: refs/heads/master Commit: 53b86c24a5f5cee1a9ce3ad368848b6f12b4cdb7 Parents: 2c88102 Author: Christina Pavlopoulou <[email protected]> Authored: Thu Dec 29 11:56:20 2016 -0800 Committer: Christina Pavlopoulou <[email protected]> Committed: Thu May 18 19:07:57 2017 -0700 ---------------------------------------------------------------------- .../scripts/testing_logging.properties | 6 +- .../compiler/rewriter/RewriteRuleset.java | 39 +- .../algebricks_new_version/NestGroupByRule.java | 193 ++++++++ .../PushGroupByThroughProduct.java | 198 ++++++++ .../AbstractPushExpressionIntoDatascanRule.java | 107 ++++ ...tractRemoveRedundantTypeExpressionsRule.java | 38 +- .../rules/ConvertAssignToAggregateRule.java | 3 +- .../EliminateUnnestAggregateSubplanRule.java | 6 +- .../rules/IntroduceTwoStepAggregateRule.java | 27 +- .../rules/PushAggregateIntoGroupbyRule.java | 494 +++++++++++++++++++ .../rules/PushChildIntoDataScanRule.java | 87 +--- .../PushKeysOrMembersIntoDatascanRule.java | 79 +++ .../rules/PushValueIntoDatascanRule.java | 92 ++++ .../RemoveUnusedSortDistinctNodesRule.java | 36 +- .../rules/SetVariableIdContextRule.java | 11 +- .../rewriter/rules/util/ExpressionToolbox.java | 23 + .../rewriter/rules/util/OperatorToolbox.java | 2 +- .../vxquery/exceptions/SystemException.java | 5 + .../vxquery/functions/builtin-operators.xml | 1 + .../apache/vxquery/jsonparser/JSONParser.java | 433 ++++++++++++++-- .../metadata/AbstractVXQueryDataSource.java | 85 +++- .../vxquery/metadata/IVXQueryDataSource.java | 30 ++ .../metadata/VXQueryCollectionDataSource.java | 86 +--- .../VXQueryCollectionOperatorDescriptor.java | 30 +- .../metadata/VXQueryIndexingDataSource.java | 82 +-- 
.../VXQueryIndexingOperatorDescriptor.java | 2 +- .../metadata/VXQueryMetadataProvider.java | 38 +- ...ctTaggedValueArgumentUnnestingEvaluator.java | 11 +- .../json/KeysOrMembersScalarEvaluator.java | 37 +- .../functions/json/KeysOrMembersUnnesting.java | 92 ++++ .../json/KeysOrMembersUnnestingEvaluator.java | 44 ++ .../KeysOrMembersUnnestingEvaluatorFactory.java | 39 ++ .../org/apache/vxquery/xmlquery/ast/ASTTag.java | 4 +- .../vxquery/xmlquery/ast/GroupSpecNode.java | 67 +++ .../vxquery/xmlquery/ast/GroupbyClauseNode.java | 42 ++ .../xmlquery/query/XMLQueryCompiler.java | 2 + .../vxquery/xmlquery/query/XMLQueryParser.java | 4 +- .../xmlquery/translator/XMLQueryTranslator.java | 100 +++- vxquery-core/src/main/javacc/xquery-grammar.jj | 65 ++- .../src/main/xslt/generate-op-defns.xsl | 20 +- .../ExpectedTestResults/Groups/group.txt | 4 + .../ExpectedTestResults/Groups/group_json.txt | 4 + .../Groups/group_json_count.txt | 4 + .../Json/Parser/Partition-1/q15_parser.txt | 3 + .../Json/Parser/Partition-1/q16_parser.txt | 3 + .../Json/Parser/Partition-2/q14_parser.txt | 3 + .../Json/Parser/Partition-2/q16_parser.txt | 3 + .../Json/Parser/Partition-4/q14_parser.txt | 3 + .../Json/Parser/Partition-4/q15_parser.txt | 3 + .../resources/Queries/XQuery/Groups/group.xq | 24 + .../Queries/XQuery/Groups/group_json.xq | 25 + .../Queries/XQuery/Groups/group_json_count.xq | 27 + .../XQuery/Indexing/Partition-1/useIndex6.xq | 1 + .../Json/Parser/Partition-1/q15_parser.xq | 26 + .../Json/Parser/Partition-1/q16_parser.xq | 25 + .../Json/Parser/Partition-2/q14_parser.xq | 25 + .../Json/Parser/Partition-2/q16_parser.xq | 25 + .../Json/Parser/Partition-4/q14_parser.xq | 25 + .../Json/Parser/Partition-4/q15_parser.xq | 25 + .../src/test/resources/VXQueryCatalog.xml | 14 + .../src/test/resources/cat/GroupQueries.xml | 38 ++ .../test/resources/cat/JsonParserQueries.xml | 34 +- 62 files changed, 2690 insertions(+), 414 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties index ec85207..d196229 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties @@ -62,7 +62,7 @@ handlers= java.util.logging.ConsoleHandler # Limit the message that are printed on the console to FINE and above. -java.util.logging.ConsoleHandler.level = INFO +java.util.logging.ConsoleHandler.level = FINE java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter @@ -75,5 +75,5 @@ java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter # messages: # edu.uci.ics.asterix.level = FINE -# edu.uci.ics.hyracks.algebricks.level = FINE -edu.uci.ics.hyracks.level = SEVERE +org.apache.hyracks.algebricks.level = FINE +org.apache.hyracks.level = SEVERE http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java index 1ee4833..d5fb32a 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/RewriteRuleset.java @@ -35,7 +35,6 @@ import 
org.apache.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecor import org.apache.hyracks.algebricks.rewriter.rules.InferTypesRule; import org.apache.hyracks.algebricks.rewriter.rules.InlineAssignIntoAggregateRule; import org.apache.hyracks.algebricks.rewriter.rules.InlineVariablesRule; -//import org.apache.hyracks.algebricks.rewriter.rules.InsertOuterJoinRule; import org.apache.hyracks.algebricks.rewriter.rules.IntroJoinInsideSubplanRule; import org.apache.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule; import org.apache.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule; @@ -59,8 +58,11 @@ import org.apache.hyracks.algebricks.rewriter.rules.subplan.EliminateSubplanWith import org.apache.hyracks.algebricks.rewriter.rules.subplan.NestedSubplanToJoinRule; import org.apache.hyracks.algebricks.rewriter.rules.subplan.PushSubplanIntoGroupByRule; import org.apache.hyracks.algebricks.rewriter.rules.subplan.SubplanOutOfGroupRule; +import org.apache.vxquery.compiler.rewriter.algebricks_new_version.NestGroupByRule; +import org.apache.vxquery.compiler.rewriter.algebricks_new_version.PushGroupByThroughProduct; import org.apache.vxquery.compiler.rewriter.rules.ConsolidateAssignAggregateRule; import org.apache.vxquery.compiler.rewriter.rules.ConsolidateDescandantChild; +import org.apache.vxquery.compiler.rewriter.rules.ConvertAssignToAggregateRule; import org.apache.vxquery.compiler.rewriter.rules.ConvertAssignToUnnestRule; import org.apache.vxquery.compiler.rewriter.rules.ConvertFromAlgebricksExpressionsRule; import org.apache.vxquery.compiler.rewriter.rules.ConvertToAlgebricksExpressionsRule; @@ -70,8 +72,11 @@ import org.apache.vxquery.compiler.rewriter.rules.EliminateUnnestAggregateSubpla import org.apache.vxquery.compiler.rewriter.rules.IntroduceCollectionRule; import org.apache.vxquery.compiler.rewriter.rules.IntroduceIndexingRule; import org.apache.vxquery.compiler.rewriter.rules.IntroduceTwoStepAggregateRule; +import 
org.apache.vxquery.compiler.rewriter.rules.PushAggregateIntoGroupbyRule; import org.apache.vxquery.compiler.rewriter.rules.PushChildIntoDataScanRule; import org.apache.vxquery.compiler.rewriter.rules.PushFunctionsOntoEqJoinBranches; +import org.apache.vxquery.compiler.rewriter.rules.PushKeysOrMembersIntoDatascanRule; +import org.apache.vxquery.compiler.rewriter.rules.PushValueIntoDatascanRule; import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantBooleanExpressionsRule; import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantCastExpressionsRule; import org.apache.vxquery.compiler.rewriter.rules.RemoveRedundantDataExpressionsRule; @@ -98,7 +103,7 @@ public class RewriteRuleset { public static final List<IAlgebraicRewriteRule> buildPathStepNormalizationRuleCollection() { List<IAlgebraicRewriteRule> normalization = new LinkedList<>(); normalization.add(new SetVariableIdContextRule()); - + normalization.add(new InferTypesRule()); // Remove unused functions. normalization.add(new RemoveUnusedSortDistinctNodesRule()); normalization.add(new RemoveRedundantTreatExpressionsRule()); @@ -163,8 +168,6 @@ public class RewriteRuleset { normalization.add(new RemoveUnusedAssignAndAggregateRule()); // Find assign for scalar aggregate function. - // normalization.add(new ConvertAssignToAggregateRule()); - // Use two step aggregate operators if possible. 
normalization.add(new IntroduceTwoStepAggregateRule()); @@ -190,8 +193,12 @@ public class RewriteRuleset { normalization.add(new ConvertToAlgebricksExpressionsRule()); normalization.add(new RemoveRedundantBooleanExpressionsRule()); // Clean up + normalization.add(new ConvertAssignToAggregateRule()); + normalization.add(new IntroduceTwoStepAggregateRule()); normalization.add(new RemoveRedundantVariablesRule()); normalization.add(new RemoveUnusedAssignAndAggregateRule()); + normalization.add(new PushValueIntoDatascanRule()); + normalization.add(new PushKeysOrMembersIntoDatascanRule()); return normalization; } @@ -206,8 +213,11 @@ public class RewriteRuleset { xquery.add(new SimpleUnnestToProductRule()); xquery.add(new PushMapOperatorDownThroughProductRule()); xquery.add(new PushSubplanWithAggregateDownThroughProductRule()); + xquery.add(new PushMapOperatorDownThroughProductRule()); + xquery.add(new PushGroupByThroughProduct()); xquery.add(new PushSelectDownRule()); xquery.add(new PushSelectIntoJoinRule()); + // Clean up xquery.add(new RemoveRedundantVariablesRule()); xquery.add(new RemoveUnusedAssignAndAggregateRule()); @@ -229,7 +239,6 @@ public class RewriteRuleset { List<IAlgebraicRewriteRule> xquery = new LinkedList<>(); xquery.add(new PushSelectDownRule()); - xquery.add(new SimpleUnnestToProductRule()); xquery.add(new ComplexUnnestToProductRule()); xquery.add(new ComplexJoinInferenceRule()); xquery.add(new PushSelectIntoJoinRule()); @@ -240,23 +249,18 @@ public class RewriteRuleset { xquery.add(new SubplanOutOfGroupRule()); // xquery.add(new InsertOuterJoinRule()); xquery.add(new ExtractFunctionsFromJoinConditionRule()); - xquery.add(new RemoveRedundantVariablesRule()); xquery.add(new RemoveUnusedAssignAndAggregateRule()); - xquery.add(new FactorRedundantGroupAndDecorVarsRule()); - xquery.add(new EliminateSubplanRule()); - xquery.add(new EliminateGroupByEmptyKeyRule()); - xquery.add(new PushSubplanIntoGroupByRule()); - xquery.add(new NestedSubplanToJoinRule()); 
- xquery.add(new EliminateSubplanWithInputCardinalityOneRule()); - return xquery; } public static final List<IAlgebraicRewriteRule> buildNormalizationRuleCollection() { List<IAlgebraicRewriteRule> normalization = new LinkedList<>(); normalization.add(new EliminateSubplanRule()); + normalization.add(new SimpleUnnestToProductRule()); + normalization.add(new NestedSubplanToJoinRule()); + normalization.add(new EliminateSubplanWithInputCardinalityOneRule()); normalization.add(new BreakSelectIntoConjunctsRule()); normalization.add(new PushSelectIntoJoinRule()); normalization.add(new ExtractGbyExpressionsRule()); @@ -267,8 +271,17 @@ public class RewriteRuleset { List<IAlgebraicRewriteRule> condPushDown = new LinkedList<>(); condPushDown.add(new PushSelectDownRule()); condPushDown.add(new InlineVariablesRule()); + condPushDown.add(new SubplanOutOfGroupRule()); + condPushDown.add(new RemoveRedundantVariablesRule()); + condPushDown.add(new RemoveUnusedAssignAndAggregateRule()); condPushDown.add(new FactorRedundantGroupAndDecorVarsRule()); + condPushDown.add(new PushAggregateIntoGroupbyRule()); condPushDown.add(new EliminateSubplanRule()); + condPushDown.add(new PushGroupByThroughProduct()); + condPushDown.add(new NestGroupByRule()); + condPushDown.add(new EliminateGroupByEmptyKeyRule()); + condPushDown.add(new PushSubplanIntoGroupByRule()); + condPushDown.add(new NestedSubplanToJoinRule()); return condPushDown; } http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java new file mode 100644 index 0000000..ad3db93 --- /dev/null +++ 
b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/NestGroupByRule.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.vxquery.compiler.rewriter.algebricks_new_version; + +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.lang3.mutable.Mutable; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan; +import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; +import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; +import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; 
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; +import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; +import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; +import org.apache.vxquery.functions.BuiltinOperators; + +public class NestGroupByRule implements IAlgebraicRewriteRule { + + @Override + public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { + return false; + } + + @Override + public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { + AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); + if (op1.getOperatorTag() != LogicalOperatorTag.SUBPLAN) { + return false; + } + SubplanOperator subplan = (SubplanOperator) op1; + if (subplan.getNestedPlans().size() != 1) { + return false; + } + ILogicalPlan p = subplan.getNestedPlans().get(0); + if (p.getRoots().size() != 1) { + return false; + } + + Set<LogicalVariable> free = new HashSet<LogicalVariable>(); + OperatorPropertiesUtil.getFreeVariablesInSubplans(subplan, free); + if (free.size() != 1) { + return false; + } + LogicalVariable fVar = null; + for (LogicalVariable v : free) { + fVar = v; + break; + } + + AbstractLogicalOperator op2 = (AbstractLogicalOperator) op1.getInputs().get(0).getValue(); + if (op2.getOperatorTag() != LogicalOperatorTag.GROUP) { + return false; + } + GroupByOperator gby = (GroupByOperator) op2; + if 
(gby.getNestedPlans().size() != 1) { + return false; + } + ILogicalPlan p2 = gby.getNestedPlans().get(0); + if (p2.getRoots().size() != 1) { + return false; + } + Mutable<ILogicalOperator> r2 = p2.getRoots().get(0); + AbstractLogicalOperator opr2 = (AbstractLogicalOperator) r2.getValue(); + if (opr2.getOperatorTag() != LogicalOperatorTag.AGGREGATE) { + return false; + } + AggregateOperator aggOuter = (AggregateOperator) opr2; + int posInAggList = aggOuter.getVariables().indexOf(fVar); + if (posInAggList < 0) { + return false; + } + AbstractLogicalOperator outerAggSon = (AbstractLogicalOperator) aggOuter.getInputs().get(0).getValue(); + if (outerAggSon.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) { + return false; + } + ILogicalExpression eAgg = aggOuter.getExpressions().get(posInAggList).getValue(); + if (eAgg.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { + return false; + } + AbstractFunctionCallExpression listifyCall = (AbstractFunctionCallExpression) eAgg; + if (listifyCall.getFunctionIdentifier() != BuiltinOperators.SEQUENCE.getFunctionIdentifier()) { + return false; + } + ILogicalExpression argListify = listifyCall.getArguments().get(0).getValue(); + if (argListify.getExpressionTag() != LogicalExpressionTag.VARIABLE) { + return false; + } + + Mutable<ILogicalOperator> r = p.getRoots().get(0); + AbstractLogicalOperator opInS = (AbstractLogicalOperator) r.getValue(); + if (opInS.getOperatorTag() != LogicalOperatorTag.AGGREGATE) { + return false; + } + AggregateOperator aggInner = (AggregateOperator) opInS; + do { + opInS = (AbstractLogicalOperator) opInS.getInputs().get(0).getValue(); + } while (opInS.getOperatorTag() == LogicalOperatorTag.ASSIGN); + if (opInS.getOperatorTag() != LogicalOperatorTag.GROUP) { + return false; + } + AbstractLogicalOperator unnestParent = opInS; + AbstractLogicalOperator opUnder = (AbstractLogicalOperator) opInS.getInputs().get(0).getValue(); + // skip Assigns + while (opUnder.getOperatorTag() == 
LogicalOperatorTag.ASSIGN) { + unnestParent = opUnder; + opUnder = (AbstractLogicalOperator) opUnder.getInputs().get(0).getValue(); + } + if (opUnder.getOperatorTag() != LogicalOperatorTag.UNNEST) { + return false; + } + UnnestOperator unnest = (UnnestOperator) opUnder; + AbstractLogicalOperator unnestSon = (AbstractLogicalOperator) unnest.getInputs().get(0).getValue(); + if (unnestSon.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) { + return false; + } + NestedTupleSourceOperator innerNts = (NestedTupleSourceOperator) unnestSon; + + ILogicalExpression eUnnest = unnest.getExpressionRef().getValue(); + if (eUnnest.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { + return false; + } + AbstractFunctionCallExpression uf = (AbstractFunctionCallExpression) eUnnest; + if (uf.getFunctionIdentifier() != BuiltinOperators.ITERATE.getFunctionIdentifier()) { + return false; + } + ILogicalExpression scanArg = uf.getArguments().get(0).getValue(); + if (scanArg.getExpressionTag() != LogicalExpressionTag.VARIABLE) { + return false; + } + if (((VariableReferenceExpression) scanArg).getVariableReference() != fVar) { + return false; + } + LogicalVariable uVar = unnest.getVariable(); + GroupByOperator innerGby = (GroupByOperator) opInS; + Set<LogicalVariable> freeInInnerGby = new HashSet<LogicalVariable>(); + OperatorPropertiesUtil.getFreeVariablesInSubplans(innerGby, freeInInnerGby); + for (LogicalVariable v : freeInInnerGby) { + if (v != uVar) { + return false; + } + } + + unnestParent.getInputs().get(0).setValue(innerNts); + LogicalVariable listifiedVar = ((VariableReferenceExpression) argListify).getVariableReference(); + substInSubplan(aggInner, uVar, listifiedVar, context); + gby.getNestedPlans().add(p); + innerNts.getDataSourceReference().setValue(gby); + opRef.setValue(gby); + OperatorPropertiesUtil.typePlan(p, context); + OperatorPropertiesUtil.typePlan(p2, context); + context.computeAndSetTypeEnvironmentForOperator(gby); + return true; + + } + + private 
void substInSubplan(AggregateOperator aggInner, LogicalVariable v1, LogicalVariable v2, + IOptimizationContext context) throws AlgebricksException { + ILogicalOperator op = aggInner; + while (op.getInputs().size() == 1) { + VariableUtilities.substituteVariables(op, v1, v2, context); + op = op.getInputs().get(0).getValue(); + } + } +} http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java new file mode 100644 index 0000000..4c770ff --- /dev/null +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/algebricks_new_version/PushGroupByThroughProduct.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.vxquery.compiler.rewriter.algebricks_new_version; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang3.mutable.Mutable; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.common.utils.Pair; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan; +import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; +import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; +import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil; +import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; +import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; + +/** +* The rule searches for a group by operator immediately following a join operator +* operator. +* +* <pre> +* Before +* +* GROUPBY( $v2 : $v1 ){ +* ... +* } +* JOIN(TRUE) +* +* +* After +* +* JOIN(TRUE) +* GROUPBY( $v2 : $v1 ){ +* ... 
+* } +* </pre> +*/ + +public class PushGroupByThroughProduct implements IAlgebraicRewriteRule { + + private enum PushTestResult { + FALSE, + TRUE, + REPEATED_DECORS + } + + @Override + public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { + return false; + } + + @Override + public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { + AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); + if (op1.getOperatorTag() != LogicalOperatorTag.GROUP) { + return false; + } + Mutable<ILogicalOperator> opRef2 = op1.getInputs().get(0); + AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue(); + if (op2.getOperatorTag() != LogicalOperatorTag.INNERJOIN) { + return false; + } + InnerJoinOperator join = (InnerJoinOperator) op2; + if (!OperatorPropertiesUtil.isAlwaysTrueCond(join.getCondition().getValue())) { + // not a product + return false; + } + GroupByOperator gby = (GroupByOperator) op1; + + List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorToPush = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>(); + List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorNotToPush = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>(); + + Mutable<ILogicalOperator> opLeftRef = join.getInputs().get(0); + ILogicalOperator opLeft = opLeftRef.getValue(); + switch (canPushThrough(gby, opLeft, decorToPush, decorNotToPush)) { + case REPEATED_DECORS: { + return false; + } + case TRUE: { + push(opRef, opRef2, 0, decorToPush, decorNotToPush, context); + return true; + } + case FALSE: { + decorToPush.clear(); + Mutable<ILogicalOperator> opRightRef = join.getInputs().get(1); + ILogicalOperator opRight = opRightRef.getValue(); + if (canPushThrough(gby, opRight, decorToPush, decorNotToPush) == PushTestResult.TRUE) { + push(opRef, opRef2, 1, decorToPush, decorNotToPush, context); + return 
true; + } else { + return false; + } + } + default: { + throw new IllegalStateException(); + } + } + } + + private void push(Mutable<ILogicalOperator> opRefGby, Mutable<ILogicalOperator> opRefJoin, int branch, + List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorToPush, + List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> decorNotToPush, IOptimizationContext context) + throws AlgebricksException { + GroupByOperator gby = (GroupByOperator) opRefGby.getValue(); + AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) opRefJoin.getValue(); + gby.getDecorList().clear(); + gby.getDecorList().addAll(decorToPush); + for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : decorNotToPush) { + LogicalVariable v1 = p.first; + if (v1 != null) { + VariableReferenceExpression varRef = (VariableReferenceExpression) p.second.getValue(); + LogicalVariable v2 = varRef.getVariableReference(); + OperatorManipulationUtil.substituteVarRec(join, v2, v1, true, context); + } + } + Mutable<ILogicalOperator> branchRef = join.getInputs().get(branch); + ILogicalOperator opBranch = branchRef.getValue(); + opRefJoin.setValue(opBranch); + branchRef.setValue(gby); + opRefGby.setValue(join); + } + + private PushTestResult canPushThrough(GroupByOperator gby, ILogicalOperator branch, + List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> toPush, + List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> notToPush) throws AlgebricksException { + Collection<LogicalVariable> fromBranch = new HashSet<LogicalVariable>(); + VariableUtilities.getLiveVariables(branch, fromBranch); + Collection<LogicalVariable> usedInGbyExprList = new ArrayList<LogicalVariable>(); + for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getGroupByList()) { + p.second.getValue().getUsedVariables(usedInGbyExprList); + } + + if (!fromBranch.containsAll(usedInGbyExprList)) { + return PushTestResult.FALSE; + } + Set<LogicalVariable> free = new HashSet<LogicalVariable>(); + for 
(ILogicalPlan p : gby.getNestedPlans()) { + for (Mutable<ILogicalOperator> r : p.getRoots()) { + OperatorPropertiesUtil.getFreeVariablesInSelfOrDesc((AbstractLogicalOperator) r.getValue(), free); + } + } + if (!fromBranch.containsAll(free)) { + return PushTestResult.FALSE; + } + + Set<LogicalVariable> decorVarRhs = new HashSet<LogicalVariable>(); + decorVarRhs.clear(); + for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gby.getDecorList()) { + ILogicalExpression expr = p.second.getValue(); + if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) { + return PushTestResult.FALSE; + } + VariableReferenceExpression varRef = (VariableReferenceExpression) expr; + LogicalVariable v = varRef.getVariableReference(); + if (decorVarRhs.contains(v)) { + return PushTestResult.REPEATED_DECORS; + } + decorVarRhs.add(v); + + if (fromBranch.contains(v)) { + toPush.add(p); + } else { + notToPush.add(p); + } + } + return PushTestResult.TRUE; + } +} http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java new file mode 100644 index 0000000..d35ffca --- /dev/null +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractPushExpressionIntoDatascanRule.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.vxquery.compiler.rewriter.rules; + +import org.apache.commons.lang3.mutable.Mutable; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator; +import org.apache.vxquery.compiler.rewriter.VXQueryOptimizationContext; +import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox; +import org.apache.vxquery.context.StaticContext; +import org.apache.vxquery.metadata.IVXQueryDataSource; +import org.apache.vxquery.metadata.VXQueryMetadataProvider; + +public abstract class AbstractPushExpressionIntoDatascanRule extends AbstractUsedVariablesProcessingRule { + StaticContext dCtx = null; + final int ARG_DATA = 0; + final int ARG_TYPE = 1; + + protected boolean 
processOperator(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { + boolean unnestOp = false; + boolean assignOp = false; + + UnnestOperator unnest = null; + AssignOperator assign = null; + AbstractLogicalOperator op2 = null; + + if (dCtx == null) { + VXQueryOptimizationContext vxqueryCtx = (VXQueryOptimizationContext) context; + dCtx = ((VXQueryMetadataProvider) vxqueryCtx.getMetadataProvider()).getStaticContext(); + } + AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); + if (!(op1.getOperatorTag() == getOperator())) { + return false; + } + if (op1.getOperatorTag() == LogicalOperatorTag.UNNEST) { + unnest = (UnnestOperator) op1; + unnestOp = true; + op2 = (AbstractLogicalOperator) unnest.getInputs().get(0).getValue(); + } else { + assign = (AssignOperator) op1; + assignOp = true; + op2 = (AbstractLogicalOperator) assign.getInputs().get(0).getValue(); + } + + if (op2.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) { + return false; + } + DataSourceScanOperator datascan = (DataSourceScanOperator) op2; + + if (!usedVariables.contains(datascan.getVariables())) { + + Mutable<ILogicalExpression> expressionRef = null; + if (unnestOp) { + expressionRef = unnest.getExpressionRef(); + } else if (assignOp) { + expressionRef = assign.getExpressions().get(0); + } + if (!(updateDataSource((IVXQueryDataSource) datascan.getDataSource(), expressionRef))) { + return false; + } + if (unnestOp) { + Mutable<ILogicalExpression> varExp = ExpressionToolbox.findVariableExpression(expressionRef, + datascan.getVariables().get(0)); + AssignOperator noOp = new AssignOperator(unnest.getVariable(), varExp); + noOp.getInputs().addAll(unnest.getInputs()); + opRef.setValue(noOp); + } else if (assignOp) { + Mutable<ILogicalExpression> varExp = ExpressionToolbox + .findVariableExpression(assign.getExpressions().get(0), datascan.getVariables().get(0)); + AssignOperator noOp = new AssignOperator(assign.getVariables().get(0), 
varExp); + noOp.getInputs().addAll(assign.getInputs()); + opRef.setValue(noOp); + } + + return true; + } + return false; + + } + + abstract boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression); + + abstract LogicalOperatorTag getOperator(); + +} http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractRemoveRedundantTypeExpressionsRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractRemoveRedundantTypeExpressionsRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractRemoveRedundantTypeExpressionsRule.java index 2430b5d..afafdf1 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractRemoveRedundantTypeExpressionsRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/AbstractRemoveRedundantTypeExpressionsRule.java @@ -20,30 +20,32 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.mutable.Mutable; -import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox; -import org.apache.vxquery.compiler.rewriter.rules.util.OperatorToolbox; -import org.apache.vxquery.context.RootStaticContextImpl; -import org.apache.vxquery.context.StaticContextImpl; -import org.apache.vxquery.types.SequenceType; - import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans; import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; +import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox; +import org.apache.vxquery.compiler.rewriter.rules.util.OperatorToolbox; +import org.apache.vxquery.context.RootStaticContextImpl; +import org.apache.vxquery.context.StaticContextImpl; +import org.apache.vxquery.types.SequenceType; public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlgebraicRewriteRule { final StaticContextImpl dCtx = new StaticContextImpl(RootStaticContextImpl.INSTANCE); final int ARG_DATA = 0; final int ARG_TYPE = 1; final List<Mutable<ILogicalExpression>> functionList = new ArrayList<Mutable<ILogicalExpression>>(); - + protected abstract FunctionIdentifier getSearchFunction(); @Override - public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { + public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { return false; } @@ -54,6 +56,7 @@ public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlg List<Mutable<ILogicalExpression>> expressions = OperatorToolbox.getExpressions(opRef); for (Mutable<ILogicalExpression> expression : expressions) { if (processTypeExpression(opRef, expression)) { + context.computeAndSetTypeEnvironmentForOperator(opRef.getValue()); modified = true; } } @@ -70,11 +73,14 @@ public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlg // Get input function AbstractFunctionCallExpression searchFunction = (AbstractFunctionCallExpression) searchM.getValue(); Mutable<ILogicalExpression> argFirstM = searchFunction.getArguments().get(ARG_DATA); - // Find the input return type. 
inputSequenceType = ExpressionToolbox.getOutputSequenceType(opRef, argFirstM, dCtx); - // Find the argument type. + if (inputSequenceType == null && !isNestedPlanOperator(opRef).isEmpty()) { + for (Mutable<ILogicalOperator> agg : isNestedPlanOperator(opRef)) { + inputSequenceType = ExpressionToolbox.getOutputSequenceType(agg, argFirstM, dCtx); + } + } sTypeArg = null; if (hasTypeArgument()) { sTypeArg = ExpressionToolbox.getTypeExpressionTypeArgument(searchM, dCtx); @@ -89,6 +95,18 @@ public abstract class AbstractRemoveRedundantTypeExpressionsRule implements IAlg return modified; } + public List<Mutable<ILogicalOperator>> isNestedPlanOperator(Mutable<ILogicalOperator> opRef) { + List<Mutable<ILogicalOperator>> nestedPlans = new ArrayList<Mutable<ILogicalOperator>>(); + AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue().getInputs().get(0).getValue(); + if (op.hasNestedPlans()) { + AbstractOperatorWithNestedPlans aownp = (AbstractOperatorWithNestedPlans) op; + for (Mutable<ILogicalOperator> input : aownp.getNestedPlans().get(0).getRoots()) { + nestedPlans.add(input); + } + } + return nestedPlans; + } + public abstract boolean matchesAllInstancesOf(SequenceType sTypeArg, SequenceType sTypeOutput); public boolean hasTypeArgument() { http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignToAggregateRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignToAggregateRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignToAggregateRule.java index 7ea0614..0fe6f09 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignToAggregateRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/ConvertAssignToAggregateRule.java @@ -145,6 +145,7 @@ 
public class ConvertAssignToAggregateRule extends AbstractVXQueryAggregateRule { subplanOperator.getInputs().add(assign.getInputs().get(0)); subplanOperator.setRootOp(nextOperatorRef); + assign.getInputs().clear(); opRef.setValue(subplanOperator); return true; @@ -164,7 +165,7 @@ public class ConvertAssignToAggregateRule extends AbstractVXQueryAggregateRule { aggregateSequenceArgs.add(aggregateArgs); List<Mutable<ILogicalExpression>> exprs = new ArrayList<Mutable<ILogicalExpression>>(); - ILogicalExpression aggregateExp = new AggregateFunctionCallExpression(aggregateFunction, true, + ILogicalExpression aggregateExp = new AggregateFunctionCallExpression(aggregateFunction, false, aggregateSequenceArgs); Mutable<ILogicalExpression> aggregateExpRef = new MutableObject<ILogicalExpression>(aggregateExp); exprs.add(aggregateExpRef); http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/EliminateUnnestAggregateSubplanRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/EliminateUnnestAggregateSubplanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/EliminateUnnestAggregateSubplanRule.java index faf6e09..7b04857 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/EliminateUnnestAggregateSubplanRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/EliminateUnnestAggregateSubplanRule.java @@ -65,7 +65,8 @@ import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; */ public class EliminateUnnestAggregateSubplanRule implements IAlgebraicRewriteRule { @Override - public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { + public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws 
AlgebricksException { AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); if (op.getOperatorTag() != LogicalOperatorTag.UNNEST) { return false; @@ -121,7 +122,8 @@ public class EliminateUnnestAggregateSubplanRule implements IAlgebraicRewriteRul } functionCall.getArguments().get(0).setValue(new VariableReferenceExpression(assignVariable)); unnest.getInputs().get(0).setValue(aOp); - + context.computeAndSetTypeEnvironmentForOperator(aOp); + context.computeAndSetTypeEnvironmentForOperator(unnest); return true; } http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java index 4343522..806b532 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/IntroduceTwoStepAggregateRule.java @@ -71,27 +71,30 @@ public class IntroduceTwoStepAggregateRule implements IAlgebraicRewriteRule { final Map<FunctionIdentifier, Pair<IFunctionInfo, IFunctionInfo>> AGGREGATE_MAP = new HashMap<FunctionIdentifier, Pair<IFunctionInfo, IFunctionInfo>>(); public IntroduceTwoStepAggregateRule() { - AGGREGATE_MAP.put(BuiltinFunctions.FN_AVG_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>( - BuiltinOperators.AVG_LOCAL, BuiltinOperators.AVG_GLOBAL)); - AGGREGATE_MAP.put(BuiltinFunctions.FN_COUNT_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>( - BuiltinFunctions.FN_COUNT_1, BuiltinFunctions.FN_SUM_1)); - AGGREGATE_MAP.put(BuiltinFunctions.FN_MAX_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>( - 
BuiltinFunctions.FN_MAX_1, BuiltinFunctions.FN_MAX_1)); - AGGREGATE_MAP.put(BuiltinFunctions.FN_MIN_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>( - BuiltinFunctions.FN_MIN_1, BuiltinFunctions.FN_MIN_1)); - AGGREGATE_MAP.put(BuiltinFunctions.FN_SUM_1.getFunctionIdentifier(), new Pair<IFunctionInfo, IFunctionInfo>( - BuiltinFunctions.FN_SUM_1, BuiltinFunctions.FN_SUM_1)); + AGGREGATE_MAP.put(BuiltinFunctions.FN_AVG_1.getFunctionIdentifier(), + new Pair<IFunctionInfo, IFunctionInfo>(BuiltinOperators.AVG_LOCAL, BuiltinOperators.AVG_GLOBAL)); + AGGREGATE_MAP.put(BuiltinFunctions.FN_COUNT_1.getFunctionIdentifier(), + new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_COUNT_1, BuiltinFunctions.FN_SUM_1)); + AGGREGATE_MAP.put(BuiltinFunctions.FN_MAX_1.getFunctionIdentifier(), + new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_MAX_1, BuiltinFunctions.FN_MAX_1)); + AGGREGATE_MAP.put(BuiltinFunctions.FN_MIN_1.getFunctionIdentifier(), + new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_MIN_1, BuiltinFunctions.FN_MIN_1)); + AGGREGATE_MAP.put(BuiltinFunctions.FN_SUM_1.getFunctionIdentifier(), + new Pair<IFunctionInfo, IFunctionInfo>(BuiltinFunctions.FN_SUM_1, BuiltinFunctions.FN_SUM_1)); } @Override - public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { + public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { // Check if aggregate function. 
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); if (op.getOperatorTag() != LogicalOperatorTag.AGGREGATE) { return false; } AggregateOperator aggregate = (AggregateOperator) op; - + if (aggregate.getExpressions().size() == 0) { + return false; + } Mutable<ILogicalExpression> mutableLogicalExpression = aggregate.getExpressions().get(0); ILogicalExpression logicalExpression = mutableLogicalExpression.getValue(); if (logicalExpression.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java new file mode 100644 index 0000000..5cb111c --- /dev/null +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushAggregateIntoGroupbyRule.java @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.vxquery.compiler.rewriter.rules; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.lang3.mutable.Mutable; +import org.apache.commons.lang3.mutable.MutableObject; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.common.utils.Pair; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan; +import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; +import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; +import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression; +import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator; +import 
org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator; +import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; +import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil; +import org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil; +import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; +import org.apache.vxquery.functions.BuiltinOperators; +import org.apache.vxquery.functions.Function; + +/** + * This rule pushes a subplan on top of a group-by into the + * nested plan of the group-by. + * + * <pre> + * Before + * + * SUBPLAN { + * AGGREGATE ($v5 : $v4) + * UNNEST ($v4 :$v3) + * } + * GROUPBY ($v2 : $v1]) decor ([]) { + * AGGREGATE ($v3 : $v0) + * } + * + * After + * + * GROUPBY ($v2 : $v1]) decor ([]) { + * AGGREGATE ($v5 : $v0) + * } + * + * </pre> + */ + +public class PushAggregateIntoGroupbyRule implements IAlgebraicRewriteRule { + + @Override + public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) { + return false; + } + + @Override + public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) + throws AlgebricksException { + Map<LogicalVariable, Integer> gbyAggVars = new HashMap<LogicalVariable, Integer>(); + Map<LogicalVariable, Integer> gbyAggVarToPlanIndex = new HashMap<LogicalVariable, Integer>(); + Map<LogicalVariable, GroupByOperator> gbyWithAgg = new HashMap<LogicalVariable, GroupByOperator>(); + Map<ILogicalExpression, ILogicalExpression> aggExprToVarExpr = new HashMap<ILogicalExpression, ILogicalExpression>(); + // first collect vars. 
referring to listified sequences + boolean changed = collectVarsBottomUp(opRef, context, gbyAggVars, gbyWithAgg, gbyAggVarToPlanIndex, + aggExprToVarExpr); + if (changed) { + removeRedundantListifies(opRef, context, gbyAggVars, gbyWithAgg, gbyAggVarToPlanIndex); + } + return changed; + } + + private void removeRedundantListifies(Mutable<ILogicalOperator> opRef, IOptimizationContext context, + Map<LogicalVariable, Integer> gbyAggVars, Map<LogicalVariable, GroupByOperator> gbyWithAgg, + Map<LogicalVariable, Integer> gbyAggVarToPlanIndex) throws AlgebricksException { + for (LogicalVariable aggVar : gbyAggVars.keySet()) { + int occurs = gbyAggVars.get(aggVar); + if (occurs == 0) { + GroupByOperator gbyOp = gbyWithAgg.get(aggVar); + AggregateOperator aggOp = (AggregateOperator) gbyOp.getNestedPlans() + .get(gbyAggVarToPlanIndex.get(aggVar)).getRoots().get(0).getValue(); + int pos = aggOp.getVariables().indexOf(aggVar); + if (pos >= 0) { + aggOp.getVariables().remove(pos); + aggOp.getExpressions().remove(pos); + List<LogicalVariable> producedVarsAtAgg = new ArrayList<LogicalVariable>(); + VariableUtilities.getProducedVariablesInDescendantsAndSelf(aggOp, producedVarsAtAgg); + if (producedVarsAtAgg.isEmpty()) { + gbyOp.getNestedPlans().remove(gbyAggVarToPlanIndex.get(aggVar)); + } + } + } + } + } + + private boolean collectVarsBottomUp(Mutable<ILogicalOperator> opRef, IOptimizationContext context, + Map<LogicalVariable, Integer> gbyListifyVarsCount, Map<LogicalVariable, GroupByOperator> gbyWithAgg, + Map<LogicalVariable, Integer> gbyAggVarToPlanIndex, + Map<ILogicalExpression, ILogicalExpression> aggregateExprToVarExpr) throws AlgebricksException { + AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); + context.addToDontApplySet(this, op1); + boolean change = false; + for (Mutable<ILogicalOperator> child : op1.getInputs()) { + if (collectVarsBottomUp(child, context, gbyListifyVarsCount, gbyWithAgg, gbyAggVarToPlanIndex, + aggregateExprToVarExpr)) { + 
change = true; + } + } + Set<LogicalVariable> used = new HashSet<>(); + VariableUtilities.getUsedVariables(op1, used); + switch (op1.getOperatorTag()) { + case ASSIGN: + case SELECT: { + boolean found = false; + // Do some prefiltering: check if the Assign uses any gby vars. + for (LogicalVariable v : used) { + if (gbyListifyVarsCount.get(v) != null) { + found = true; + break; + } + } + if (found) { + if (op1.getOperatorTag() == LogicalOperatorTag.ASSIGN) { + AssignOperator assign = (AssignOperator) op1; + for (Mutable<ILogicalExpression> exprRef : assign.getExpressions()) { + Pair<Boolean, ILogicalExpression> p = extractAggFunctionsFromExpression(exprRef, gbyWithAgg, + aggregateExprToVarExpr, context); + if (p.first) { + change = true; + exprRef.setValue(p.second); + } + } + } + if (op1.getOperatorTag() == LogicalOperatorTag.SELECT) { + SelectOperator select = (SelectOperator) op1; + Mutable<ILogicalExpression> exprRef = select.getCondition(); + Pair<Boolean, ILogicalExpression> p = extractAggFunctionsFromExpression(exprRef, gbyWithAgg, + aggregateExprToVarExpr, context); + if (p.first) { + change = true; + exprRef.setValue(p.second); + } + } + used.clear(); + VariableUtilities.getUsedVariables(op1, used); + // increment the count for the ones which are still used + for (LogicalVariable v : used) { + Integer m = gbyListifyVarsCount.get(v); + if (m != null) { + gbyListifyVarsCount.put(v, m + 1); + } + } + } + break; + } + case SUBPLAN: { + for (LogicalVariable v : used) { + Integer m = gbyListifyVarsCount.get(v); + if (m != null) { + GroupByOperator gbyOp = gbyWithAgg.get(v); + if (pushSubplanAsAggIntoGby(opRef, gbyOp, v, gbyListifyVarsCount, gbyWithAgg, + gbyAggVarToPlanIndex, context)) { + change = true; + } else { + gbyListifyVarsCount.put(v, m + 1); + } + } + } + break; + } + case GROUP: { + List<LogicalVariable> vars = collectOneVarPerAggFromGroupOp((GroupByOperator) op1); + if (vars != null) { + for (int i = 0; i < vars.size(); i++) { + LogicalVariable v = 
vars.get(i); + if (v != null) { + gbyListifyVarsCount.put(v, 0); + gbyAggVarToPlanIndex.put(v, i); + gbyWithAgg.put(v, (GroupByOperator) op1); + } + } + } + break; + } + default: { + for (LogicalVariable v : used) { + Integer m = gbyListifyVarsCount.get(v); + if (m != null) { + gbyListifyVarsCount.put(v, m + 1); + } + } + } + } + return change; + } + + private List<LogicalVariable> collectOneVarPerAggFromGroupOp(GroupByOperator group) { + List<ILogicalPlan> nPlans = group.getNestedPlans(); + if (nPlans == null || nPlans.size() < 1) { + return null; + } + + List<LogicalVariable> aggVars = new ArrayList<LogicalVariable>(); + // test that the group-by computes a "listify" aggregate + for (int i = 0; i < nPlans.size(); i++) { + AbstractLogicalOperator topOp = (AbstractLogicalOperator) nPlans.get(i).getRoots().get(0).getValue(); + if (topOp.getOperatorTag() != LogicalOperatorTag.AGGREGATE) { + continue; + } + AggregateOperator agg = (AggregateOperator) topOp; + if (agg.getVariables().size() != 1) { + continue; + } + ILogicalExpression expr = agg.getExpressions().get(0).getValue(); + if (((AbstractLogicalExpression) expr).getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { + continue; + } + AbstractFunctionCallExpression fceAgg = (AbstractFunctionCallExpression) expr; + if (fceAgg.getFunctionIdentifier() != BuiltinOperators.SEQUENCE.getFunctionIdentifier()) { + continue; + } + aggVars.add(agg.getVariables().get(0)); + } + return aggVars; + } + + /** + * @param expr + * @param aggVars + * @param gbyWithAgg + * @param context + * @return a pair whose first member is a boolean which is true iff + * something was changed in the expression tree rooted at expr. The + * second member is the result of transforming expr. 
+ * @throws AlgebricksException + */ + + private Pair<Boolean, ILogicalExpression> extractAggFunctionsFromExpression(Mutable<ILogicalExpression> exprRef, + Map<LogicalVariable, GroupByOperator> gbyWithAgg, + Map<ILogicalExpression, ILogicalExpression> aggregateExprToVarExpr, IOptimizationContext context) + throws AlgebricksException { + ILogicalExpression expr = exprRef.getValue(); + switch (expr.getExpressionTag()) { + case FUNCTION_CALL: { + AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) expr; + Function functionInfo = (Function) fce.getFunctionInfo(); + FunctionIdentifier fi = null; + if (functionInfo.hasAggregateEvaluatorFactory()) { + fi = functionInfo.getFunctionIdentifier(); + } //FunctionIdentifier fi = functionInfo.getFunctionIdentifier(); + if (fi != null) { + ILogicalExpression a1 = fce.getArguments().get(0).getValue(); + if (a1.getExpressionTag() == LogicalExpressionTag.VARIABLE) { + LogicalVariable argVar = ((VariableReferenceExpression) a1).getVariableReference(); + GroupByOperator gbyOp = gbyWithAgg.get(argVar); + + if (gbyOp != null) { + if (!aggregateExprToVarExpr.containsKey(expr)) { + LogicalVariable newVar = context.newVar(); + AggregateFunctionCallExpression aggFun = new AggregateFunctionCallExpression( + functionInfo, false, fce.getArguments()); + rewriteGroupByAggregate(argVar, gbyOp, aggFun, newVar, context); + ILogicalExpression newVarExpr = new VariableReferenceExpression(newVar); + aggregateExprToVarExpr.put(expr, newVarExpr); + return new Pair<Boolean, ILogicalExpression>(Boolean.TRUE, newVarExpr); + } else { + ILogicalExpression varExpr = aggregateExprToVarExpr.get(expr); + return new Pair<Boolean, ILogicalExpression>(Boolean.TRUE, varExpr); + } + } + } + } + + boolean change = false; + for (Mutable<ILogicalExpression> a : fce.getArguments()) { + Pair<Boolean, ILogicalExpression> aggArg = extractAggFunctionsFromExpression(a, gbyWithAgg, + aggregateExprToVarExpr, context); + if (aggArg.first.booleanValue()) { + 
a.setValue(aggArg.second); + change = true; + } + } + return new Pair<Boolean, ILogicalExpression>(change, fce); + + } + case VARIABLE: + case CONSTANT: { + return new Pair<Boolean, ILogicalExpression>(Boolean.FALSE, expr); + } + default: { + throw new IllegalArgumentException(); + } + } + } + + private void rewriteGroupByAggregate(LogicalVariable oldAggVar, GroupByOperator gbyOp, + AggregateFunctionCallExpression aggFun, LogicalVariable newAggVar, IOptimizationContext context) + throws AlgebricksException { + for (int j = 0; j < gbyOp.getNestedPlans().size(); j++) { + AggregateOperator aggOp = (AggregateOperator) gbyOp.getNestedPlans().get(j).getRoots().get(0).getValue(); + int n = aggOp.getVariables().size(); + for (int i = 0; i < n; i++) { + LogicalVariable v = aggOp.getVariables().get(i); + if (v.equals(oldAggVar)) { + AbstractFunctionCallExpression oldAggExpr = (AbstractFunctionCallExpression) aggOp.getExpressions() + .get(i).getValue(); + AggregateFunctionCallExpression newAggFun = new AggregateFunctionCallExpression( + aggFun.getFunctionInfo(), false, new ArrayList<Mutable<ILogicalExpression>>()); + for (Mutable<ILogicalExpression> arg : oldAggExpr.getArguments()) { + ILogicalExpression cloned = ((AbstractLogicalExpression) arg.getValue()).cloneExpression(); + newAggFun.getArguments().add(new MutableObject<ILogicalExpression>(cloned)); + } + aggOp.getVariables().add(newAggVar); + aggOp.getExpressions().add(new MutableObject<ILogicalExpression>(newAggFun)); + context.computeAndSetTypeEnvironmentForOperator(aggOp); + break; + } + } + } + } + + private boolean pushSubplanAsAggIntoGby(Mutable<ILogicalOperator> subplanOpRef, GroupByOperator gbyOp, + LogicalVariable varFromGroupAgg, Map<LogicalVariable, Integer> gbyAggVars, + Map<LogicalVariable, GroupByOperator> gbyWithAgg, Map<LogicalVariable, Integer> gbyAggVarToPlanIndex, + IOptimizationContext context) throws AlgebricksException { + SubplanOperator subplan = (SubplanOperator) subplanOpRef.getValue(); + // 
only free var can be varFromGroupAgg + HashSet<LogicalVariable> freeVars = new HashSet<LogicalVariable>(); + OperatorPropertiesUtil.getFreeVariablesInSubplans(subplan, freeVars); + for (LogicalVariable vFree : freeVars) { + if (!vFree.equals(varFromGroupAgg)) { + return false; + } + } + + List<ILogicalPlan> plans = subplan.getNestedPlans(); + if (plans.size() > 1) { + return false; + } + ILogicalPlan p = plans.get(0); + if (p.getRoots().size() > 1) { + return false; + } + Mutable<ILogicalOperator> opRef = p.getRoots().get(0); + AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); + if (op.getOperatorTag() != LogicalOperatorTag.AGGREGATE) { + return false; + } + AggregateOperator aggInSubplanOp = (AggregateOperator) op; + LogicalVariable unnestVar = null; + boolean pushableNestedSubplan = false; + while (op.getInputs().size() == 1) { + opRef = op.getInputs().get(0); + op = (AbstractLogicalOperator) opRef.getValue(); + switch (op.getOperatorTag()) { + case ASSIGN: { + break; + } + case UNNEST: { + UnnestOperator unnest = (UnnestOperator) op; + if (unnest.getPositionalVariable() != null) { + // TODO currently subplan with both accumulating and running aggregate is not supported. 
+ return false; + } + ILogicalExpression expr = unnest.getExpressionRef().getValue(); + if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { + return false; + } + AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) expr; + if (fun.getFunctionIdentifier() != BuiltinOperators.ITERATE.getFunctionIdentifier()) { + return false; + } + ILogicalExpression arg0 = fun.getArguments().get(0).getValue(); + if (arg0.getExpressionTag() != LogicalExpressionTag.VARIABLE) { + return false; + } + VariableReferenceExpression varExpr = (VariableReferenceExpression) arg0; + if (!varExpr.getVariableReference().equals(varFromGroupAgg)) { + return false; + } + opRef = op.getInputs().get(0); + op = (AbstractLogicalOperator) opRef.getValue(); + if (op.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) { + return false; + } + pushableNestedSubplan = true; + unnestVar = unnest.getVariable(); + break; + } + default: { + return false; + } + } + } + if (pushableNestedSubplan) { + for (int i = 0; i < gbyOp.getNestedPlans().size(); i++) { + Mutable<ILogicalOperator> gbyAggRef = gbyOp.getNestedPlans().get(i).getRoots().get(0); + AggregateOperator gbyAgg = (AggregateOperator) gbyAggRef.getValue(); + Mutable<ILogicalOperator> gbyAggChildRef = gbyAgg.getInputs().get(0); + LogicalVariable listifyVar = findListifiedVariable(gbyAgg, varFromGroupAgg); + if (listifyVar == null) { + continue; + } + OperatorManipulationUtil.substituteVarRec(aggInSubplanOp, unnestVar, listifyVar, true, context); + gbyAgg.getVariables().addAll(aggInSubplanOp.getVariables()); + gbyAgg.getExpressions().addAll(aggInSubplanOp.getExpressions()); + for (LogicalVariable v : aggInSubplanOp.getVariables()) { + gbyWithAgg.put(v, gbyOp); + gbyAggVars.put(v, 0); + gbyAggVarToPlanIndex.put(v, i); + } + + Mutable<ILogicalOperator> opRef1InSubplan = aggInSubplanOp.getInputs().get(0); + if (opRef1InSubplan.getValue().getInputs().size() > 0) { + Mutable<ILogicalOperator> opRef2InSubplan = 
opRef1InSubplan.getValue().getInputs().get(0); + AbstractLogicalOperator op2InSubplan = (AbstractLogicalOperator) opRef2InSubplan.getValue(); + if (op2InSubplan.getOperatorTag() != LogicalOperatorTag.NESTEDTUPLESOURCE) { + List<Mutable<ILogicalOperator>> gbyInpList = gbyAgg.getInputs(); + gbyInpList.clear(); + gbyInpList.add(opRef1InSubplan); + while (true) { + opRef2InSubplan = opRef1InSubplan.getValue().getInputs().get(0); + op2InSubplan = (AbstractLogicalOperator) opRef2InSubplan.getValue(); + if (op2InSubplan.getOperatorTag() == LogicalOperatorTag.UNNEST) { + List<Mutable<ILogicalOperator>> opInpList = opRef1InSubplan.getValue().getInputs(); + opInpList.clear(); + opInpList.add(gbyAggChildRef); + break; + } + opRef1InSubplan = opRef2InSubplan; + if (opRef1InSubplan.getValue().getInputs().size() == 0) { + throw new IllegalStateException("PushAggregateIntoGroupbyRule: could not find UNNEST."); + } + } + } + } + subplanOpRef.setValue(subplan.getInputs().get(0).getValue()); + OperatorPropertiesUtil.typeOpRec(gbyAggRef, context); + } + return true; + } else { + return false; + } + } + + private LogicalVariable findListifiedVariable(AggregateOperator gbyAgg, LogicalVariable varFromGroupAgg) { + int n = gbyAgg.getVariables().size(); + + for (int i = 0; i < n; i++) { + if (gbyAgg.getVariables().get(i).equals(varFromGroupAgg)) { + AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) gbyAgg.getExpressions().get(i) + .getValue(); + if (fce.getFunctionIdentifier().equals(BuiltinOperators.SEQUENCE.getFunctionIdentifier())) { + ILogicalExpression argExpr = fce.getArguments().get(0).getValue(); + if (((AbstractLogicalExpression) argExpr).getExpressionTag() == LogicalExpressionTag.VARIABLE) { + return ((VariableReferenceExpression) argExpr).getVariableReference(); + } + } + } + } + return null; + } + +} 
http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java index 6060c19..dbcce54 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushChildIntoDataScanRule.java @@ -20,22 +20,11 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.mutable.Mutable; -import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; -import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; -import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator; -import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator; -import org.apache.vxquery.compiler.rewriter.VXQueryOptimizationContext; import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox; -import org.apache.vxquery.context.StaticContext; import org.apache.vxquery.functions.BuiltinOperators; -import org.apache.vxquery.metadata.VXQueryCollectionDataSource; -import org.apache.vxquery.metadata.VXQueryIndexingDataSource; -import org.apache.vxquery.metadata.VXQueryMetadataProvider; +import 
org.apache.vxquery.metadata.IVXQueryDataSource; import org.apache.vxquery.types.ElementType; /** @@ -55,82 +44,42 @@ import org.apache.vxquery.types.ElementType; * After * * plan__parent + * ASSIGN( $v2 : $v1 ) * DATASCAN( $source : $v1 ) * plan__child * * $source is encoded with the child parameters. * </pre> - * - * @author prestonc */ -public class PushChildIntoDataScanRule extends AbstractUsedVariablesProcessingRule { - StaticContext dCtx = null; - final int ARG_DATA = 0; - final int ARG_TYPE = 1; - - protected boolean processOperator(Mutable<ILogicalOperator> opRef, IOptimizationContext context) - throws AlgebricksException { - if (dCtx == null) { - VXQueryOptimizationContext vxqueryCtx = (VXQueryOptimizationContext) context; - dCtx = ((VXQueryMetadataProvider) vxqueryCtx.getMetadataProvider()).getStaticContext(); - } - AbstractLogicalOperator op1 = (AbstractLogicalOperator) opRef.getValue(); - if (op1.getOperatorTag() != LogicalOperatorTag.UNNEST) { - return false; - } - UnnestOperator unnest = (UnnestOperator) op1; +public class PushChildIntoDataScanRule extends AbstractPushExpressionIntoDatascanRule { - AbstractLogicalOperator op2 = (AbstractLogicalOperator) unnest.getInputs().get(0).getValue(); - if (op2.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN) { + @Override + boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression) { + //TODO: indexing needs to be extended to support push child into datascan + if (datasource.usingIndex()) { return false; } - DataSourceScanOperator datascan = (DataSourceScanOperator) op2; - - if (!usedVariables.contains(datascan.getVariables())) { - VXQueryCollectionDataSource ds = null; - VXQueryIndexingDataSource ids = null; - - // Find all child functions. 
- try { - ids = (VXQueryIndexingDataSource) datascan.getDataSource(); - } catch (ClassCastException e) { - ds = (VXQueryCollectionDataSource) datascan.getDataSource(); - } - - if (!updateDataSource(ds, unnest.getExpressionRef())) { - return false; - } - - // Replace unnest with noop assign. Keeps variable chain. - Mutable<ILogicalExpression> varExp = ExpressionToolbox.findVariableExpression(unnest.getExpressionRef(), - datascan.getVariables().get(0)); - AssignOperator noOp = new AssignOperator(unnest.getVariable(), varExp); - noOp.getInputs().addAll(unnest.getInputs()); - opRef.setValue(noOp); - return true; - } - return false; - } - - /** - * In reverse add them to the data source. - * - * @param ds - * @param expression - */ - private boolean updateDataSource(VXQueryCollectionDataSource ds, Mutable<ILogicalExpression> expression) { boolean added = false; List<Mutable<ILogicalExpression>> finds = new ArrayList<Mutable<ILogicalExpression>>(); ExpressionToolbox.findAllFunctionExpressions(expression, BuiltinOperators.CHILD.getFunctionIdentifier(), finds); for (int i = finds.size(); i > 0; --i) { int typeId = ExpressionToolbox.getTypeExpressionTypeArgument(finds.get(i - 1)); if (typeId > 0) { - if (dCtx.lookupSequenceType(typeId).getItemType().equals(ElementType.ANYELEMENT) && typeId > 0) { - ds.addChildSeq(typeId); + ElementType it = (ElementType) dCtx.lookupSequenceType(typeId).getItemType(); + ElementType et = ElementType.ANYELEMENT; + + if (it.getContentType().equals(et.getContentType())) { + datasource.addChildSeq(typeId); added = true; } } } return added; } + + @Override + LogicalOperatorTag getOperator() { + return LogicalOperatorTag.UNNEST; + } + } http://git-wip-us.apache.org/repos/asf/vxquery/blob/53b86c24/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java ---------------------------------------------------------------------- diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java new file mode 100644 index 0000000..41b6401 --- /dev/null +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/PushKeysOrMembersIntoDatascanRule.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.vxquery.compiler.rewriter.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.mutable.Mutable; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; +import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; +import org.apache.hyracks.data.std.primitive.BooleanPointable; +import org.apache.vxquery.compiler.rewriter.rules.util.ExpressionToolbox; +import org.apache.vxquery.datamodel.values.XDMConstants; +import org.apache.vxquery.functions.BuiltinOperators; +import org.apache.vxquery.metadata.AbstractVXQueryDataSource; +import org.apache.vxquery.metadata.IVXQueryDataSource; + +/** + * The rule searches for an unnest operator immediately following a data scan + * operator. + * + * <pre> + * Before + * + * plan__parent + * UNNEST( $v2 : keys-or-members( $v1 ) ) + * DATASCAN( $source : $v1 ) + * plan__child + * + * Where $v1 is not used in plan__parent. + * + * After + * + * plan__parent + * ASSIGN( $v2 : $v1 ) + * DATASCAN( $source : $v1 ) + * plan__child + * + * $source is encoded with the keys-or-members parameters.
+ * </pre> + */ +public class PushKeysOrMembersIntoDatascanRule extends AbstractPushExpressionIntoDatascanRule { + + @Override + boolean updateDataSource(IVXQueryDataSource datasource, Mutable<ILogicalExpression> expression) { + AbstractVXQueryDataSource ds = (AbstractVXQueryDataSource) datasource; + boolean added = false; + BooleanPointable bp = (BooleanPointable) BooleanPointable.FACTORY.createPointable(); + List<Mutable<ILogicalExpression>> findkeys = new ArrayList<Mutable<ILogicalExpression>>(); + ExpressionToolbox.findAllFunctionExpressions(expression, + BuiltinOperators.KEYS_OR_MEMBERS.getFunctionIdentifier(), findkeys); + for (int i = findkeys.size(); i > 0; --i) { + XDMConstants.setTrue(bp); + ds.addValueSeq(ArrayUtils.toObject(bp.getByteArray())); + added = true; + } + return added; + } + + @Override + LogicalOperatorTag getOperator() { + return LogicalOperatorTag.UNNEST; + } +}
