[CALCITE-1731] Materialized view rewriting for join and aggregate operators
* Remove previous join rewriting rule (MaterializedViewJoinRule.java) and update documentation. Project: http://git-wip-us.apache.org/repos/asf/calcite/repo Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/27ca3109 Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/27ca3109 Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/27ca3109 Branch: refs/heads/master Commit: 27ca3109539eba4324b3bf4fcf4065cb7a5d300c Parents: 1f81e13 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Tue Apr 25 13:58:40 2017 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Wed Apr 26 20:03:11 2017 +0100 ---------------------------------------------------------------------- .../rel/rules/MaterializedViewJoinRule.java | 373 ------------------- .../calcite/test/MaterializationTest.java | 5 +- site/_docs/materialized_views.md | 25 +- 3 files changed, 9 insertions(+), 394 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/calcite/blob/27ca3109/core/src/main/java/org/apache/calcite/rel/rules/MaterializedViewJoinRule.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/calcite/rel/rules/MaterializedViewJoinRule.java b/core/src/main/java/org/apache/calcite/rel/rules/MaterializedViewJoinRule.java deleted file mode 100644 index f6fb009..0000000 --- a/core/src/main/java/org/apache/calcite/rel/rules/MaterializedViewJoinRule.java +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to you under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.calcite.rel.rules; - -import org.apache.calcite.plan.RelOptMaterialization; -import org.apache.calcite.plan.RelOptMaterializations; -import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelOptRuleOperand; -import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.hep.HepPlanner; -import org.apache.calcite.plan.hep.HepProgram; -import org.apache.calcite.plan.hep.HepProgramBuilder; -import org.apache.calcite.plan.volcano.VolcanoPlanner; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.RelFactories; -import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.logical.LogicalProject; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.type.SqlTypeUtil; -import org.apache.calcite.tools.RelBuilderFactory; -import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.Pair; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; - -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; - -/** - * Planner rule that converts joins of multiple tables into a matching - * materialized view - */ -public class MaterializedViewJoinRule extends RelOptRule { - public static final MaterializedViewJoinRule INSTANCE_PROJECT = - new MaterializedViewJoinRule( - operand(LogicalProject.class, - operand(Join.class, - operand(Project.class, - operand(TableScan.class, none())), - operand(Project.class, - operand(TableScan.class, none())))), - RelFactories.LOGICAL_BUILDER, - "MaterializedViewJoinRule(Project-Project)"); - - public static final MaterializedViewJoinRule INSTANCE_TABLE_SCAN = - new MaterializedViewJoinRule( - operand(LogicalProject.class, - operand(Join.class, - operand(TableScan.class, none()), - operand(TableScan.class, none()))), - RelFactories.LOGICAL_BUILDER, - "MaterializedViewJoinRule(TableScan-TableScan)"); - - private final HepProgram multiJoinProgram = new HepProgramBuilder() - .addRuleInstance(ProjectRemoveRule.INSTANCE) - .addRuleInstance(ProjectJoinTransposeRule.INSTANCE) - .addRuleInstance(JoinToMultiJoinRule.INSTANCE) - .addRuleInstance(ProjectMultiJoinMergeRule.INSTANCE) - .addRuleInstance(FilterMultiJoinMergeRule.INSTANCE) - .build(); - - //~ Constructors ----------------------------------------------------------- - - /** Creates a MaterializedViewJoinRule. */ - protected MaterializedViewJoinRule(RelOptRuleOperand operand, RelBuilderFactory relBuilderFactory, - String description) { - super(operand, relBuilderFactory, description); - } - - //~ Methods ---------------------------------------------------------------- - - public void onMatch(RelOptRuleCall call) { - final Project originalProject = call.rel(0); - // Rebuild the tree - final RelNode leftInput; - final RelNode rightInput; - if (call.getRelList().size() == 6) { - leftInput = call.rel(2).copy(call.rel(2).getTraitSet(), ImmutableList.of(call.rel(3))); - rightInput = call.rel(4).copy(call.rel(4).getTraitSet(), ImmutableList.of(call.rel(5))); - } else { - leftInput = call.rel(2); - rightInput = call.rel(3); - } - final RelNode join = call.rel(1).copy(call.rel(1).getTraitSet(), - ImmutableList.of(leftInput, rightInput)); - final RelNode project = call.rel(0).copy(call.rel(0).getTraitSet(), ImmutableList.of(join)); - - // Convert the input expression into a MultiJoin - RelOptPlanner planner = call.getPlanner(); - final HepPlanner hepPlanner = - new HepPlanner(multiJoinProgram, planner.getContext()); - hepPlanner.setRoot(project); - RelNode best = hepPlanner.findBestExp(); - - if (best instanceof Project) { - best = ((Project) best).getInput(); - } - if (!(best instanceof MultiJoin)) { - return; - } - apply(call, (MultiJoin) best, originalProject); - } - - protected void apply(RelOptRuleCall call, MultiJoin join, Project originalProject) { - if (!isSupportedJoin(join)) { - return; - } - SortedMap<Integer, ImmutableBitSet> queryFilter = filterConditions(join); - if (queryFilter == null) { - return; - } - List<RelOptTable> queryTables = RelOptUtil.findAllTables(join); - Map<Integer, Pair<RelOptTable, RexInputRef>> queryFields = - originalFields(join, queryTables); - if (queryFields == null) { - return; - } - - RelOptPlanner planner = call.getPlanner(); - List<RelOptMaterialization> materializations = - planner instanceof VolcanoPlanner - ? ((VolcanoPlanner) planner).getMaterializations() - : ImmutableList.<RelOptMaterialization>of(); - if (!materializations.isEmpty()) { - List<RelOptMaterialization> applicableMaterializations = - RelOptMaterializations.getApplicableMaterializations(join, materializations); - - // Prepare a planner to convert views to MultiJoins - HepPlanner hepPlanner = - new HepPlanner(multiJoinProgram, planner.getContext()); - - for (RelOptMaterialization materialization : applicableMaterializations) { - // Skip over single table views - RelNode target = materialization.queryRel; - if (target instanceof TableScan - || (target instanceof Project - && ((Project) target).getInput() instanceof TableScan)) { - continue; - } - - // Convert the view into a MultiJoin - hepPlanner.setRoot(target); - target = hepPlanner.findBestExp(); - if (!(target instanceof Project)) { continue; } - - Project viewProject = (Project) target; - if (!(viewProject.getInput() instanceof MultiJoin)) { - continue; - } - MultiJoin viewJoin = (MultiJoin) viewProject.getInput(); - if (!isSupportedJoin(viewJoin)) { - continue; - } - - List<RelOptTable> viewTables = RelOptUtil.findAllTables(viewJoin); - - // Check that the same set of tables are in use - if (queryTables.size() != viewTables.size() - || !ImmutableSet.copyOf(queryTables).containsAll(viewTables)) { - continue; - } - - // Extra the conditions and field from the view and ensure - // that they are all supported - SortedMap<Integer, ImmutableBitSet> viewFilter = filterConditions(viewJoin); - if (viewFilter == null) { - continue; - } - Map<Integer, Pair<RelOptTable, RexInputRef>> viewFields = - originalFields(viewJoin, viewTables); - if (viewFields == null) { - continue; - } - - // If we fail to find one of the fields we are required - // to project, we can't use this view - List<RexNode> projects = materializedViewProjects(queryFields, queryFilter, - viewFields, originalProject); - if (projects.size() != originalProject.getNamedProjects().size()) { - continue; - } - - final RelNode newNode = originalProject.copy(originalProject.getTraitSet(), - materialization.tableRel, - projects, originalProject.getRowType()); - call.transformTo(newNode); - } - } - } - - /** - * Checks that the join consists of either table scans or projects of scans - */ - private boolean isSimpleProjects(MultiJoin join) { - for (RelNode input : join.getInputs()) { - if (!(input instanceof TableScan) - && !(input instanceof Project - && ((Project) input).getInput() instanceof TableScan)) { - return false; - } - } - - return true; - } - - private boolean isSupportedJoin(MultiJoin join) { - // We only support inner joins without post join filters over simple projects/scans - return !join.containsOuter() && join.getPostJoinFilter() == null && isSimpleProjects(join); - } - - /** - * Produces a map from fields in a multijoin to references in the - * original tables referenced in the join - */ - private Map<Integer, Pair<RelOptTable, RexInputRef>> originalFields(MultiJoin join, - List<RelOptTable> tables) { - List<ImmutableBitSet> projFields = join.getProjFields(); - Map<Integer, Pair<RelOptTable, RexInputRef>> tableFields = new LinkedHashMap<>(); - List<RelNode> inputs = join.getInputs(); - int fieldNum = 0; - for (int i = 0; i < projFields.size(); i++) { - // Either get the project or construct a list projecting all fields - List<RexNode> projects; - if (inputs.get(i) instanceof Project) { - projects = ((Project) inputs.get(i)).getProjects(); - } else { - assert inputs.get(i) instanceof TableScan; - List<RelDataTypeField> fields = inputs.get(i).getRowType().getFieldList(); - projects = new ArrayList<>(); - for (int j = 0; j < fields.size(); j++) { - projects.add(new RexInputRef(j, fields.get(j).getType())); - } - } - - if (projFields.get(i) == null) { return null; } - - int bit = projFields.get(i).nextSetBit(0); - while (bit != -1) { - // We currently only support rewriting of views with simple field references - if (!(projects.get(bit) instanceof RexInputRef)) { - return null; - } - - tableFields.put(fieldNum, Pair.of(tables.get(i), (RexInputRef) projects.get(bit))); - fieldNum++; - bit = projFields.get(i).nextSetBit(bit + 1); - } - } - - return tableFields; - } - - /** - * If the node represents a field reference, get its index - */ - private Integer getFieldIndex(RexNode operand) { - if (operand.isA(SqlKind.INPUT_REF)) { - return ((RexInputRef) operand).getIndex(); - } else if (operand.isA(SqlKind.CAST)) { - return getFieldIndex(((RexCall) operand).getOperands().get(0)); - } else { - return null; - } - } - - /** - * Construct a map of equivalence classes of all columns - * in all tables used as input to the join - */ - private SortedMap<Integer, ImmutableBitSet> filterConditions(MultiJoin join) { - SortedMap<Integer, ImmutableBitSet> equiv = new TreeMap<>(); - RexNode filter = RexUtil.toCnf(join.getCluster().getRexBuilder(), join.getJoinFilter()); - for (RexNode conjunct : RelOptUtil.conjunctions(filter)) { - List<RexNode> condition = RelOptUtil.disjunctions(conjunct); - if (condition.size() == 1 && condition.get(0).isA(SqlKind.EQUALS)) { - List<RexNode> operands = ((RexCall) condition.get(0)).getOperands(); - Integer index1 = getFieldIndex(operands.get(0)); - Integer index2 = getFieldIndex(operands.get(1)); - if (index1 == null || index2 == null) { - // All operands to a condition must be field references or - // simple casts of field references - return null; - } - equiv.put(index1, ImmutableBitSet.of(index1, index2)); - } else { - // We don't handle disjunctions or inequalities - return null; - } - } - - equiv = ImmutableBitSet.closure(equiv); - return equiv; - } - - - /** - * Construct a list of projects we need on top of the materialized view - */ - private List<RexNode> materializedViewProjects( - Map<Integer, Pair<RelOptTable, RexInputRef>> queryFields, - SortedMap<Integer, ImmutableBitSet> queryFilter, - Map<Integer, Pair<RelOptTable, RexInputRef>> viewFields, - Project originalProject) { - List<Pair<RelOptTable, RexInputRef>> viewFieldList = - Lists.newArrayList(viewFields.values().iterator()); - List<Pair<RelOptTable, RexInputRef>> queryFieldList = - Lists.newArrayList(queryFields.values().iterator()); - List<RexNode> projects = new ArrayList<>(); - for (Map.Entry<Integer, Pair<RelOptTable, RexInputRef>> field - : queryFields.entrySet()) { - int fieldIndex = viewFieldList.indexOf(field.getValue()); - if (fieldIndex == -1) { - // Check for equivalent fields in the view - ImmutableBitSet queryEquiv = queryFilter.get(field.getKey()); - if (queryEquiv != null) { - for (Integer index : queryEquiv) { - fieldIndex = viewFieldList.indexOf(queryFieldList.get(index)); - if (fieldIndex != -1) { - break; - } - } - } - } - - if (fieldIndex == -1) { - break; - } - - RelDataType type = field.getValue().right.getType(); - RelDataType originalType = originalProject.getProjects().get(projects.size()).getType(); - // if (originalType.equals(type)) { - if (!SqlTypeUtil.canCastFrom(originalType, type, false)) { - continue; - } - projects.add(new RexInputRef(fieldIndex, type)); - } - - return projects; - } -} - -// End MaterializedViewJoinRule.java http://git-wip-us.apache.org/repos/asf/calcite/blob/27ca3109/core/src/test/java/org/apache/calcite/test/MaterializationTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/calcite/test/MaterializationTest.java b/core/src/test/java/org/apache/calcite/test/MaterializationTest.java index bce7626..54ddaed 100644 --- a/core/src/test/java/org/apache/calcite/test/MaterializationTest.java +++ b/core/src/test/java/org/apache/calcite/test/MaterializationTest.java @@ -29,7 +29,6 @@ import org.apache.calcite.rel.RelReferentialConstraint; import org.apache.calcite.rel.RelReferentialConstraintImpl; import org.apache.calcite.rel.RelVisitor; import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.rules.MaterializedViewJoinRule; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeSystem; import org.apache.calcite.rex.RexBuilder; @@ -987,9 +986,7 @@ public class MaterializationTest { + "join \"depts\" using (\"deptno\") where \"empid\" = 1"; final String m = "select \"empid\" \"deptno\" from \"emps\"\n" + "join \"depts\" using (\"deptno\")"; - RuleSet rules = RuleSets.ofList(MaterializedViewJoinRule.INSTANCE_PROJECT, - MaterializedViewJoinRule.INSTANCE_TABLE_SCAN); - checkMaterializeWithRules(m, q, rules); + checkMaterialize(m, q); } @Test public void testUnionAll() { http://git-wip-us.apache.org/repos/asf/calcite/blob/27ca3109/site/_docs/materialized_views.md ---------------------------------------------------------------------- diff --git a/site/_docs/materialized_views.md b/site/_docs/materialized_views.md index cae361b..2d4c682 100644 --- a/site/_docs/materialized_views.md +++ b/site/_docs/materialized_views.md @@ -58,27 +58,18 @@ This can accomplish a large number of rewritings. However, this approach is not scalable in the presence of complex views, e.g., views containing many join operators, since it relies on the planner rules to create the equivalence between expressions. -In turn, two alternative rules that attempt to match queries to views defined using arbitrary queries, -have been proposed. They are both based on the ideas of the [same paper](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.95.113). +In turn, an alternative rule that attempts to match queries to views defined using arbitrary queries +has been proposed. -__MaterializedViewJoinRule__ is the first alternative. There are several limitations to the current implementation: +{AbstractMaterializedViewRule} builds on the ideas presented [here](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.95.113). +The rule can rewrite expressions containing arbitrary chains of Join, Filter, and Project operators. +Additionally, the rule can rewrite expressions rooted at an Aggregate operator, rolling aggregations up if necessary. -1. The query defining the view must use only inner joins -2. Only equality predicates are supported -3. Predicates on tables used in the view must exactly match predicates in the query -4. Rewriting is unoptimized and will attempt to match all views against each query - -These limitations are not fundamental the approach however and will hopefully be removed in the future. -Note that the rule is currently disabled by default. -To make use of the rule, {MaterializedViewJoinRule.INSTANCE_PROJECT} and {MaterializedViewJoinRule.INSTANCE_TABLE_SCAN} need to be added to the planner. - -__AbstractMaterializedViewRule__ is the second alternative. It builds on the same ideas but it attempts to be more generic. -In particular, some of the limitations of the previous rule, such as number `2.` and `3.`, do not exist for this rule. -Additionally, the rule will be able to rewrite expressions rooted at an Aggregate operator, rolling aggregations up if necessary. - -However, this rule still presents some limitations too. In addition to `1.` and `4.` above, the rule presents following +However, this rule still presents some limitations. In particular, the rule presents the following shortcomings that we plan to address with follow-up extensions: +* Rewriting is unoptimized and will attempt to match all views against each query. +* The query defining the view must use only inner joins. * It does not produce rewritings using Union operators, e.g., a given query could be partially answered from the {mv} (year = 2014) and from the query (not(year=2014)). This can be useful if {mv} is stored in a system such as Druid.
