Taewoo Kim has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/2030
Change subject: [ASTERIXDB-1984][COMP] probe-subtree init not required ...................................................................... [ASTERIXDB-1984][COMP] probe-subtree init not required - user model changes: no - storage format changes: no - interface changes: no Details: - Let the IntroduceJoinAccessMethod accept arbitrary forms of sub-tree for the probe-tree. - As a result, an open-type field with an enforced-index on the probe side will not be considered for the index-nested-loop join. An explicit conversion function should be used in such case. - The surrogate-join in the InvertedIndexAccessMethod now does not need a fixed form of sub-tree. Change-Id: Ib353c85bf627d8dd65dba0ea307dee428edb4a26 --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java A asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_04.sqlpp A asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_05.sqlpp A asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_06.sqlpp A asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_04.plan A asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_05.plan A asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_06.plan 8 files changed, 269 insertions(+), 40 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/30/2030/1 diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java index 7fc7902..1a7ccbd 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java @@ -320,25 +320,21 @@ // This will be used to find an applicable index on the dataset. boolean checkLeftSubTreeMetadata = false; boolean checkRightSubTreeMetadata = false; - if (continueCheck && (matchInLeftSubTree || matchInRightSubTree)) { + if (continueCheck && matchInRightSubTree) { // Set dataset and type metadata. if (matchInLeftSubTree) { checkLeftSubTreeMetadata = leftSubTree.setDatasetAndTypeMetadata(metadataProvider); } - if (matchInRightSubTree) { - checkRightSubTreeMetadata = rightSubTree.setDatasetAndTypeMetadata(metadataProvider); - } + checkRightSubTreeMetadata = rightSubTree.setDatasetAndTypeMetadata(metadataProvider); } - if (continueCheck && (checkLeftSubTreeMetadata || checkRightSubTreeMetadata)) { + if (continueCheck && checkRightSubTreeMetadata) { // Map variables to the applicable indexes and find the field name and type. // Then find the applicable indexes for the variables used in the JOIN condition. if (checkLeftSubTreeMetadata) { fillSubTreeIndexExprs(leftSubTree, analyzedAMs, context); } - if (checkRightSubTreeMetadata) { - fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context); - } + fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context); // Prune the access methods based on the function expression and access methods. pruneIndexCandidates(analyzedAMs, context, typeEnvironment); @@ -414,15 +410,18 @@ } joinCond = (AbstractFunctionCallExpression) condExpr; - boolean leftSubTreeInitialized = leftSubTree.initFromSubTree(joinOp.getInputs().get(0)); + // The result of the left subtree initialization does not need to be checked since only the field type + // of the field that is being joined is important. However, if we do not initialize the left sub tree, + // we lose a chance to get the field type of a field if there is an enforced index on it. + leftSubTree.initFromSubTree(joinOp.getInputs().get(0)); boolean rightSubTreeInitialized = rightSubTree.initFromSubTree(joinOp.getInputs().get(1)); - if (!leftSubTreeInitialized || !rightSubTreeInitialized) { + if (!rightSubTreeInitialized) { return false; } - // One of the subtrees must have a datasource scan. - if (leftSubTree.hasDataSourceScan() || rightSubTree.hasDataSourceScan()) { + // The right (inner) subtree must have a datasource scan. + if (rightSubTree.hasDataSourceScan()) { return true; } return false; diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java index 19c6da7..6b8246a 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java @@ -22,8 +22,11 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Queue; +import java.util.Set; import org.apache.asterix.common.annotations.SkipSecondaryIndexSearchExpressionAnnotation; import org.apache.asterix.common.config.DatasetConfig.IndexType; @@ -49,10 +52,12 @@ import org.apache.asterix.om.types.IAType; import org.apache.asterix.om.types.hierachy.ATypeHierarchy; import org.apache.asterix.om.utils.ConstantExpressionUtil; +import org.apache.asterix.optimizer.rules.util.EquivalenceClassUtils; import org.apache.asterix.runtime.evaluators.functions.FullTextContainsDescriptor; import org.apache.commons.lang3.mutable.Mutable; import org.apache.commons.lang3.mutable.MutableObject; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.common.utils.Pair; import org.apache.hyracks.algebricks.common.utils.Triple; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; @@ -597,11 +602,19 @@ ILogicalExpression joinCond, IOptimizableFuncExpr optFuncExpr, List<LogicalVariable> originalSubTreePKs, List<LogicalVariable> surrogateSubTreePKs, IOptimizationContext context) throws AlgebricksException { - probeSubTree.getPrimaryKeyVars(null, originalSubTreePKs); + // Gets the primary key(s) from the probe subtree. + Pair<ILogicalOperator, Set<LogicalVariable>> RootOpAndPKVars = + EquivalenceClassUtils.findOrCreatePrimaryKeyOpAndVariables(probeSubTree.getRoot(), true, context); + ILogicalOperator rootOp = RootOpAndPKVars.first; + originalSubTreePKs.addAll(RootOpAndPKVars.second); + if (!probeSubTree.getRoot().equals(rootOp)) { + probeSubTree.getRootRef().setValue(rootOp); + probeSubTree.setRoot(rootOp); + } // Create two copies of the original probe subtree. - // The first copy, which becomes the new probe subtree, will retain the primary-key and secondary-search key variables, - // but have all other variables replaced with new ones. + // The first copy, which becomes the new probe subtree, will retain the primary-key and + // secondary-search key variables, but have all other variables replaced with new ones. // The second copy, which will become an input to the top-level equi-join to resolve the surrogates, // will have all primary-key and secondary-search keys replaced, but retains all other original variables. @@ -644,11 +657,7 @@ Mutable<ILogicalOperator> originalProbeSubTreeRootRef = probeSubTree.getRootRef(); // Replace the original probe subtree with its copy. - Dataset origDataset = probeSubTree.getDataset(); - ARecordType origRecordType = probeSubTree.getRecordType(); probeSubTree.initFromSubTree(newProbeSubTreeRootRef); - probeSubTree.setDataset(origDataset); - probeSubTree.setRecordType(origRecordType); // Replace the variables in the join condition based on the mapping of variables // in the new probe subtree. @@ -1112,28 +1121,31 @@ return null; } - for (AbstractLogicalOperator op : subTree.getAssignsAndUnnests()) { - if (op.getOperatorTag() != LogicalOperatorTag.ASSIGN) { - continue; + Queue<Mutable<ILogicalOperator>> queue = new LinkedList<>(); + queue.add(subTree.getRootRef()); + while (!queue.isEmpty()) { + ILogicalOperator descendantOp = queue.poll().getValue(); + if (descendantOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) { + List<Mutable<ILogicalExpression>> exprList = ((AssignOperator) descendantOp).getExpressions(); + for (Mutable<ILogicalExpression> expr : exprList) { + if (expr.getValue().getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { + continue; + } + AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr.getValue(); + if (funcExpr.getFunctionIdentifier() != funcId) { + continue; + } + ILogicalExpression varExpr = funcExpr.getArguments().get(0).getValue(); + if (varExpr.getExpressionTag() != LogicalExpressionTag.VARIABLE) { + continue; + } + if (((VariableReferenceExpression) varExpr).getVariableReference() == targetVar) { + continue; + } + return (ScalarFunctionCallExpression) funcExpr; + } } - List<Mutable<ILogicalExpression>> exprList = ((AssignOperator) op).getExpressions(); - for (Mutable<ILogicalExpression> expr : exprList) { - if (expr.getValue().getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { - continue; - } - AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr.getValue(); - if (funcExpr.getFunctionIdentifier() != funcId) { - continue; - } - ILogicalExpression varExpr = funcExpr.getArguments().get(0).getValue(); - if (varExpr.getExpressionTag() != LogicalExpressionTag.VARIABLE) { - continue; - } - if (((VariableReferenceExpression) varExpr).getVariableReference() == targetVar) { - continue; - } - return (ScalarFunctionCallExpression) funcExpr; - } + queue.addAll(descendantOp.getInputs()); } return null; } diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_04.sqlpp new file mode 100644 index 0000000..15107b8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_04.sqlpp @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Description : Joins three datasets. + * : Since the given dataset in the inner branch has a secondary index on the field + * : that is being joined, we expect this join to be transformed into an indexed nested-loop join. + * Issue : ASTERIXDB-1984 + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +create type TestType as +{ + id : integer, + val : integer +}; + +create dataset testdst(TestType) primary key id; +create dataset testdst2(TestType) primary key id; +create dataset testdst3(TestType) primary key id; + +create index sec3_Idx on testdst3(val) type btree; + +SELECT * FROM +testdst a JOIN testdst2 b ON a.val = b.val JOIN testdst3 c ON b.val /* +indexnl */ = c.val; diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_05.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_05.sqlpp new file mode 100644 index 0000000..31fc953 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_05.sqlpp @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Description : Joins a constant array with a dataset. + * : Since the given dataset in the inner branch has a secondary index on the field that is being joined, + * : we expect this join to be transformed into an indexed nested-loop join. + * Issue : ASTERIXDB-1984 + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +create type TestType as +{ + id : integer, + val : integer +}; + +create dataset testdst(TestType) primary key id; + +create index sec_Idx on testdst (val) type btree; + +SELECT * FROM +[1, 2, 3] AS bar JOIN testdst ON bar /* +indexnl */ = testdst.val; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_06.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_06.sqlpp new file mode 100644 index 0000000..558e172 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries_sqlpp/btree-index-join/secondary-equi-join_06.sqlpp @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Description : Joins two datasets. + * : Since the given dataset in the inner branch has a secondary index on the field that is being joined, + * : we expect this join to be transformed into an indexed nested-loop join. + * Issue : ASTERIXDB-1984 + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +create type TestType as +{ + id : integer, + val : integer +}; + +create dataset testdst(TestType) primary key id; +create dataset testdst2(TestType) primary key id; + +create index sec_Idx on testdst2(val) type btree; + +SELECT * FROM +(SELECT val, COUNT(*) FROM testdst GROUP BY val) AS bar JOIN testdst2 ON bar.val /* +indexnl */ = testdst2.val; diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_04.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_04.plan new file mode 100644 index 0000000..9f60440 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_04.plan @@ -0,0 +1,33 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$22(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- HYBRID_HASH_JOIN [$$19][$$15] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$19] |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$15] |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_05.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_05.plan new file mode 100644 index 0000000..f372412 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_05.plan @@ -0,0 +1,18 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$13(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- ASSIGN |UNPARTITIONED| + -- UNNEST |UNPARTITIONED| + -- EMPTY_TUPLE_SOURCE |UNPARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_06.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_06.plan new file mode 100644 index 0000000..d7a7847 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/btree-index-join/secondary-equi-join_06.plan @@ -0,0 +1,38 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STABLE_SORT [$$32(ASC)] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- BTREE_SEARCH |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- SORT_GROUP_BY[$$30] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- HASH_PARTITION_EXCHANGE [$$30] |PARTITIONED| + -- SORT_GROUP_BY[$$23] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| -- To view, visit https://asterix-gerrit.ics.uci.edu/2030 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib353c85bf627d8dd65dba0ea307dee428edb4a26 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Taewoo Kim <wangs...@gmail.com>