Taewoo Kim has submitted this change and it was merged. Change subject: ASTERIXDB-1700: fixed multiple same type of index application error on the same field ......................................................................
ASTERIXDB-1700: fixed multiple same type of index application error on the same field - Fixed an issue that multiple same type of indexes can be applied for the same field. For this situation, applying only one index will be enough. (e.g., 2-gram and 3-gram index on the same field) Change-Id: I450f3adb20c777d5b9a8f638e010076b9d817942 Reviewed-on: https://asterix-gerrit.ics.uci.edu/1307 Tested-by: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Jianfeng Jia <[email protected]> --- M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java A asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql A asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql A asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml 7 files changed, 142 insertions(+), 11 deletions(-) Approvals: Jianfeng Jia: Looks good to me, approved Jenkins: Verified; Verified Objections: Jenkins: Violations found diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java index ec29b53..8e78d1a 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -145,9 +146,19 @@ return list.isEmpty() ? null : list.get(0); } + /** + * Choose all indexes that match the given access method. These indexes will be used as index-search + * to replace the given predicates in a SELECT operator. Also, if there are multiple same type of indexes + * on the same field, only one of them will be chosen. Allowed cases (AccessMethod, IndexType) are: + * [BTreeAccessMethod , IndexType.BTREE], [RTreeAccessMethod , IndexType.RTREE], + * [InvertedIndexAccessMethod, IndexType.SINGLE_PARTITION_WORD_INVIX || SINGLE_PARTITION_NGRAM_INVIX || + * LENGTH_PARTITIONED_WORD_INVIX || LENGTH_PARTITIONED_NGRAM_INVIX] + */ protected List<Pair<IAccessMethod, Index>> chooseAllIndex( Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) { List<Pair<IAccessMethod, Index>> result = new ArrayList<Pair<IAccessMethod, Index>>(); + // Use variables (fields) to the index types map to check which type of indexes are applied for the vars. 
+ Map<List<Pair<Integer, Integer>>, List<IndexType>> resultVarsToIndexTypesMap = new HashMap<>(); Iterator<Map.Entry<IAccessMethod, AccessMethodAnalysisContext>> amIt = analyzedAMs.entrySet().iterator(); while (amIt.hasNext()) { Map.Entry<IAccessMethod, AccessMethodAnalysisContext> amEntry = amIt.next(); @@ -156,15 +167,6 @@ .iterator(); while (indexIt.hasNext()) { Map.Entry<Index, List<Pair<Integer, Integer>>> indexEntry = indexIt.next(); - // To avoid a case where the chosen access method and a chosen - // index type is different. - // Allowed Case: [BTreeAccessMethod , IndexType.BTREE], - // [RTreeAccessMethod , IndexType.RTREE], - // [InvertedIndexAccessMethod, - // IndexType.SINGLE_PARTITION_WORD_INVIX || - // SINGLE_PARTITION_NGRAM_INVIX || - // LENGTH_PARTITIONED_WORD_INVIX || - // LENGTH_PARTITIONED_NGRAM_INVIX] IAccessMethod chosenAccessMethod = amEntry.getKey(); Index chosenIndex = indexEntry.getKey(); IndexType indexType = chosenIndex.getIndexType(); @@ -172,11 +174,21 @@ || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX || indexType == IndexType.SINGLE_PARTITION_WORD_INVIX || indexType == IndexType.SINGLE_PARTITION_NGRAM_INVIX; - if ((chosenAccessMethod == BTreeAccessMethod.INSTANCE && indexType == IndexType.BTREE) || (chosenAccessMethod == RTreeAccessMethod.INSTANCE && indexType == IndexType.RTREE) || (chosenAccessMethod == InvertedIndexAccessMethod.INSTANCE && isKeywordOrNgramIndexChosen)) { - result.add(new Pair<IAccessMethod, Index>(chosenAccessMethod, chosenIndex)); + if (resultVarsToIndexTypesMap.containsKey(indexEntry.getValue())) { + List<IndexType> appliedIndexTypes = resultVarsToIndexTypesMap.get(indexEntry.getValue()); + if (!appliedIndexTypes.contains(indexType)) { + appliedIndexTypes.add(indexType); + result.add(new Pair<IAccessMethod, Index>(chosenAccessMethod, chosenIndex)); + } + } else { + List<IndexType> addedIndexTypes = new ArrayList<>(); + addedIndexTypes.add(indexType); + resultVarsToIndexTypesMap.put(indexEntry.getValue(), 
addedIndexTypes); + result.add(new Pair<IAccessMethod, Index>(chosenAccessMethod, chosenIndex)); + } } } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql new file mode 100644 index 0000000..3756b81 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.ddl.aql @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + /* + * Description : Test that conducts an inverted index search on the field with multiple same types of indexes. 
+ * Expected Result : Success + * Issue : ASTERIXDB-1700 + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create nodegroup group1 if not exists on asterix_nc1, asterix_nc2; + +create dataset DBLP(DBLPType) + primary key id on group1; + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql new file mode 100644 index 0000000..88653a2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.2.update.aql @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse test; + +load dataset DBLP +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted; + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql new file mode 100644 index 0000000..0b7ef02 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.3.ddl.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse test; + +create index ngram2_index on DBLP(authors) type ngram(2); + +create index ngram3_index on DBLP(authors) type ngram(3); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql new file mode 100644 index 0000000..b61b765 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.4.query.aql @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse test; + +for $o in dataset('DBLP') +let $ed := edit-distance-check($o.authors, "Amihay Motro", 1) +where $ed[0] +return $o diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm new file mode 100644 index 0000000..528c4a5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/index-selection/inverted-index-ngram-edit-distance-with-two-ngram-index/inverted-index-ngram-edit-distance-with-two-ngram-index.1.adm @@ -0,0 +1 @@ +{ "id": 22, "dblpid": "books/acm/kim95/Motro95", "title": "Management of Uncerainty in database Systems.", "authors": "Amihai Motro", "misc": "2002-01-03 457-476 1995 Modern Database Systems db/books/collections/kim95.html#Motro95" } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml index c5afa97..942b546 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml @@ -2818,6 +2818,11 @@ </compilation-unit> </test-case> <test-case FilePath="index-selection"> + <compilation-unit name="inverted-index-ngram-edit-distance-with-two-ngram-index"> + <output-dir compare="Text">inverted-index-ngram-edit-distance-with-two-ngram-index</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="index-selection"> <compilation-unit name="inverted-index-ngram-edit-distance-word-tokens"> <output-dir compare="Text">inverted-index-ngram-edit-distance-word-tokens</output-dir> </compilation-unit> -- To view, visit https://asterix-gerrit.ics.uci.edu/1307 To unsubscribe, visit 
https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I450f3adb20c777d5b9a8f638e010076b9d817942 Gerrit-PatchSet: 4 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Taewoo Kim <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Jianfeng Jia <[email protected]> Gerrit-Reviewer: Taewoo Kim <[email protected]>
