http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.sqlpp new file mode 100644 index 0000000..ceac8c6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/ulist-jaccard-check-let.sqlpp @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on lists. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : {{string}}, + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-complex_ulist-jaccard-check-let.adm"; +select element c +from Customers as c +with jacc as test.`similarity-jaccard-check`(c.interests,['databases','computers','wine'],0.700000f) +where jacc[0] +;
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql deleted file mode 100644 index d932d23..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm"; - -for $o in dataset('DBLP') -let $jacc := similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f) -where $jacc[0] -return $o - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.sqlpp new file mode 100644 index 0000000..e4f3b8d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-let.sqlpp @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm"; +select element o +from DBLP as o +with jacc as test.`similarity-jaccard-check`(test.`word-tokens`(o.title),test.`word-tokens`('Transactions for Cooperative Environments'),0.500000f) +where jacc[0] +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql deleted file mode 100644 index 79fe390..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.aql +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm"; - -// This test is complex because we have three assigns to drill into. 
-for $paper in dataset('DBLP') -let $paper_tokens := word-tokens($paper.title) -let $query_tokens := word-tokens("Transactions for Cooperative Environments") -let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f) -where $jacc[0] -return {"Paper": $paper_tokens, "Query": $query_tokens } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.sqlpp new file mode 100644 index 0000000..56ede6d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-complex/word-jaccard-check-multi-let.sqlpp @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. 
+ * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm"; +select element {'Paper':paper_tokens,'Query':query_tokens} +from DBLP as paper +with paper_tokens as test.`word-tokens`(paper.title), + query_tokens as test.`word-tokens`('Transactions for Cooperative Environments'), + jacc as test.`similarity-jaccard-check`(paper_tokens,query_tokens,0.800000f) +where jacc[0] +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.aql deleted file mode 100644 index b99bc22..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance-inline.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $ed := edit-distance($a.authors, $b.authors) -where $ed < 3 and $a.id < $b.id -return {"aauthors": $a.authors, "bauthors": $b.authors, "ed": $ed} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.sqlpp 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.sqlpp new file mode 100644 index 0000000..c29de43 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-edit-distance-inline.sqlpp @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. + * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance-inline.adm"; +select element {'aauthors':a.authors,'bauthors':b.authors,'ed':ed} +from DBLP as a, + DBLP as b +with ed as test.`edit-distance`(a.authors,b.authors) +where ((ed < 3) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.aql deleted file mode 100644 index d4491eb..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.aql +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-fuzzyeq-edit-distance.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where $a.authors ~= $b.authors and $a.id < $b.id -return {"aauthors": $a.authors, "bauthors": $b.authors} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.sqlpp 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.sqlpp new file mode 100644 index 0000000..1234824 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-fuzzyeq-edit-distance.sqlpp @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. + * CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-fuzzyeq-edit-distance.adm"; +set `simfunction` `edit-distance`; + +set `simthreshold` `3`; + +select element {'aauthors':a.authors,'bauthors':b.authors} +from DBLP as a, + CSX as b +where ((a.authors ~= b.authors) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.aql deleted file mode 100644 index 04d57a4..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.aql +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-jaccard-inline.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) -where $jacc >= 0.5f and $a.id < $b.id -return {"atitle": $a.title, "btitle": $b.title, "jacc": $jacc} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.sqlpp ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.sqlpp new file mode 100644 index 0000000..4fdc637 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline.sqlpp @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. + * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ngram-jaccard-inline.adm"; +select element {'atitle':a.title,'btitle':b.title,'jacc':jacc} +from DBLP as a, + DBLP as b +with jacc as test.`similarity-jaccard`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false)) +where ((jacc >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.aql deleted file mode 100644 index 5d48e10..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance function of its interest lists. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_olist-edit-distance-inline.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -let $ed := edit-distance($a.interests, $b.interests) -where $ed <= 2 and $a.cid < $b.cid -return {"ainterests": $a.interests, "binterests": $b.interests, "ed": $ed} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.sqlpp new file mode 100644 index 0000000..40c8c5f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-edit-distance-inline.sqlpp @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance function of its interest lists. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : [string], + children : [{ + name : string, + age : integer? 
+ } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_olist-edit-distance-inline.adm"; +select element {'ainterests':a.interests,'binterests':b.interests,'ed':ed} +from Customers as a, + Customers as b +with ed as test.`edit-distance`(a.interests,b.interests) +where ((ed <= 2) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.aql deleted file mode 100644 index d074b0f..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest lists. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? } ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_olist-jaccard-inline.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -let $jacc := /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) -where $jacc >= 0.7f and $a.cid < $b.cid -return {"ainterests": $a.interests, "binterests": $b.interests, "jacc": $jacc } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.sqlpp new file mode 100644 index 0000000..a003dee --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/olist-jaccard-inline.sqlpp @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest lists. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : [string], + children : [{ + name : string, + age : integer? 
+ } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_olist-jaccard-inline.adm"; +select element {'ainterests':a.interests,'binterests':b.interests,'jacc':jacc} +from Customers as a, + Customers as b +with jacc as /*+ indexnl */ test.`similarity-jaccard`(a.interests,b.interests) +where ((jacc >= 0.700000f) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.aql deleted file mode 100644 index 56697c5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: {{string}}, - children: [ { name: string, age: int32? } ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ulist-jaccard-inline.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -let $jacc := /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) -where $jacc >= 0.7f and $a.cid < $b.cid -return {"ainterests": $a.interests, "binterests": $b.interests, "jacc": $jacc} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.sqlpp new file mode 100644 index 0000000..1372490 --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/ulist-jaccard-inline.sqlpp @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : {{string}}, + children : [{ + name : string, + age : integer? 
+ } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_ulist-jaccard-inline.adm"; +select element {'ainterests':a.interests,'binterests':b.interests,'jacc':jacc} +from Customers as a, + Customers as b +with jacc as /*+ indexnl */ test.`similarity-jaccard`(a.interests,b.interests) +where ((jacc >= 0.700000f) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.aql deleted file mode 100644 index b917cda..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_word-jaccard-inline.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) -where $jacc >= 0.5f and $a.id < $b.id -return {"atitle": $a.title, "btitle": $b.title, "jacc": $jacc} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.sqlpp new file mode 100644 index 0000000..9530163 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join-noeqjoin/word-jaccard-inline.sqlpp @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. + * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join-noeqjoin_word-jaccard-inline.adm"; +select element {'atitle':a.title,'btitle':b.title,'jacc':jacc} +from DBLP as a, + DBLP as b +with jacc as test.`similarity-jaccard`(test.`word-tokens`(a.title),test.`word-tokens`(b.title)) +where ((jacc >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql deleted file mode 100644 index 8ddd03b..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.aql +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Test that left-outer-join may use an available inverted index in index subtree. - * Issue : 741 - * Expected Res : Success - * Date : 16th May 2014 - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as { -screen_name: string, -lang: string, -friends_count: int32, -statuses_count: int32, -name: string, -followers_count: int32 -} - -create type TweetMessageType as { -tweetid: int64, -user: TwitterUserType, -sender_location: point?, -send_time: datetime, -referred_topics: {{ string }}, -message_text: string -} - -create dataset TweetMessages(TweetMessageType) primary key tweetid; - -create index topicIIx on TweetMessages(referred_topics) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_issue741.adm"; - -for $t in dataset('TweetMessages') -where $t.send_time >= datetime('2011-06-18T14:10:17') -and -$t.send_time < datetime('2011-06-18T15:10:17') -return { - "tweet": $t.tweetid, - "similar-tweets": for $t2 in dataset('TweetMessages') - let $sim := similarity-jaccard-check($t.referred_topics, $t2.referred_topics, 0.6f) - where $sim[0] and - $t2.tweetid != $t.tweetid - return $t2.tweetid -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.sqlpp new file mode 100644 index 0000000..5a312b3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/issue741.sqlpp @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test that left-outer-join may use an available inverted index in index subtree. + * Issue : 741 + * Expected Res : Success + * Date : 16th May 2014 + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.TwitterUserType as +{ + screen_name : string, + lang : string, + friends_count : integer, + statuses_count : integer, + name : string, + followers_count : integer +}; + +create type test.TweetMessageType as +{ + tweetid : bigint, + user : TwitterUserType, + sender_location : point?, + send_time : datetime, + referred_topics : {{string}}, + message_text : string +}; + +create dataset TweetMessages(TweetMessageType) primary key tweetid; + +create index topicIIx on TweetMessages (referred_topics) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_issue741.adm"; +select element {'tweet':t.tweetid,'similar-tweets':( + select element t2.tweetid + from TweetMessages as t2 + with sim as test.`similarity-jaccard-check`(t.referred_topics,t2.referred_topics,0.600000f) + where (sim[0] and (t2.tweetid != t.tweetid)) + )} +from TweetMessages as t +where ((t.send_time >= test.datetime('2011-06-18T14:10:17')) and (t.send_time < 
test.datetime('2011-06-18T15:10:17'))) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql deleted file mode 100644 index bd52bac..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree. 
- * Issue : 730, 741 - * Expected Res : Success - * Date : 8th May 2014 - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as closed { - screen-name: string, - lang: string, - friends-count: int32, - statuses-count: int32, - name: string, - followers-count: int32 -} - -create type TweetMessageType as closed { - tweetid: int64, - user: TwitterUserType, - sender-location: point, - send-time: datetime, - referred-topics: {{ string }}, - message-text: string, - countA: int32, - countB: int32 -} - -create dataset TweetMessages(TweetMessageType) -primary key tweetid; - -create index twmSndLocIx on TweetMessages(sender-location) type rtree; -create index msgCountAIx on TweetMessages(countA) type btree; -create index msgCountBIx on TweetMessages(countB) type btree; -create index msgKeywordIx on TweetMessages(message-text) type keyword; -create index msgNgramIx on TweetMessages(message-text) type ngram(3); -create index topicKeywordIx on TweetMessages(referred-topics) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm"; - -for $t1 in dataset('TweetMessages') -where $t1.tweetid > int64("240") -order by $t1.tweetid -return { - "tweet": {"id": $t1.tweetid, "topics" : $t1.message-text} , - "similar-tweets": for $t2 in dataset('TweetMessages') - let $sim := edit-distance-check($t1.message-text, $t2.message-text, 7) - where $sim[0] and - $t2.tweetid != $t1.tweetid - order by $t2.tweetid - return {"id": $t2.tweetid, "topics" : $t2.message-text} -}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.sqlpp ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.sqlpp new file mode 100644 index 0000000..8c2869f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.sqlpp @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree. 
+ * Issue : 730, 741 + * Expected Res : Success + * Date : 8th May 2014 + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.TwitterUserType as + closed { + `screen-name` : string, + lang : string, + `friends-count` : integer, + `statuses-count` : integer, + name : string, + `followers-count` : integer +}; + +create type test.TweetMessageType as + closed { + tweetid : bigint, + user : TwitterUserType, + `sender-location` : point, + `send-time` : datetime, + `referred-topics` : {{string}}, + `message-text` : string, + countA : integer, + countB : integer +}; + +create dataset TweetMessages(TweetMessageType) primary key tweetid; + +create index twmSndLocIx on TweetMessages (`sender-location`) type rtree; + +create index msgCountAIx on TweetMessages (countA) type btree; + +create index msgCountBIx on TweetMessages (countB) type btree; + +create index msgKeywordIx on TweetMessages (`message-text`) type keyword; + +create index msgNgramIx on TweetMessages (`message-text`) type ngram (3); + +create index topicKeywordIx on TweetMessages (`referred-topics`) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm"; +select element {'tweet':{'id':t1.tweetid,'topics':t1.`message-text`},'similar-tweets':( + select element {'id':t2.tweetid,'topics':t2.`message-text`} + from TweetMessages as t2 + with sim as test.`edit-distance-check`(t1.`message-text`,t2.`message-text`,7) + where (sim[0] and (t2.tweetid != t1.tweetid)) + order by t2.tweetid + )} +from TweetMessages as t1 +where (t1.tweetid > test.bigint('240')) +order by t1.tweetid +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql deleted file mode 100644 index 27b0e37..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree. 
- * Issue : 730, 741 - * Expected Res : Success - * Date : 8th May 2014 - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as closed { - screen-name: string, - lang: string, - friends-count: int32, - statuses-count: int32, - name: string, - followers-count: int32 -} - -create type TweetMessageType as closed { - tweetid: int64, - user: TwitterUserType, - sender-location: point, - send-time: datetime, - referred-topics: {{ string }}, - message-text: string, - countA: int32, - countB: int32 -} - -create dataset TweetMessages(TweetMessageType) -primary key tweetid; - -create index twmSndLocIx on TweetMessages(sender-location) type rtree; -create index msgCountAIx on TweetMessages(countA) type btree; -create index msgCountBIx on TweetMessages(countB) type btree; -create index msgKeywordIx on TweetMessages(message-text) type keyword; -create index msgNgramIx on TweetMessages(message-text) type ngram(3); -create index topicKeywordIx on TweetMessages(referred-topics) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-jaccard-check_idx_01.adm"; - -for $t1 in dataset('TweetMessages') -where $t1.tweetid > int64("240") -order by $t1.tweetid -return { - "tweet": {"id": $t1.tweetid, "topics" : $t1.referred-topics} , - "similar-tweets": for $t2 in dataset('TweetMessages') - let $sim := similarity-jaccard-check($t1.referred-topics, $t2.referred-topics, 0.5f) - where $sim[0] and - $t2.tweetid != $t1.tweetid - order by $t2.tweetid - return {"id": $t2.tweetid, "topics" : $t2.referred-topics} -}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.sqlpp ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.sqlpp new file mode 100644 index 0000000..9db0bd7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.sqlpp @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test that left-outer-join may use two available indexes, one for primary index in probe subtree and another for secondary keyword index in index subtree.
+ * Issue : 730, 741 + * Expected Res : Success + * Date : 8th May 2014 + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.TwitterUserType as + closed { + `screen-name` : string, + lang : string, + `friends-count` : integer, + `statuses-count` : integer, + name : string, + `followers-count` : integer +}; + +create type test.TweetMessageType as + closed { + tweetid : bigint, + user : TwitterUserType, + `sender-location` : point, + `send-time` : datetime, + `referred-topics` : {{string}}, + `message-text` : string, + countA : integer, + countB : integer +}; + +create dataset TweetMessages(TweetMessageType) primary key tweetid; + +create index twmSndLocIx on TweetMessages (`sender-location`) type rtree; + +create index msgCountAIx on TweetMessages (countA) type btree; + +create index msgCountBIx on TweetMessages (countB) type btree; + +create index msgKeywordIx on TweetMessages (`message-text`) type keyword; + +create index msgNgramIx on TweetMessages (`message-text`) type ngram (3); + +create index topicKeywordIx on TweetMessages (`referred-topics`) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-jaccard-check_idx_01.adm"; +select element {'tweet':{'id':t1.tweetid,'topics':t1.`referred-topics`},'similar-tweets':( + select element {'id':t2.tweetid,'topics':t2.`referred-topics`} + from TweetMessages as t2 + with sim as test.`similarity-jaccard-check`(t1.`referred-topics`,t2.`referred-topics`,0.500000f) + where (sim[0] and (t2.tweetid != t1.tweetid)) + order by t2.tweetid + )} +from TweetMessages as t1 +where (t1.tweetid > test.bigint('240')) +order by t1.tweetid +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql deleted file mode 100644 index cf2222d..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. - * CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.sqlpp new file mode 100644 index 0000000..a6ade32 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_02.sqlpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. + * CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_02.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where (test.`edit-distance-check`(a.authors,b.authors,3)[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql deleted file mode 100644 index 95b2912..0000000 --- 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.aql +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.sqlpp new file mode 100644 index 0000000..ed8d6b6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_03.sqlpp @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors. + * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_03.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where (test.`edit-distance-check`(a.authors,b.authors,3)[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.aql deleted file mode 100644 index 5961d13..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_04.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $ed := edit-distance-check($a.authors, $b.authors, 3) -where $ed[0] and $a.id < $b.id -return {"arec": $a, "brec": $b, "ed": $ed[1] } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.sqlpp 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.sqlpp new file mode 100644 index 0000000..7a24772 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance-check_04.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors. + * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_04.adm"; +select element {'arec':a,'brec':b,'ed':ed[1]} +from DBLP as a, + DBLP as b +with ed as test.`edit-distance-check`(a.authors,b.authors,3) +where (ed[0] and (a.id < b.id)) +;