http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql new file mode 100644 index 0000000..fe8b831 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query with ~= using Jaccard on 3-gram tokens. + * The index should be applied. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_ngram-fuzzyeq-jaccard.adm"; + +set simfunction 'jaccard'; +set simthreshold '0.8f'; + +for $o in dataset('DBLP') +where gram-tokens($o.title, 3, false) ~= gram-tokens("Transactions for Cooperative Environments", 3, false) +return $o
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql new file mode 100644 index 0000000..fe471d9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens. + * The index should be applied. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_ngram-jaccard-check.adm"; + +for $o in dataset('DBLP') +where similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f)[0] +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql new file mode 100644 index 0000000..60b95d5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard function on 3-gram tokens. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_ngram-jaccard.adm"; + +for $o in dataset('DBLP') +where similarity-jaccard(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false)) >= 0.5f +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql new file mode 100644 index 0000000..24844f8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the contains function. + * The index should *not* be applied (see below). + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_word-contains.adm"; + +// Contains cannot be answered with a word inverted index. 
+for $o in dataset('DBLP') +where contains($o.title, "Multimedia") +order by $o.id +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql new file mode 100644 index 0000000..f83c315 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query with ~= using Jaccard on word tokens. + * The index should be applied. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_word-fuzzyeq-jaccard.adm"; + +set simfunction 'jaccard'; +set simthreshold '0.5f'; + +for $o in dataset('DBLP') +where word-tokens($o.title) ~= word-tokens("Transactions for Cooperative Environments") +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql new file mode 100644 index 0000000..90d11be --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_word-jaccard-check.adm"; + +for $o in dataset('DBLP') +where similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f)[0] +return $o + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql new file mode 100644 index 0000000..2f29131 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard function on word tokens. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) 
type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-basic_word-jaccard.adm"; + +for $o in dataset('DBLP') +where similarity-jaccard(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments")) >= 0.5f +return $o + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql new file mode 100644 index 0000000..2d89001 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using + * two edit-distance-check functions of which only the first can be optimized with an index. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic-nopanic_01.adm"; + +// Only the first edit-distance-check can be optimized with an index. +for $o in dataset('DBLP') +let $eda := edit-distance-check($o.authors, "Amihay Motro", 3) +let $edb := edit-distance-check($o.authors, "Amihay Motro", 5) +where $eda[0] and $edb[0] +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql new file mode 100644 index 0000000..361f722 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using + * two edit-distance-check functions of which only the second can be optimized with an index. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic-nopanic_01.adm"; + +// Only the second edit-distance-check can be optimized with an index. 
+for $o in dataset('DBLP') +let $edb := edit-distance-check($o.authors, "Amihay Motro", 5) +let $eda := edit-distance-check($o.authors, "Amihay Motro", 3) +where $edb[0] and $eda[0] +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql new file mode 100644 index 0000000..a438fe6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings. 
+ * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let.adm"; + +for $o in dataset('DBLP') +let $ed := edit-distance-check($o.authors, "Amihay Motro", 1) +where $ed[0] +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql new file mode 100644 index 0000000..0693557 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on the substring of the field. + * Tests that the optimizer rule correctly drills through the substring function. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title: string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-substring.adm"; + +for $paper in dataset('DBLP') +where edit-distance-check(substring($paper.title, 0, 8), "datbase", 1)[0] +return { + "id" : $paper.id, + "title" : $paper.title +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql new file mode 100644 index 0000000..e9ab5af --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_ngram-jaccard-check-let.adm"; + +for $o in dataset('DBLP') +let $jacc := similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f) +where $jacc[0] +return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql new file mode 100644 index 0000000..0d1e304 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_ngram-jaccard-check-multi-let.adm"; + +// This test is complex because we have three assigns to drill into. 
+for $paper in dataset('DBLP') +let $paper_tokens := gram-tokens($paper.title, 3, false) +let $query_tokens := gram-tokens("Transactions for Cooperative Environments", 3, false) +let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f) +where $jacc[0] +return {"Paper": $paper_tokens, "Query": $query_tokens } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql new file mode 100644 index 0000000..bc1fe5d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm"; + +for $o in dataset('DBLP') +let $jacc := similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f) +where $jacc[0] +return $o + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql new file mode 100644 index 0000000..47ffce4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. + * Tests that the optimizer rule correctly drills through the let clauses. + * The index should be applied. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm"; + +// This test is complex because we have three assigns to drill into. 
+for $paper in dataset('DBLP') +let $paper_tokens := word-tokens($paper.title) +let $query_tokens := word-tokens("Transactions for Cooperative Environments") +let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f) +where $jacc[0] +return {"Paper": $paper_tokens, "Query": $query_tokens } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql new file mode 100644 index 0000000..b482b48 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Test that left-outer-join may use two available indexes, one for primary index in probe subtree and another for secondary ngram index in index subtree. + * Issue : 730, 741 + * Expected Res : Success + * Date : 8th May 2014 + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type TwitterUserType as closed { + screen-name: string, + lang: string, + friends-count: int32, + statuses-count: int32, + name: string, + followers-count: int32 +} + +create type TweetMessageType as open { + tweetid: int64, + user: TwitterUserType, + sender-location: point, + send-time: datetime, + referred-topics: {{ string }}, + countA: int32, + countB: int32 +} + +create dataset TweetMessages(TweetMessageType) +primary key tweetid; + +create index msgNgramIx on TweetMessages(message-text: string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm"; + +for $t1 in dataset('TweetMessages') +where $t1.tweetid > int64("240") +order by $t1.tweetid +return { + "tweet": {"id": $t1.tweetid, "topics" : $t1.message-text} , + "similar-tweets": for $t2 in dataset('TweetMessages') + let $sim := edit-distance-check($t1.message-text, $t2.message-text, 7) + where $sim[0] and + $t2.tweetid != $t1.tweetid + order by $t2.tweetid + return {"id": $t2.tweetid, "topics" : $t2.message-text} +}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql new file mode 100644 index 0000000..a5cb417 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. + * CSX has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX(authors:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_02.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql new file mode 100644 index 0000000..c689120 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors. 
+ * DBLP has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_03.adm"; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql new file mode 100644 index 0000000..68c255e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. + * DBLP and CSX both have a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index_DBLP on DBLP(authors:string?) type ngram(3) enforced; + +create index ngram_index_CSX on CSX(authors:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql new file mode 100644 index 0000000..860461d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. 
+ * CSX has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_02.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql new file mode 100644 index 0000000..19a2cfa --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. + * DBLP has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm"; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql new file mode 100644 index 0000000..b2bf9cf --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql @@ -0,0 +1,56 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. + * DBLP and CSX both have a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index_DBLP on DBLP(authors:string?) type ngram(3) enforced; + +create index ngram_index_CSX on CSX(authors:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql new file mode 100644 index 0000000..7cc50d1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. 
+ * CSX has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX(authors:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm"; + +set simfunction 'edit-distance'; +set simthreshold '3'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where $a.authors ~= $b.authors and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql new file mode 100644 index 0000000..956913d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on ~= using edit distance of its authors. + * DBLP has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + title: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(authors:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_03.adm"; + +set simfunction 'edit-distance'; +set simthreshold '3'; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where $a.authors ~= $b.authors and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql new file mode 100644 index 0000000..237ac57 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets, closed DBLP and open CSX, based on ~= using Jaccard of their titles' 3-gram tokens. + * CSX has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_02.adm"; + +set simfunction 'jaccard'; +set simthreshold '0.5f'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql new file mode 100644 index 0000000..6ef70de --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' 3-gram tokens. + * DBLP has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_03.adm"; + +set simfunction 'jaccard'; +set simthreshold '0.5f'; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql new file mode 100644 index 0000000..bbaa284 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets, closed DBLP and open CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens. + * CSX has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_02.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] + and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql new file mode 100644 index 0000000..a0f8683 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins an open dataset DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens. + * DBLP has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_03.adm"; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] + and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql new file mode 100644 index 0000000..26e2504 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens. + * DBLP and CSX both have a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index_DBLP on DBLP(title:string?) type ngram(3) enforced; + +create index ngram_index_CSX on CSX(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_04.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] + and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql new file mode 100644 index 0000000..82b31c3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens. + * CSX has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX(title:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_02.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f + and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql new file mode 100644 index 0000000..e702d56 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. 
+ * DBLP has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP(title:string?) type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_03.adm"; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f + and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql new file mode 100644 index 0000000..e1a9164 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens. + * DBLP and CSX both have a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; +set import-private-functions 'true'; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index_DBLP on DBLP(title:string?) type ngram(3) enforced; + +create index ngram_index_CSX on CSX(title:string?) 
type ngram(3) enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_04.adm"; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f + and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql new file mode 100644 index 0000000..3990354 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens. 
+ * CSX has an enforced open keyword index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as closed { + id: int32, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as open { + id: int32, + csxid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index keyword_index on CSX(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_02.adm"; + +set simfunction 'jaccard'; +set simthreshold '0.5f'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id +return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/c4dbb614/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_03.aql new file mode 100644 index 0000000..9cccbf0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_03.aql @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' word tokens. + * DBLP has an enforced open keyword index on title?, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPType as open { + id: int32, + dblpid: string, + authors: string, + misc: string +} + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP(title:string?) type keyword enforced; + +write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_03.adm"; + +set simfunction 'jaccard'; +set simthreshold '0.5f'; + +for $a in dataset('DBLP') +for $b in dataset('DBLP') +where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id +return {"arec": $a, "brec": $b }
