http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql deleted file mode 100644 index 5392b22..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-edit-distance.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query with ~= using edit-distance on strings. - * The index should be applied. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_ngram-fuzzyeq-edit-distance.adm"; - -set simfunction 'edit-distance'; -set simthreshold '1'; - -for $o in dataset('DBLP') -where $o.authors ~= "Amihay Motro" -return $o
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql deleted file mode 100644 index fe8b831..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-fuzzyeq-jaccard.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query with ~= using Jaccard on 3-gram tokens. - * The index should be applied. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_ngram-fuzzyeq-jaccard.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.8f'; - -for $o in dataset('DBLP') -where gram-tokens($o.title, 3, false) ~= gram-tokens("Transactions for Cooperative Environments", 3, false) -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql deleted file mode 100644 index fe471d9..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard-check.aql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_ngram-jaccard-check.adm"; - -for $o in dataset('DBLP') -where similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f)[0] -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql deleted file mode 100644 index 60b95d5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/ngram-jaccard.aql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard function on 3-gram tokens. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_ngram-jaccard.adm"; - -for $o in dataset('DBLP') -where similarity-jaccard(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false)) >= 0.5f -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql deleted file mode 100644 index 24844f8..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-contains.aql +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the contains function. - * The index should *not* be applied (see below). 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title:string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_word-contains.adm"; - -// Contains cannot be answered with a word inverted index. -for $o in dataset('DBLP') -where contains($o.title, "Multimedia") -order by $o.id -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql deleted file mode 100644 index f83c315..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-fuzzyeq-jaccard.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query with ~= using Jaccard on word tokens. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title:string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_word-fuzzyeq-jaccard.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $o in dataset('DBLP') -where word-tokens($o.title) ~= word-tokens("Transactions for Cooperative Environments") -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql deleted file mode 100644 index 90d11be..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard-check.aql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or 
more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title:string?) 
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_word-jaccard-check.adm"; - -for $o in dataset('DBLP') -where similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f)[0] -return $o - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql deleted file mode 100644 index 2f29131..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-basic/word-jaccard.aql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard function on word tokens. - * The index should be applied. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title:string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-basic_word-jaccard.adm"; - -for $o in dataset('DBLP') -where similarity-jaccard(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments")) >= 0.5f -return $o - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql deleted file mode 100644 index 2d89001..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_01.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using - * two edit-distance-check functions, of which only the first can be optimized with an index. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic-nopanic_01.adm"; - -// Only the first edit-distance-check can be optimized with an index. 
-for $o in dataset('DBLP') -let $eda := edit-distance-check($o.authors, "Amihay Motro", 3) -let $edb := edit-distance-check($o.authors, "Amihay Motro", 5) -where $eda[0] and $edb[0] -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql deleted file mode 100644 index 361f722..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let-panic-nopanic_02.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using - * two edit-distance-check functions, of which only the second can be optimized with an index. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let-panic-nopanic_01.adm"; - -// Only the second edit-distance-check can be optimized with an index. -for $o in dataset('DBLP') -let $edb := edit-distance-check($o.authors, "Amihay Motro", 5) -let $eda := edit-distance-check($o.authors, "Amihay Motro", 3) -where $edb[0] and $eda[0] -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql deleted file mode 100644 index a438fe6..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-let.aql +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let.adm"; - -for $o in dataset('DBLP') -let $ed := edit-distance-check($o.authors, "Amihay Motro", 1) -where $ed[0] -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql deleted file mode 100644 index 0693557..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-edit-distance-check-substring.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on the substring of the field. - * Tests that the optimizer rule correctly drills through the substring function. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title: string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-substring.adm"; - -for $paper in dataset('DBLP') -where edit-distance-check(substring($paper.title, 0, 8), "datbase", 1)[0] -return { - "id" : $paper.id, - "title" : $paper.title -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql deleted file mode 100644 index e9ab5af..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-let.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_ngram-jaccard-check-let.adm"; - -for $o in dataset('DBLP') -let $jacc := similarity-jaccard-check(gram-tokens($o.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f) -where $jacc[0] -return $o http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql deleted file mode 100644 index 0d1e304..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/ngram-jaccard-check-multi-let.aql +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_ngram-jaccard-check-multi-let.adm"; - -// This test is complex because we have three assigns to drill into. -for $paper in dataset('DBLP') -let $paper_tokens := gram-tokens($paper.title, 3, false) -let $query_tokens := gram-tokens("Transactions for Cooperative Environments", 3, false) -let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f) -where $jacc[0] -return {"Paper": $paper_tokens, "Query": $query_tokens } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql deleted file mode 100644 index bc1fe5d..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-let.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title:string?) 
type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm"; - -for $o in dataset('DBLP') -let $jacc := similarity-jaccard-check(word-tokens($o.title), word-tokens("Transactions for Cooperative Environments"), 0.5f) -where $jacc[0] -return $o - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql deleted file mode 100644 index 47ffce4..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-complex/word-jaccard-check-multi-let.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens. - * Tests that the optimizer rule correctly drills through the let clauses. - * The index should be applied. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title:string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm"; - -// This test is complex because we have three assigns to drill into. -for $paper in dataset('DBLP') -let $paper_tokens := word-tokens($paper.title) -let $query_tokens := word-tokens("Transactions for Cooperative Environments") -let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f) -where $jacc[0] -return {"Paper": $paper_tokens, "Query": $query_tokens } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql deleted file mode 100644 index b482b48..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Test that left-outer-join may use two available indexes, one for primary index in probe subtree and another for secondary ngram index in index subtree. - * Issue : 730, 741 - * Expected Res : Success - * Date : 8th May 2014 - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as closed { - screen-name: string, - lang: string, - friends-count: int32, - statuses-count: int32, - name: string, - followers-count: int32 -} - -create type TweetMessageType as open { - tweetid: int64, - user: TwitterUserType, - sender-location: point, - send-time: datetime, - referred-topics: {{ string }}, - countA: int32, - countB: int32 -} - -create dataset TweetMessages(TweetMessageType) -primary key tweetid; - -create index msgNgramIx on TweetMessages(message-text: string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm"; - -for $t1 in dataset('TweetMessages') -where $t1.tweetid > int64("240") -order by $t1.tweetid -return { - "tweet": {"id": $t1.tweetid, "topics" : $t1.message-text} , - "similar-tweets": for $t2 in dataset('TweetMessages') - let $sim := edit-distance-check($t1.message-text, $t2.message-text, 7) - where $sim[0] and - $t2.tweetid != $t1.tweetid - order by $t2.tweetid - return {"id": $t2.tweetid, "topics" : $t2.message-text} -}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql deleted file mode 100644 index a5cb417..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_02.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. - * CSX has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql deleted file mode 100644 index c689120..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_03.aql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance-check function of its authors. 
- * DBLP has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql deleted file mode 100644 index 68c255e..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance-check_04.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors. - * DBLP and CSX both have a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index_DBLP on DBLP(authors:string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(authors:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance-check($a.authors, $b.authors, 3)[0] and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql deleted file mode 100644 index 860461d..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_02.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. 
- * CSX has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql deleted file mode 100644 index 19a2cfa..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_03.aql +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. - * DBLP has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql deleted file mode 100644 index b2bf9cf..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-edit-distance_04.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. 
- * DBLP and CSX both have a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index_DBLP on DBLP(authors:string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql deleted file mode 100644 index 7cc50d1..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. - * CSX has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where $a.authors ~= $b.authors and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql deleted file mode 100644 index 956913d..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on ~= using edit distance of its authors. - * DBLP has a 3-gram enforced open index on authors?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as open { - id: int32, - dblpid: string, - title: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_03.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where $a.authors ~= $b.authors and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql deleted file mode 100644 index 237ac57..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, closed DBLP and open CSX, based on ~= using Jaccard of their titles' 3-gram tokens. - * CSX has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(title:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_02.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql deleted file mode 100644 index 6ef70de..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' 3-gram tokens. - * DBLP has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_03.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql deleted file mode 100644 index bbaa284..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_02.aql +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, closed DBLP and open CSX, based the similarity-jaccard-check function of their titles' 3-gram tokens. - * CSX has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(title:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql deleted file mode 100644 index a0f8683..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_03.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy self joins an open dataset DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens. - * DBLP has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql deleted file mode 100644 index 26e2504..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard-check_04.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard-check function of their titles' 3-gram tokens. - * DBLP and CSX both have a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index_DBLP on DBLP(title:string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(title:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql deleted file mode 100644 index 82b31c3..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_02.aql +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard function of their titles' 3-gram tokens. - * CSX has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql deleted file mode 100644 index e702d56..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_03.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. - * DBLP has a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title:string?) 
type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql deleted file mode 100644 index e1a9164..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/ngram-jaccard_04.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard function of their titles' 3-gram tokens. 
- * DBLP and CSX both have a 3-gram enforced open index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as open { - id: int32, - dblpid: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index_DBLP on DBLP(title:string?) type ngram(3) enforced; - -create index ngram_index_CSX on CSX(title:string?) type ngram(3) enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/f372c96d/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql deleted file mode 100644 index 3990354..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/open-index-enforced/inverted-index-join/word-fuzzyeq-jaccard_02.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens. - * CSX has an enforced open keyword index on title?, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as open { - id: int32, - csxid: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index keyword_index on CSX(title:string?) type keyword enforced; - -write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_02.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id -return {"arec": $a, "brec": $b }
