http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql deleted file mode 100644 index c1a1817..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.aql +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. - * CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id -return {"arec": $a, "brec": $b }
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.sqlpp new file mode 100644 index 0000000..950a058 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_02.sqlpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors. + * CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_02.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where ((test.`edit-distance`(a.authors,b.authors) < 3) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql deleted file mode 100644 index c65b61b..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.aql +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where edit-distance($a.authors, $b.authors) < 3 and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.sqlpp new file mode 100644 index 0000000..dc8ca3c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_03.sqlpp @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. + * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where ((test.`edit-distance`(a.authors,b.authors) < 3) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.aql deleted 
file mode 100644 index 6287cb0..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $ed := edit-distance($a.authors, $b.authors) -where $ed < 3 and $a.id < $b.id -return {"arec": $a, "brec": $b, "ed": $ed} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.sqlpp new file mode 100644 index 0000000..0284ff4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-edit-distance_04.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors. + * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-edit-distance_03.adm"; +select element {'arec':a,'brec':b,'ed':ed} +from DBLP as a, + DBLP as b +with ed as test.`edit-distance`(a.authors,b.authors) +where ((ed < 3) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql deleted file mode 100644 index 66bb2ac..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where $a.authors ~= $b.authors and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.sqlpp 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.sqlpp new file mode 100644 index 0000000..2021605 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_01.sqlpp @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors. + * CSX has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm"; +set `simfunction` `edit-distance`; + +set `simthreshold` `3`; + +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where ((a.authors ~= b.authors) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql deleted file mode 100644 index c611d3a..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on ~= using edit distance of its authors. - * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(authors) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_03.adm"; - -set simfunction 'edit-distance'; -set simthreshold '3'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where $a.authors ~= $b.authors and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.sqlpp new file mode 100644 index 0000000..d900a72 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-edit-distance_03.sqlpp @@ -0,0 +1,53 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on ~= using edit distance of its authors. + * DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (authors) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_03.adm"; +set `simfunction` `edit-distance`; + +set `simthreshold` `3`; + +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where ((a.authors ~= b.authors) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql deleted file mode 100644 index 085a383..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.aql +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard their titles' 3-gram tokens. - * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_02.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.sqlpp 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.sqlpp new file mode 100644 index 0000000..aac9ca7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_02.sqlpp @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' 3-gram tokens. + * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_02.adm"; +set `simfunction` `jaccard`; + +set `simthreshold` `0.5f`; + +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where ((test.`gram-tokens`(a.title,3,false) ~= test.`gram-tokens`(b.title,3,false)) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql deleted file mode 100644 index 444b735..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.aql +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_03.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where gram-tokens($a.title, 3, false) ~= gram-tokens($b.title, 3, false) and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.sqlpp new file mode 100644 index 0000000..d3bcd49 --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-fuzzyeq-jaccard_03.sqlpp @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' 3-gram tokens. + * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_03.adm"; +set `simfunction` `jaccard`; + +set `simthreshold` `0.5f`; + +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where ((test.`gram-tokens`(a.title,3,false) ~= test.`gram-tokens`(b.title,3,false)) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql deleted file mode 100644 index 31ec7a5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard-check function of their titles' 3-gram tokens. - * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.sqlpp new 
file mode 100644 index 0000000..460f1cc --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_02.sqlpp @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens. + * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_02.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where (test.`similarity-jaccard-check`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false),0.500000f)[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql deleted file mode 100644 index 6255558..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.sqlpp new file mode 100644 index 0000000..5e180f2 --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_03.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens. + * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_03.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where (test.`similarity-jaccard-check`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false),0.500000f)[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.aql deleted file mode 100644 index f70df08..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_04.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard-check(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false), 0.5f) -where $jacc[0] and $a.id < $b.id -return {"arec": $a, "brec": $b, "jacc": $jacc[1] } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.sqlpp new 
file mode 100644 index 0000000..2b9aa31 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard-check_04.sqlpp @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' 3-gram tokens. + * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard-check_04.adm"; +select element {'arec':a,'brec':b,'jacc':jacc[1]} +from DBLP as a, + DBLP as b +with jacc as test.`similarity-jaccard-check`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false),0.500000f) +where (jacc[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql deleted file mode 100644 index 2e1b065..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based the similarity-jaccard function of their titles' 3-gram tokens. - * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index ngram_index on CSX(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.sqlpp new file mode 100644 index 
0000000..a2dc8f2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_02.sqlpp @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens. + * CSX has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index ngram_index on CSX (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_02.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where ((test.`similarity-jaccard`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false)) >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql deleted file mode 100644 index 31003d4..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.sqlpp new file mode 100644 index 0000000..fbc230c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_03.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. + * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_03.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where ((test.`similarity-jaccard`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false)) >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.aql deleted file mode 100644 index 1e86763..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. - * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; -set import-private-functions 'true'; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index ngram_index on DBLP(title) type ngram(3); - -write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_04.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard(gram-tokens($a.title, 3, false), gram-tokens($b.title, 3, false)) -where $jacc >= 0.5f and $a.id < $b.id -return {"arec": $a, "brec": $b, "jacc": $jacc } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.sqlpp new file mode 100644 index 0000000..5265874 
--- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ngram-jaccard_04.sqlpp @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens. + * DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +set `import-private-functions` `true`; + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index ngram_index on DBLP (title) type ngram (3); + +write output to asterix_nc1:"rttest/inverted-index-join_ngram-jaccard_04.adm"; +select element {'arec':a,'brec':b,'jacc':jacc} +from DBLP as a, + DBLP as b +with jacc as test.`similarity-jaccard`(test.`gram-tokens`(a.title,3,false),test.`gram-tokens`(b.title,3,false)) +where ((jacc >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql deleted file mode 100644 index f074ad1..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance-check function of their interest lists. - * Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create dataset Customers2(CustomerType) primary key cid; - -create index interests_index on Customers2(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_02.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers2') -where edit-distance-check($a.interests, $b.interests, 3)[0] and $a.cid < $b.cid -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.sqlpp new file mode 100644 index 0000000..f7e6047 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_02.sqlpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets, Customers and Customers2, based on the edit-distance-check function of their interest lists. + * Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : [string], + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create dataset Customers2(CustomerType) primary key cid; + +create index interests_index on Customers2 (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_02.adm"; +select element {'arec':a,'brec':b} +from Customers as a, + Customers2 as b +where (test.`edit-distance-check`(a.interests,b.interests,3)[0] and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql deleted file mode 100644 index 723d0b5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.aql +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance-check function of its interest lists. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_03.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -where edit-distance-check($a.interests, $b.interests, 3)[0] and $a.cid < $b.cid -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.sqlpp new file mode 100644 index 0000000..6435990 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_03.sqlpp @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance-check function of its interest lists. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : [string], + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_03.adm"; +select element {'arec':a,'brec':b} +from Customers as a, + Customers as b +where (test.`edit-distance-check`(a.interests,b.interests,3)[0] and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.aql deleted file mode 100644 index 0bb95d5..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance-check function of its interest lists. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_04.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -let $ed := edit-distance-check($a.interests, $b.interests, 3) -where $ed[0] and $a.cid < $b.cid -return {"arec": $a, "brec": $b, "ed": $ed[1] } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.sqlpp new file mode 100644 index 0000000..9f14368 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance-check_04.sqlpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance-check function of its interest lists. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : [string], + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance-check_04.adm"; +select element {'arec':a,'brec':b,'ed':ed[1]} +from Customers as a, + Customers as b +with ed as test.`edit-distance-check`(a.interests,b.interests,3) +where (ed[0] and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql deleted file mode 100644 index e5f5b40..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance function of their interest lists. - * Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create dataset Customers2(CustomerType) primary key cid; - -create index interests_index on Customers2(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance_02.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers2') -where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.sqlpp new file mode 100644 index 0000000..5ebb518 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_02.sqlpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets, Customers and Customers2, based on the edit-distance function of their interest lists. + * Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : [string], + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create dataset Customers2(CustomerType) primary key cid; + +create index interests_index on Customers2 (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance_02.adm"; +select element {'arec':a,'brec':b} +from Customers as a, + Customers2 as b +where ((test.`edit-distance`(a.interests,b.interests) <= 2) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql deleted file mode 100644 index f78a6bd..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/olist-edit-distance_03.aql +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the edit-distance function of its interest lists. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: [string], - children: [ { name: string, age: int32? } ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_olist-edit-distance_03.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -where edit-distance($a.interests, $b.interests) <= 2 and $a.cid < $b.cid -return {"arec": $a, "brec": $b }