http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.4.query.aql new file mode 100644 index 0000000..f1f2497 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.1.2/ngram-jaccard-inline.4.query.aql @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; +set import-private-functions 'true'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where gram-tokens($a.nested.title, 3, false) ~= gram-tokens($b.nested.title, 3, false) and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id +return { "arec": $a.nested, "brec": $b.nested }
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.1.ddl.aql new file mode 100644 index 0000000..ad4db35 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.1.ddl.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens. + * We expect the join to be transformed into a prefix-based fuzzy join followed by a fuzzy select and a < select.
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.2.update.aql new file mode 100644 index 0000000..a2633b1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.2.update.aql @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.3.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.3.ddl.aql new file mode 100644 index 0000000..0359448 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.3.ddl.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +create index keyword_index on DBLP(nested.title) type keyword; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.4.query.aql new file mode 100644 index 0000000..44a38d0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.1/word-jaccard.4.query.aql @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where word-tokens($a.nested.title) ~= word-tokens($b.nested.title) + and word-tokens($a.nested.misc) ~= word-tokens($b.nested.misc) + and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id +return { "arec": $a.nested, "brec": $b.nested } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.1.ddl.aql new file mode 100644 index 0000000..28d554f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.1.ddl.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens. + * We expect the join to be transformed into a prefix-based fuzzy join followed by a fuzzy select plus a < select. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.2.update.aql new file mode 100644 index 0000000..a2633b1 --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.2.update.aql @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.3.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.3.ddl.aql 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.3.ddl.aql new file mode 100644 index 0000000..9307af9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.3.ddl.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use dataverse test; + +create index ngram_index on DBLP(nested.title) type ngram(3); + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.4.query.aql new file mode 100644 index 0000000..38eaed6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.2.2/ngram-jaccard-inline.4.query.aql @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use dataverse test; +set import-private-functions 'true'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +where gram-tokens($a.nested.title, 3, false) ~= gram-tokens($b.nested.title, 3, false) + and gram-tokens($a.nested.authors, 3, false) ~= gram-tokens($b.nested.authors, 3, false) + and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id +return { "arec": $a.nested, "brec": $b.nested } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.1.ddl.aql new file mode 100644 index 0000000..3dd5e5a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.1.ddl.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Three-way fuzzy join over two datasets, DBLP and CSX (CSX is used twice), based on the similarity-jaccard function of two of their fields. + * We expect the join to be transformed into two two-way fuzzy joins based on prefix-filtering strategies, plus a < select. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.2.update.aql new file mode 100644 index 0000000..629c74b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.2.update.aql @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.3.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.3.ddl.aql new file mode 100644 index 0000000..0359448 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.3.ddl.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +create index keyword_index on DBLP(nested.title) type keyword; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.4.query.aql new file mode 100644 index 0000000..18795c2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.1/word-jaccard.4.query.aql @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +for $c in dataset('CSX') +where word-tokens($a.nested.title) /* +indexnl */ ~= word-tokens($b.nested.title) + and word-tokens($a.nested.misc) ~= word-tokens($c.nested.misc) + and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id +return { "arec": $a.nested, "brec": $b.nested } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.1.ddl.aql new file mode 100644 index 0000000..309b6d8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.1.ddl.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Three-way fuzzy join on two datasets, DBLP and CSX, based on the similarity-jaccard function of their fields' 3-gram tokens. + * We expect the join to be transformed into two two-way fuzzy joins over the 3-gram tokens, followed by a < select. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.2.update.aql new file mode 100644 index 0000000..629c74b --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.2.update.aql @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.3.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.3.ddl.aql 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.3.ddl.aql new file mode 100644 index 0000000..9307af9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.3.ddl.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use dataverse test; + +create index ngram_index on DBLP(nested.title) type ngram(3); + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.4.query.aql new file mode 100644 index 0000000..d514b49 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.3.2/ngram-jaccard-inline.4.query.aql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use dataverse test; +set import-private-functions 'true'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +for $c in dataset('DBLP') +where gram-tokens($a.nested.title, 3, false) /* +indexnl */ ~= gram-tokens($b.nested.title, 3, false) + and gram-tokens($b.nested.authors, 3, false) ~= gram-tokens($c.nested.authors, 3, false) + and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id +return { "arec": $a.nested, "brec": $b.nested } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.1.ddl.aql new file mode 100644 index 0000000..d434cb8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.1.ddl.aql @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets in four ways, DBLP and CSX, based on the similarity-jaccard function of their titles/authors' word/ngram tokens. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.2.update.aql new file mode 100644 index 0000000..a2633b1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.2.update.aql @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.3.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.3.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.3.ddl.aql new file mode 100644 index 0000000..0359448 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.3.ddl.aql @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +create index keyword_index on DBLP(nested.title) type keyword; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.4.query.aql new file mode 100644 index 0000000..5deffe7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.1/word-jaccard.4.query.aql @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; +set import-private-functions 'true'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +for $c in dataset('CSX') +for $d in dataset('CSX') +where word-tokens($a.nested.title) /* +indexnl */ ~= word-tokens($b.nested.title) + and word-tokens($a.nested.authors) /* +indexnl */ ~= word-tokens($c.nested.authors) + and gram-tokens($a.nested.authors, 3, false) ~= gram-tokens($d.nested.authors, 3, false) + and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id, $c.nested.id, $d.nested.id +return { "arec": $a.nested, "brec": $b.nested, "cred": $c.nested, "drec": $d.nested } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.1.ddl.aql new file mode 100644 index 0000000..d437456 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.1.ddl.aql @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins four datasets in four ways, i.e. DBLP, CSX, DBLPtmp, CSXtmp, + * based on the similarity-jaccard function of their titles/authors' word/ngram tokens. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; +use dataverse test; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; + http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.2.update.aql new file mode 100644 index 
0000000..a2633b1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.2.update.aql @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.3.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.3.ddl.aql 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.3.ddl.aql new file mode 100644 index 0000000..042f3ce --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.3.ddl.aql @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.4.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.4.query.aql new file mode 100644 index 0000000..53c83f4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-4.4.2/word-jaccard.4.query.aql @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse test; +set import-private-functions 'true'; + +for $a in dataset('DBLP') +for $b in dataset('CSX') +for $c in dataset('DBLPtmp') +for $d in dataset('CSXtmp') +where word-tokens($a.nested.title) /* +indexnl */ ~= word-tokens($b.nested.title) + and word-tokens($b.nested.title) /* +indexnl */ ~= word-tokens($c.title) + and gram-tokens($c.authors, 3, false) ~= gram-tokens($d.authors, 3, false) + and $a.nested.id < $b.nested.id +order by $a.nested.id, $b.nested.id, $c.id, $d.id +return { "arec": $a.nested, "brec": $b.nested, "cred": $c, "drec": $d } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.1.ddl.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.1.ddl.aql new file mode 100644 index 0000000..163ed3d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.1.ddl.aql @@ -0,0 +1,53 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +drop dataverse fuzzyjoin if exists; + +create dataverse fuzzyjoin; + +use dataverse fuzzyjoin; + +create type DBLPNestedType as closed { + id: int64, + dblpid: string, + title: string, + authors: string, + misc: string +} + +create type DBLPType as closed { + nested: DBLPNestedType +} + +create type CSXNestedType as closed { + id: int64, + csxid: string, + title: string, + authors: string, + misc: string +} + +create type CSXType as closed { + nested: CSXNestedType +} + +create dataset DBLPtmp(DBLPNestedType) primary key id; +create dataset CSXtmp(CSXNestedType) primary key id; + +create dataset DBLP(DBLPType) primary key nested.id; +create dataset CSX(CSXType) primary key nested.id; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.2.update.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.2.update.aql 
new file mode 100644 index 0000000..094d124 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.2.update.aql @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use dataverse fuzzyjoin; + +load dataset DBLPtmp +using localfs +(("path"="asterix_nc1://data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted; + +load dataset CSXtmp +using localfs +(("path"="asterix_nc1://data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")); + +insert into dataset DBLP( + for $x in dataset DBLPtmp + return { + "nested": $x + } +); + +insert into dataset CSX( + for $x in dataset CSXtmp + return { + "nested": $x + } +); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.3.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.3.query.aql new file mode 100644 index 0000000..20b74df --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_6/dblp-csx-aqlplus_6.3.query.aql @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use dataverse fuzzyjoin; + +for $s in dataset('DBLP') +for $t in dataset('CSX') +where word-tokens($s.nested.title) ~= word-tokens($t.nested.title) +order by $s.nested.id, $t.nested.id +return {"srec": $s.nested.id, "trec": $t.nested.id} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.3.query.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.3.query.aql index 74f92e3..7845895 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.3.query.aql +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.3.query.aql @@ -23,7 +23,7 @@ set simthreshold '.5f'; for $dblp in dataset('DBLP') for $csx in dataset('CSX') for $dblp2 in dataset('DBLP') -where word-tokens($dblp.title) ~= word-tokens($csx.title) and word-tokens($csx.authors) ~= word-tokens($dblp2.authors) +where word-tokens($dblp.title) /* +indexnl */ ~= word-tokens($csx.title) and word-tokens($csx.authors) ~= word-tokens($dblp2.authors) order by $dblp.id, $csx.id, $dblp2.id return {'dblp': $dblp, 'csx': $csx, 'dblp2': $dblp2} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1/basic-1_1.1.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1/basic-1_1.1.adm 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1/basic-1_1.1.adm new file mode 100644 index 0000000..f6c2cab --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1/basic-1_1.1.adm @@ -0,0 +1 @@ +{ "psj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "nsj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "nvj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "nvr": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "tpsj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "tnsj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "itpsj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "itnsj": [ { "s1": "Clear, Concise, and fun!", "s2": "Clear, Concise, and Charitable" } ], "vj": [ true, 0.6 ], "sr": true } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.adm new file mode 100644 index 0000000..b72e004 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_1/basic-1_1_1.1.adm @@ -0,0 +1,13 @@ +{ "authors": "Antti Airola Sampo Pyysalo Jari Björne Tapio Pahikkala Filip Ginter Tapio Salakoski", "hdistinct": [ -1703879002, -1473242502, -1260858361, -981055830, -887832212, -658173301, -658173300, 152866250, 1308706204, 1407504920, 1571121603, 1673795737 ], "hcount": 12, "hash": [ 1407504920, -1473242502, 
-981055830, 1673795737, 152866250, -1260858361, -658173301, -1703879002, -887832212, 1571121603, -658173300, 1308706204 ], "vhcount": 12, "wdistinct": [ "airola", "antti", "björne", "filip", "ginter", "jari", "pahikkala", "pyysalo", "salakoski", "sampo", "tapio" ], "wcount": 11, "word": [ "antti", "airola", "sampo", "pyysalo", "jari", "björne", "tapio", "pahikkala", "filip", "ginter", "tapio", "salakoski" ], "vwcount": 12 } +{ "authors": "Bart Baesens Stijn Viaene Tony Van Gestel Johan A. K. Suykens Guido Dedene Bart De Moor Jan Vanthienen", "hdistinct": [ -1935703338, -1911867284, -1622104926, -1358578075, -935776866, -214094487, 111100446, 178525531, 183137487, 202777881, 387687108, 476250961, 819214362, 1203632773, 1456032451, 2095219701, 2095219702, 2132187494 ], "hcount": 18, "hash": [ 2095219701, -1358578075, -1911867284, -1622104926, 2132187494, -214094487, -935776866, 387687108, 202777881, 1456032451, 111100446, 819214362, 178525531, 2095219702, 476250961, 183137487, 1203632773, -1935703338 ], "vhcount": 18, "wdistinct": [ "a", "baesens", "bart", "de", "dedene", "gestel", "guido", "jan", "johan", "k", "moor", "stijn", "suykens", "tony", "van", "vanthienen", "viaene" ], "wcount": 17, "word": [ "bart", "baesens", "stijn", "viaene", "tony", "van", "gestel", "johan", "a", "k", "suykens", "guido", "dedene", "bart", "de", "moor", "jan", "vanthienen" ], "vwcount": 18 } +{ "authors": "Cheong Hee Park Haesun Park", "hdistinct": [ 201502510, 1474696610, 1474696611, 1810069474, 1813499426 ], "hcount": 5, "hash": [ 1813499426, 1810069474, 1474696610, 201502510, 1474696611 ], "vhcount": 5, "wdistinct": [ "cheong", "haesun", "hee", "park" ], "wcount": 4, "word": [ "cheong", "hee", "park", "haesun", "park" ], "vwcount": 5 } +{ "authors": "Christopher J. C. Burges David J. 
Crisp", "hdistinct": [ -1980517994, -184499076, -184499075, 471822495, 885695371, 1216682123, 1230204720 ], "hcount": 7, "hash": [ 471822495, -184499076, 1216682123, -1980517994, 1230204720, -184499075, 885695371 ], "vhcount": 7, "wdistinct": [ "burges", "c", "christopher", "crisp", "david", "j" ], "wcount": 6, "word": [ "christopher", "j", "c", "burges", "david", "j", "crisp" ], "vwcount": 7 } +{ "authors": "Dominique Chanet Bjorn De Sutter Bruno De Bus Ludo Van Put Koen De Bosschere", "hdistinct": [ -1827235698, -1316812666, -727391969, -698069955, -690862623, -214094487, -204728799, 38143123, 106094358, 476250961, 476250962, 476250963, 1258550913, 1274211164 ], "hcount": 14, "hash": [ 1258550913, -698069955, 38143123, 476250961, -204728799, -1316812666, 476250962, -1827235698, 1274211164, -214094487, -690862623, -727391969, 476250963, 106094358 ], "vhcount": 14, "wdistinct": [ "bjorn", "bosschere", "bruno", "bus", "chanet", "de", "dominique", "koen", "ludo", "put", "sutter", "van" ], "wcount": 12, "word": [ "dominique", "chanet", "bjorn", "de", "sutter", "bruno", "de", "bus", "ludo", "van", "put", "koen", "de", "bosschere" ], "vwcount": 14 } +{ "authors": "Francis R. Bach Gert R. G. Lanckriet Michael I. Jordan", "hdistinct": [ -2039039025, -1050476689, -448767586, 54851252, 54851253, 419477052, 442128209, 637199601, 900395942, 1674769146 ], "hcount": 10, "hash": [ 419477052, 54851252, 1674769146, 900395942, 54851253, -1050476689, 637199601, -2039039025, 442128209, -448767586 ], "vhcount": 10, "wdistinct": [ "bach", "francis", "g", "gert", "i", "jordan", "lanckriet", "michael", "r" ], "wcount": 9, "word": [ "francis", "r", "bach", "gert", "r", "g", "lanckriet", "michael", "i", "jordan" ], "vwcount": 10 } +{ "authors": "Gavin C. Cawley Nicola L. C. 
Talbot", "hdistinct": [ -886454414, -534088337, 441647848, 481225853, 829405166, 1216682123, 1216682124 ], "hcount": 7, "hash": [ -534088337, 1216682123, 481225853, 441647848, 829405166, 1216682124, -886454414 ], "vhcount": 7, "wdistinct": [ "c", "cawley", "gavin", "l", "nicola", "talbot" ], "wcount": 6, "word": [ "gavin", "c", "cawley", "nicola", "l", "c", "talbot" ], "vwcount": 7 } +{ "authors": "Gavin C. Cawley Nicola L. C. Talbot Robert J. Foxall Stephen R. Dorling Danilo P. Mandic", "hdistinct": [ -1252796223, -959052990, -886454414, -534088337, -184499076, -130111359, 54851252, 411288970, 414405350, 441647848, 481225853, 829405166, 1216682123, 1216682124, 1304455685, 1784033734 ], "hcount": 16, "hash": [ -534088337, 1216682123, 481225853, 441647848, 829405166, 1216682124, -886454414, 414405350, -184499076, 411288970, -1252796223, 54851252, 1304455685, -130111359, -959052990, 1784033734 ], "vhcount": 16, "wdistinct": [ "c", "cawley", "danilo", "dorling", "foxall", "gavin", "j", "l", "mandic", "nicola", "p", "r", "robert", "stephen", "talbot" ], "wcount": 15, "word": [ "gavin", "c", "cawley", "nicola", "l", "c", "talbot", "robert", "j", "foxall", "stephen", "r", "dorling", "danilo", "p", "mandic" ], "vwcount": 16 } +{ "authors": "R. 
Venkatesh Babu Patrick Pérez Patrick Bouthemy", "hdistinct": [ -1978580578, -1106952520, -1106952519, 54851252, 199754598, 672789821, 1248452165 ], "hcount": 7, "hash": [ 54851252, 672789821, -1978580578, -1106952520, 199754598, -1106952519, 1248452165 ], "vhcount": 7, "wdistinct": [ "babu", "bouthemy", "patrick", "pérez", "r", "venkatesh" ], "wcount": 6, "word": [ "r", "venkatesh", "babu", "patrick", "pérez", "patrick", "bouthemy" ], "vwcount": 7 } +{ "authors": "Rafael Serrano-Gotarredona Teresa Serrano-Gotarredona Antonio Acosta-Jimenez Bernabé Linares-Barranco", "hdistinct": [ -1956851570, -1956851569, -1726594101, -1691145788, -1282838506, -564025963, -75151904, -75151903, 405246970, 487904168, 819607985, 1017487486 ], "hcount": 12, "hash": [ -564025963, -75151904, -1956851570, -1282838506, -75151903, -1956851569, 1017487486, -1726594101, -1691145788, 819607985, 487904168, 405246970 ], "vhcount": 12, "wdistinct": [ "acosta", "antonio", "barranco", "bernabé", "gotarredona", "jimenez", "linares", "rafael", "serrano", "teresa" ], "wcount": 10, "word": [ "rafael", "serrano", "gotarredona", "teresa", "serrano", "gotarredona", "antonio", "acosta", "jimenez", "bernabé", "linares", "barranco" ], "vwcount": 12 } +{ "authors": "Samuel G. Steckley Shane G. 
Henderson", "hdistinct": [ -1415708156, -1050476689, -1050476688, 263160025, 723460411, 1321749268 ], "hcount": 6, "hash": [ 723460411, -1050476689, 1321749268, 263160025, -1050476688, -1415708156 ], "vhcount": 6, "wdistinct": [ "g", "henderson", "samuel", "shane", "steckley" ], "wcount": 5, "word": [ "samuel", "g", "steckley", "shane", "g", "henderson" ], "vwcount": 6 } +{ "authors": "Susan Hert Michael Hoffmann Lutz Kettner Sylvain Pion Michael Seel", "hdistinct": [ -2138581461, -2081084774, -2039039025, -2039039024, -1863687409, -735414009, -716104536, -581739473, -340039108, 1858351859 ], "hcount": 10, "hash": [ -340039108, -2138581461, -2039039025, 1858351859, -735414009, -1863687409, -716104536, -2081084774, -2039039024, -581739473 ], "vhcount": 10, "wdistinct": [ "hert", "hoffmann", "kettner", "lutz", "michael", "pion", "seel", "susan", "sylvain" ], "wcount": 9, "word": [ "susan", "hert", "michael", "hoffmann", "lutz", "kettner", "sylvain", "pion", "michael", "seel" ], "vwcount": 10 } +{ "authors": "Tony Van Gestel Bart Baesens Johan A. K. 
Suykens Dirk Van den Poel Dirk-Emma Baestaens Marleen Willekens", "hdistinct": [ -1424854606, -1358578075, -1067178786, -935776866, -389896470, -214094487, -214094486, 111100446, 202777881, 367084408, 387687108, 395536654, 395536655, 699609214, 1181197775, 1456032451, 2095219701, 2132187494 ], "hcount": 18, "hash": [ 2132187494, -214094487, -935776866, 2095219701, -1358578075, 387687108, 202777881, 1456032451, 111100446, 395536654, -214094486, 1181197775, 367084408, 395536655, 699609214, -1424854606, -1067178786, -389896470 ], "vhcount": 18, "wdistinct": [ "a", "baesens", "baestaens", "bart", "den", "dirk", "emma", "gestel", "johan", "k", "marleen", "poel", "suykens", "tony", "van", "willekens" ], "wcount": 16, "word": [ "tony", "van", "gestel", "bart", "baesens", "johan", "a", "k", "suykens", "dirk", "van", "den", "poel", "dirk", "emma", "baestaens", "marleen", "willekens" ], "vwcount": 18 } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.adm new file mode 100644 index 0000000..714819b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.10.adm @@ -0,0 +1 @@ +[ true, 0.8 ] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.adm 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.adm new file mode 100644 index 0000000..aec258d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.3.adm @@ -0,0 +1 @@ +0.8 http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.adm new file mode 100644 index 0000000..aec258d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.4.adm @@ -0,0 +1 @@ +0.8 http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.adm new file mode 100644 index 0000000..aec258d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.5.adm @@ -0,0 +1 @@ +0.8 http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.adm new file mode 
100644 index 0000000..aec258d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.6.adm @@ -0,0 +1 @@ +0.8 http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.adm new file mode 100644 index 0000000..682547b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.7.adm @@ -0,0 +1 @@ +0.33333334 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.adm new file mode 100644 index 0000000..703bf89 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.8.adm @@ -0,0 +1 @@ +0.35714287 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.adm new file mode 100644 index 0000000..aec258d --- /dev/null 
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_2/basic-1_1_2.9.adm @@ -0,0 +1 @@ +0.8 http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.3.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.3.adm new file mode 100644 index 0000000..1d71ef9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.3.adm @@ -0,0 +1 @@ +0.3 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.4.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.4.adm new file mode 100644 index 0000000..3875782 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_1_3/basic-1_1_2.4.adm @@ -0,0 +1 @@ +[ 0.3, 0.0, 0.0, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3 ] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.adm new file mode 100644 index 0000000..fa92713 --- 
/dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.3.adm @@ -0,0 +1 @@ +[ [ 4, 5 ], 0.33333334, 0.4, 0.33333334, 0.5, 0.5, [ [ [ 1.0, 0.5, 0.0, 0.0, 0.5, 0.33333334 ], [ 1.0, 0.5, 0.0, 0.0, 0.5, 0.33333334 ], [ 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] ] ], [ [ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0 ] ], [ [ [ true, 0.5 ], [ true, 0.6363636 ], [ true, 0.6363636 ], [ true, 0.8 ], [ true, 0.5 ], [ true, 0.6363636 ], [ true, 0.7 ], [ true, 0.8 ], [ true, 0.9 ] ] ], [ ], [ ], [ [ null, null, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], [ null, null, 1, 2, 3, 4, 5, 6, 7, 8 ], [ null, null, 1, 2, 3, 4, 5, 6, 7 ], [ null, null, 1, 2, 3, 4, 5, 6 ], [ null, null, 1, 2, 3, 4, 5 ], [ null, null, 1, 2, 3 ], [ null, null, 1, 2 ], [ null, null, 1 ], [ null, null ] ], [ 1, 2, 3, 4, 5, 6 ], [ [ ], [ null, null, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ] ] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.adm new file mode 100644 index 0000000..ba0263b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.4.adm @@ -0,0 +1,4 @@ +[ null, 5, 6 ] +[ null, 3, 4 ] +[ 5, 6 ] +[ 3, 4 ] http://git-wip-us.apache.org/repos/asf/asterixdb/blob/d906bd89/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.adm 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/basic-1_2_1/basic-1_2_1.5.adm new file mode 100644 index 0000000..e69de29
