Dmitry Lychagin has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1987
Change subject: [ASTERIXDB-2078][SQL] DISTINCT modifier for aggregate functions ...................................................................... [ASTERIXDB-2078][SQL] DISTINCT modifier for aggregate functions - user model changes: yes - storage format changes: no - interface changes: no Details: Adds support for DISTINCT modifier in aggregate functions: AGG(DISTINCT expr), ARRAY_AGG(DISTINCT expr), COLL_AGG(DISTINCT expr) where AGG = COUNT | SUM | AVG | MIN | MAX Change-Id: I52772349cbcbfc68a3a1ff261d610871ca38748d --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/avg_distinct/avg_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/count_distinct/count_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/max_distinct/max_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/min_distinct/min_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/sum_distinct/sum_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/avg_distinct/avg_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/count_distinct/count_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/max_distinct/max_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/min_distinct/min_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/sum_distinct/sum_distinct.1.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.1.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.2.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.3.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.4.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.5.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.6.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.7.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/avg_distinct/avg_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/count_distinct/count_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/max_distinct/max_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/min_distinct/min_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/sum_distinct/sum_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/avg_distinct/avg_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/count_distinct/count_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/max_distinct/max_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/min_distinct/min_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/sum_distinct/sum_distinct.1.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.3.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.4.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.5.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.6.adm A asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.7.adm M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java A asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppDistinctAggregationSugarVisitor.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupBySugarVisitor.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupByVisitor.java M asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java M asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AbstractFunctionDescriptor.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java M hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/expressions/AbstractFunctionCallExpression.java 42 files changed, 924 insertions(+), 117 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/87/1987/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/avg_distinct/avg_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/avg_distinct/avg_distinct.1.query.sqlpp new file mode 100644 index 0000000..cae2b8f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/avg_distinct/avg_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': to_bigint(array_avg(distinct [10,20,30,10,20,30,10])), + 't2': to_bigint(array_avg(distinct [null,missing,null,missing])), + 't3': to_bigint(array_avg(distinct [40,null,missing,50,40,null,missing,50,40])), + 't4': to_bigint(array_avg(distinct [])) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/count_distinct/count_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/count_distinct/count_distinct.1.query.sqlpp new file mode 100644 index 0000000..80dc741 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/count_distinct/count_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': array_count(distinct [10,20,30,10,20,30,10]), + 't2': array_count(distinct [null,missing,null,missing]), + 't3': array_count(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': array_count(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/max_distinct/max_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/max_distinct/max_distinct.1.query.sqlpp new file mode 100644 index 0000000..0f381f7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/max_distinct/max_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': array_max(distinct [10,20,30,10,20,30,10]), + 't2': array_max(distinct [null,missing,null,missing]), + 't3': array_max(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': array_max(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/min_distinct/min_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/min_distinct/min_distinct.1.query.sqlpp new file mode 100644 index 0000000..8a25c31 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/min_distinct/min_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': array_min(distinct [10,20,30,10,20,30,10]), + 't2': array_min(distinct [null,missing,null,missing]), + 't3': array_min(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': array_min(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/sum_distinct/sum_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/sum_distinct/sum_distinct.1.query.sqlpp new file mode 100644 index 0000000..53aacdf --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate-sql/sum_distinct/sum_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': array_sum(distinct [10,20,30,10,20,30,10]), + 't2': array_sum(distinct [null,missing,null,missing]), + 't3': array_sum(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': array_sum(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/avg_distinct/avg_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/avg_distinct/avg_distinct.1.query.sqlpp new file mode 100644 index 0000000..c9a79fb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/avg_distinct/avg_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': to_bigint(coll_avg(distinct [10,20,30,10,20,30,10])), + 't2': to_bigint(coll_avg(distinct [null,missing,null,missing])), + 't3': to_bigint(coll_avg(distinct [40,null,missing,50,40,null,missing,50,40])), + 't4': to_bigint(coll_avg(distinct [])) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/count_distinct/count_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/count_distinct/count_distinct.1.query.sqlpp new file mode 100644 index 0000000..a7d9b90 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/count_distinct/count_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': coll_count(distinct [10,20,30,10,20,30,10]), + 't2': coll_count(distinct [null,missing,null,missing]), + 't3': coll_count(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': coll_count(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/max_distinct/max_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/max_distinct/max_distinct.1.query.sqlpp new file mode 100644 index 0000000..70f273f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/max_distinct/max_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': coll_max(distinct [10,20,30,10,20,30,10]), + 't2': coll_max(distinct [null,missing,null,missing]), + 't3': coll_max(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': coll_max(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/min_distinct/min_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/min_distinct/min_distinct.1.query.sqlpp new file mode 100644 index 0000000..77684a0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/min_distinct/min_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': coll_min(distinct [10,20,30,10,20,30,10]), + 't2': coll_min(distinct [null,missing,null,missing]), + 't3': coll_min(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': coll_min(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/sum_distinct/sum_distinct.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/sum_distinct/sum_distinct.1.query.sqlpp new file mode 100644 index 0000000..6991a74 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/aggregate/sum_distinct/sum_distinct.1.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + { + 't1': coll_sum(distinct [10,20,30,10,20,30,10]), + 't2': coll_sum(distinct [null,missing,null,missing]), + 't3': coll_sum(distinct [40,null,missing,50,40,null,missing,50,40]), + 't4': coll_sum(distinct []) + } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.1.ddl.sqlpp new file mode 100644 index 0000000..0330592 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.1.ddl.sqlpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE gby IF EXISTS; +CREATE DATAVERSE gby; + +USE gby; + +CREATE TYPE EmpType AS { + name : string +}; + +CREATE DATASET Employee(EmpType) PRIMARY KEY name; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.2.update.sqlpp new file mode 100644 index 0000000..3fe9478 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.2.update.sqlpp @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +INSERT INTO Employee ( +[ + { 'name': 'a', 'deptno': 1, 'salary': 2000 }, + { 'name': 'b', 'deptno': 1, 'salary': 3000 }, + { 'name': 'c', 'deptno': 1, 'salary': 2000 }, + { 'name': 'd', 'deptno': 1, 'salary': 3000 }, + { 'name': 'e', 'deptno': 1, 'salary': 2000 }, + + { 'name': 'f', 'deptno': 2, 'salary': 4000 }, + { 'name': 'g', 'deptno': 2, 'salary': 5000 }, + { 'name': 'h', 'deptno': 2, 'salary': 4000 }, + { 'name': 'i', 'deptno': 2, 'salary': 5000 }, + { 'name': 'j', 'deptno': 2, 'salary': 4000 }, + + { 'name': 'k', 'deptno': 3, 'salary': null }, + { 'name': 'l', 'deptno': 3, 'salary': 6000 }, + { 'name': 'm', 'deptno': 3 }, + { 'name': 'n', 'deptno': 3, 'salary': null }, + { 'name': 'o', 'deptno': 3, 'salary': 6000 }, + { 'name': 'p', 'deptno': 3 }, + + { 'name': 'q', 'deptno': 4 } +] +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.3.query.sqlpp new file mode 100644 index 0000000..4c64f14 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.3.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +FROM gby.Employee e +GROUP BY e.deptno AS deptno +SELECT VALUE [ deptno, COUNT(DISTINCT e.salary) ] +ORDER BY deptno; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.4.query.sqlpp new file mode 100644 index 0000000..524a88b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.4.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +FROM gby.Employee e +GROUP BY e.deptno AS deptno +SELECT VALUE [ deptno, SUM(DISTINCT e.salary) ] +ORDER BY deptno; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.5.query.sqlpp new file mode 100644 index 0000000..d0a2aaf --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.5.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +FROM gby.Employee e +GROUP BY e.deptno AS deptno +SELECT VALUE [ deptno, TO_BIGINT(AVG(DISTINCT e.salary)) ] +ORDER BY deptno; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.6.query.sqlpp new file mode 100644 index 0000000..867ca3a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.6.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +FROM gby.Employee e +GROUP BY e.deptno AS deptno +SELECT VALUE [ deptno, MAX(DISTINCT e.salary) ] +ORDER BY deptno; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.7.query.sqlpp new file mode 100644 index 0000000..6ab01d7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/sugar-06-distinct/sugar-06-distinct.7.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +FROM gby.Employee e +GROUP BY e.deptno AS deptno +SELECT VALUE [ deptno, MIN(DISTINCT e.salary) ] +ORDER BY deptno; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/avg_distinct/avg_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/avg_distinct/avg_distinct.1.adm new file mode 100644 index 0000000..a7b36e6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/avg_distinct/avg_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 20, "t2": null, "t3": 45, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/count_distinct/count_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/count_distinct/count_distinct.1.adm new file mode 100644 index 0000000..445924e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/count_distinct/count_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 3, "t2": 0, "t3": 2, "t4": 0 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/max_distinct/max_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/max_distinct/max_distinct.1.adm new file mode 100644 index 0000000..c974ed2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/max_distinct/max_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 30, "t2": null, "t3": 50, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/min_distinct/min_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/min_distinct/min_distinct.1.adm new file mode 100644 index 0000000..aa09030 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/min_distinct/min_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 10, "t2": null, "t3": 40, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/sum_distinct/sum_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/sum_distinct/sum_distinct.1.adm new file mode 100644 index 0000000..afbe3eb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate-sql/sum_distinct/sum_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 60, "t2": null, "t3": 90, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/avg_distinct/avg_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/avg_distinct/avg_distinct.1.adm new file mode 100644 index 0000000..64d2407 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/avg_distinct/avg_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 20, "t2": null, "t3": null, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/count_distinct/count_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/count_distinct/count_distinct.1.adm new file mode 100644 index 0000000..d30ac1d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/count_distinct/count_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 3, "t2": 2, "t3": 4, "t4": 0 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/max_distinct/max_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/max_distinct/max_distinct.1.adm new file mode 100644 index 0000000..a4ffdd2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/max_distinct/max_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 30, "t2": null, "t3": null, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/min_distinct/min_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/min_distinct/min_distinct.1.adm new file mode 100644 index 0000000..00ae7b2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/min_distinct/min_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 10, "t2": null, "t3": null, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/sum_distinct/sum_distinct.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/sum_distinct/sum_distinct.1.adm new file mode 100644 index 0000000..8366796 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/aggregate/sum_distinct/sum_distinct.1.adm @@ -0,0 +1 @@ +{ "t1": 60, "t2": null, "t3": null, "t4": null } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.3.adm new file mode 100644 index 0000000..bb7e8f4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.3.adm @@ -0,0 +1,4 @@ +[ 1, 2 ] +[ 2, 2 ] +[ 3, 1 ] +[ 4, 0 ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.4.adm new file mode 100644 index 0000000..b42c458 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.4.adm @@ -0,0 +1,4 @@ +[ 1, 5000 ] +[ 2, 9000 ] +[ 3, 6000 ] +[ 4, null ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.5.adm new file mode 100644 index 0000000..0385e23 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.5.adm @@ -0,0 +1,4 @@ +[ 1, 2500 ] +[ 2, 4500 ] +[ 3, 6000 ] +[ 4, null ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.6.adm new file mode 100644 index 0000000..0e1e38c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.6.adm @@ -0,0 +1,4 @@ +[ 1, 3000 ] +[ 2, 5000 ] +[ 3, 6000 ] +[ 4, null ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.7.adm new file mode 100644 index 0000000..1549912 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/sugar-06-distinct/sugar-06-distinct.7.adm @@ -0,0 +1,4 @@ +[ 1, 2000 ] +[ 2, 4000 ] +[ 3, 6000 ] +[ 4, null ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 16d4cb3..920f0f0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -295,6 +295,11 @@ </compilation-unit> </test-case> <test-case FilePath="aggregate"> + <compilation-unit name="avg_distinct"> + <output-dir compare="Text">avg_distinct</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate"> <compilation-unit name="count_01"> <output-dir compare="Text">count_01</output-dir> </compilation-unit> @@ -312,6 +317,11 @@ <test-case FilePath="aggregate"> <compilation-unit name="count_null"> <output-dir compare="Text">count_null</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate"> + <compilation-unit name="count_distinct"> + <output-dir compare="Text">count_distinct</output-dir> </compilation-unit> </test-case> <!-- @@ -406,6 +416,11 @@ </compilation-unit> </test-case> <test-case FilePath="aggregate"> + <compilation-unit name="max_distinct"> + <output-dir compare="Text">max_distinct</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate"> <compilation-unit name="min_empty_01"> <output-dir compare="Text">min_empty_01</output-dir> </compilation-unit> @@ -413,6 +428,11 @@ <test-case FilePath="aggregate"> <compilation-unit name="min_empty_02"> <output-dir compare="Text">min_empty_02</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate"> + <compilation-unit name="min_distinct"> + <output-dir compare="Text">min_distinct</output-dir> </compilation-unit> </test-case> <test-case FilePath="aggregate"> @@ -568,6 +588,11 @@ <test-case FilePath="aggregate"> <compilation-unit name="sum_numeric_null"> <output-dir compare="Text">sum_numeric_null</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate"> + <compilation-unit name="sum_distinct"> + <output-dir compare="Text">sum_distinct</output-dir> </compilation-unit> </test-case> </test-group> @@ -731,6 +756,11 @@ </compilation-unit> </test-case> <test-case FilePath="aggregate-sql"> + <compilation-unit name="avg_distinct"> + <output-dir compare="Text">avg_distinct</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate-sql"> <compilation-unit name="count_01"> <output-dir compare="Text">count_01</output-dir> </compilation-unit> @@ -751,6 +781,11 @@ </compilation-unit> </test-case> <test-case FilePath="aggregate-sql"> + <compilation-unit name="count_distinct"> + <output-dir compare="Text">count_distinct</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate-sql"> <compilation-unit name="max_empty_01"> <output-dir compare="Text">max_empty_01</output-dir> </compilation-unit> @@ -761,6 +796,11 @@ </compilation-unit> </test-case> <test-case FilePath="aggregate-sql"> + <compilation-unit name="max_distinct"> + <output-dir compare="Text">max_distinct</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate-sql"> <compilation-unit name="min_empty_01"> <output-dir compare="Text">min_empty_01</output-dir> </compilation-unit> @@ -768,6 +808,11 @@ <test-case FilePath="aggregate-sql"> <compilation-unit name="min_empty_02"> <output-dir compare="Text">min_empty_02</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate-sql"> + <compilation-unit name="min_distinct"> + <output-dir compare="Text">min_distinct</output-dir> </compilation-unit> </test-case> <test-case FilePath="aggregate-sql"> @@ -923,6 +968,11 @@ <test-case FilePath="aggregate-sql"> <compilation-unit name="sum_numeric_null"> <output-dir compare="Text">sum_numeric_null</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="aggregate-sql"> + <compilation-unit name="sum_distinct"> + <output-dir compare="Text">sum_distinct</output-dir> </compilation-unit> </test-case> </test-group> @@ -2832,6 +2882,11 @@ </compilation-unit> </test-case> <test-case FilePath="group-by"> + <compilation-unit name="sugar-06-distinct"> + <output-dir compare="Text">sugar-06-distinct</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="group-by"> <compilation-unit name="null"> <output-dir compare="Text">null</output-dir> </compilation-unit> diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java index fb5a6fb..d3f6736 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java @@ -56,6 +56,7 @@ import org.apache.asterix.lang.sqlpp.rewrites.visitor.OperatorExpressionVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SetOperationVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppBuiltinFunctionRewriteVisitor; +import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppDistinctAggregationSugarVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppGlobalAggregationSugarVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppGroupByVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppInlineUdfsVisitor; @@ -135,6 +136,9 @@ // names could be case sensitive. rewriteFunctionNames(); + // Rewrites distinct aggregates into regular aggregates + rewriteDistinctAggregations(); + // Resets the variable counter to the previous marked value. // Therefore, the variable ids in the final query plans will not be perturbed // by the additon or removal of intermediate AST rewrites. @@ -152,6 +156,11 @@ topExpr.accept(globalAggregationVisitor, null); } + protected void rewriteDistinctAggregations() throws CompilationException { + SqlppDistinctAggregationSugarVisitor distinctAggregationVisitor = new SqlppDistinctAggregationSugarVisitor(context); + topExpr.accept(distinctAggregationVisitor, null); + } + protected void rewriteListInputFunctions() throws CompilationException { SqlppListInputFunctionRewriteVisitor listInputFunctionVisitor = new SqlppListInputFunctionRewriteVisitor(); topExpr.accept(listInputFunctionVisitor, null); diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppDistinctAggregationSugarVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppDistinctAggregationSugarVisitor.java new file mode 100644 index 0000000..e9b8334 --- /dev/null +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppDistinctAggregationSugarVisitor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.lang.sqlpp.rewrites.visitor; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.asterix.common.exceptions.CompilationException; +import org.apache.asterix.common.functions.FunctionSignature; +import org.apache.asterix.lang.common.base.Expression; +import org.apache.asterix.lang.common.base.ILangExpression; +import org.apache.asterix.lang.common.expression.CallExpr; +import org.apache.asterix.lang.common.expression.VariableExpr; +import org.apache.asterix.lang.common.rewrites.LangRewritingContext; +import org.apache.asterix.lang.common.util.FunctionUtil; +import org.apache.asterix.lang.sqlpp.clause.FromClause; +import org.apache.asterix.lang.sqlpp.clause.FromTerm; +import org.apache.asterix.lang.sqlpp.clause.SelectBlock; +import org.apache.asterix.lang.sqlpp.clause.SelectClause; +import org.apache.asterix.lang.sqlpp.clause.SelectElement; +import org.apache.asterix.lang.sqlpp.clause.SelectSetOperation; +import org.apache.asterix.lang.sqlpp.expression.SelectExpression; +import org.apache.asterix.lang.sqlpp.struct.SetOperationInput; +import org.apache.asterix.lang.sqlpp.visitor.base.AbstractSqlppExpressionScopingVisitor; +import org.apache.asterix.om.functions.BuiltinFunctions; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.core.algebra.functions.IFunctionInfo; + +/** + * An AST pre-processor to rewrite distinct aggregates into regular aggregates as follows: <br/> + * {@code agg-distinct(expr) -> agg((FROM expr AS i SELECT DISTINCT VALUE i))} <br/> + * where {@code agg-distinct} is a distinct aggregate function, {@code agg} - a regular aggregate function + */ +public class SqlppDistinctAggregationSugarVisitor extends AbstractSqlppExpressionScopingVisitor { + public SqlppDistinctAggregationSugarVisitor(LangRewritingContext context) { + super(context); + } + + @Override + public Expression visit(CallExpr callExpr, ILangExpression arg) throws CompilationException { + FunctionSignature signature = callExpr.getFunctionSignature(); + IFunctionInfo finfo = FunctionUtil.getFunctionInfo(signature); + FunctionIdentifier aggFn = + finfo != null ? BuiltinFunctions.getAggregateFunction(finfo.getFunctionIdentifier()) : null; + FunctionIdentifier newAggFn = aggFn != null ? BuiltinFunctions.getAggregateFunctionForDistinct(aggFn) : null; + if (newAggFn == null) { + return super.visit(callExpr, arg); + } + List<Expression> exprList = callExpr.getExprList(); + List<Expression> newExprList = new ArrayList<>(exprList.size()); + for (Expression expr : exprList) { + Expression newExpr = rewriteArgument(expr); + newExprList.add(newExpr.accept(this, arg)); + } + callExpr.setFunctionSignature( + new FunctionSignature(newAggFn.getNamespace(), newAggFn.getName(), newAggFn.getArity())); + callExpr.setExprList(newExprList); + return callExpr; + } + + /** + * rewrites {@code expr -> FROM expr AS i SELECT DISTINCT VALUE i} + */ + private Expression rewriteArgument(Expression argExpr) throws CompilationException { + // From clause + VariableExpr fromBindingVar = new VariableExpr(context.newVariable()); + FromTerm fromTerm = new FromTerm(argExpr, fromBindingVar, null, null); + FromClause fromClause = new FromClause(Collections.singletonList(fromTerm)); + + // Select clause. + SelectClause selectClause = new SelectClause(new SelectElement(fromBindingVar), null, true); + + // Construct the select expression. + SelectBlock selectBlock = new SelectBlock(selectClause, fromClause, null, null, null, null, null); + SelectSetOperation selectSetOperation = new SelectSetOperation(new SetOperationInput(selectBlock, null), null); + return new SelectExpression(null, selectSetOperation, null, null, true); + } +} diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupBySugarVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupBySugarVisitor.java index 6346994..b7604ab 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupBySugarVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupBySugarVisitor.java @@ -72,10 +72,10 @@ // GROUP AS eis(e AS e, i AS i, s AS s) // SELECT ELEMENT { // 'deptId': deptId, -// 'star_cost': coll_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ) +// 'star_cost': array_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ) // }; // -// where SUM(e.salary + i.bonus) is turned into coll_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ). +// where SUM(e.salary + i.bonus) is turned into array_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ). public class SqlppGroupBySugarVisitor extends AbstractSqlppExpressionScopingVisitor { @@ -102,7 +102,7 @@ } if (rewritten) { // Rewrites the SQL-92 function name to core functions, - // e.g., SUM --> coll_sum + // e.g., SUM --> array_sum callExpr.setFunctionSignature(FunctionMapUtil.sql92ToCoreAggregateFunction(signature)); } callExpr.setExprList(newExprList); diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupByVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupByVisitor.java index 7f2e6c9..dfe371e 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupByVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppGroupByVisitor.java @@ -72,7 +72,7 @@ // GROUP AS eis(e AS e, i AS i, s AS s) // SELECT ELEMENT { // 'deptId': deptId, -// 'star_cost': coll_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ) +// 'star_cost': array_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ) // }; /** * The transformation include three things: @@ -81,7 +81,7 @@ * expression is not a subquery; * 3. Turn a SQL-92 aggregate function into a SQL++ core aggregate function when performing 2, e.g., * SUM(e.salary + i.bonus) becomes - * coll_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ). + * array_sum( (FROM eis AS p SELECT ELEMENT p.e.salary + p.i.bonus) ). */ public class SqlppGroupByVisitor extends AbstractSqlppExpressionScopingVisitor { diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java index 8f04980..8a8465d 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/FunctionMapUtil.java @@ -104,7 +104,7 @@ if (!isSql92AggregateFunction(fs)) { return fs; } - return new FunctionSignature(fs.getNamespace(), CORE_SQL_AGGREGATE_PREFIX + fs.getName(), + return new FunctionSignature(FunctionConstants.ASTERIX_NS, CORE_SQL_AGGREGATE_PREFIX + fs.getName(), fs.getArity()); } @@ -161,7 +161,7 @@ boolean coreAgg = name.startsWith(CORE_AGGREGATE_PREFIX); String lowerCaseName = coreAgg ? name.substring(CORE_AGGREGATE_PREFIX.length()) : (INTERNAL_SQL_AGGREGATE_PREFIX + name.substring(CORE_SQL_AGGREGATE_PREFIX.length())); - return new FunctionSignature(fs.getNamespace(), lowerCaseName, fs.getArity()); + return new FunctionSignature(FunctionConstants.ASTERIX_NS, lowerCaseName, fs.getArity()); } } diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj index b76adb8..68c1cba 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj +++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj @@ -2484,13 +2484,16 @@ FunctionName funcName = null; String hint = null; boolean star = false; + boolean distinct = false; } { funcName = FunctionName() { hint = funcName.hint; } - <LEFTPAREN> ( ( tmp = Expression() | <MUL> { star = true; } ) + <LEFTPAREN> ( + ( <DISTINCT> { distinct = true; } )? + ( tmp = Expression() | <MUL> { star = true; } ) { if(star){ if(!funcName.function.toLowerCase().equals("count")){ @@ -2509,8 +2512,12 @@ } )*)? <RIGHTPAREN> { + String name = funcName.function; + if (distinct) { + name += "-distinct"; + } // TODO use funcName.library - String fqFunctionName = funcName.library == null ? funcName.function : funcName.library + "#" + funcName.function; + String fqFunctionName = funcName.library == null ? name : funcName.library + "#" + name; FunctionSignature signature = lookupFunctionSignature(funcName.dataverse, fqFunctionName, arity); if (signature == null) { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AbstractFunctionDescriptor.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AbstractFunctionDescriptor.java index c070719..35440e4 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AbstractFunctionDescriptor.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AbstractFunctionDescriptor.java @@ -40,31 +40,31 @@ @Override public IScalarEvaluatorFactory createEvaluatorFactory(IScalarEvaluatorFactory[] args) throws AlgebricksException { - throw new NotImplementedException("Not Implemented"); + throw new NotImplementedException("Not Implemented: " + getIdentifier()); } @Override public IRunningAggregateEvaluatorFactory createRunningAggregateEvaluatorFactory(IScalarEvaluatorFactory[] args) throws AlgebricksException { - throw new NotImplementedException("Not Implemented"); + throw new NotImplementedException("Not Implemented: " + getIdentifier()); } @Override public ISerializedAggregateEvaluatorFactory createSerializableAggregateEvaluatorFactory( IScalarEvaluatorFactory[] args) throws AlgebricksException { - throw new NotImplementedException("Not Implemented"); + throw new NotImplementedException("Not Implemented: " + getIdentifier()); } @Override public IUnnestingEvaluatorFactory createUnnestingEvaluatorFactory(IScalarEvaluatorFactory[] args) throws AlgebricksException { - throw new NotImplementedException("Not Implemented"); + throw new NotImplementedException("Not Implemented: " + getIdentifier()); } @Override public IAggregateEvaluatorFactory createAggregateEvaluatorFactory(IScalarEvaluatorFactory[] args) throws AlgebricksException { - throw new NotImplementedException("Not Implemented"); + throw new NotImplementedException("Not Implemented: " + getIdentifier()); } } diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java index 695483b..863847b 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java @@ -143,6 +143,9 @@ private static final Map<IFunctionInfo, IFunctionInfo> aggregateToSerializableAggregate = new HashMap<>(); private static final Map<IFunctionInfo, Boolean> builtinUnnestingFunctions = new HashMap<>(); private static final Map<IFunctionInfo, IFunctionInfo> scalarToAggregateFunctionMap = new HashMap<>(); + private static final Map<IFunctionInfo, IFunctionInfo> distinctToRegularScalarAggregateFunctionMap = + new HashMap<>(); + private static final Map<IFunctionInfo, SpatialFilterKind> spatialFilterFunctions = new HashMap<>(); public static final FunctionIdentifier TYPE_OF = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "type-of", 1); @@ -398,6 +401,29 @@ public static final FunctionIdentifier SERIAL_INTERMEDIATE_AVG = new FunctionIdentifier( FunctionConstants.ASTERIX_NS, "intermediate-avg-serial", 1); + // distinct aggregate functions + + public static final FunctionIdentifier COUNT_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-count-distinct", 1); + public static final FunctionIdentifier SCALAR_COUNT_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "count-distinct", 1); + public static final FunctionIdentifier SUM_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-sum-distinct", 1); + public static final FunctionIdentifier SCALAR_SUM_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "sum-distinct", 1); + public static final FunctionIdentifier AVG_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-avg-distinct", 1); + public static final FunctionIdentifier SCALAR_AVG_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "avg-distinct", 1); + public static final FunctionIdentifier MAX_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-max-distinct", 1); + public static final FunctionIdentifier SCALAR_MAX_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "max-distinct", 1); + public static final FunctionIdentifier MIN_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-min-distinct", 1); + public static final FunctionIdentifier SCALAR_MIN_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "min-distinct", 1); + // sql aggregate functions public static final FunctionIdentifier SQL_AVG = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "agg-sql-avg", 1); @@ -452,6 +478,28 @@ FunctionConstants.ASTERIX_NS, "intermediate-sql-avg-serial", 1); public static final FunctionIdentifier SERIAL_LOCAL_SQL_AVG = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "local-sql-avg-serial", 1); + + // distinct sql aggregate functions + public static final FunctionIdentifier SQL_COUNT_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-sql-count-distinct", 1); + public static final FunctionIdentifier SCALAR_SQL_COUNT_DISTINCT = new FunctionIdentifier( + FunctionConstants.ASTERIX_NS, "sql-count-distinct", 1); + public static final FunctionIdentifier SQL_SUM_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-sql-sum-distinct", 1); + public static final FunctionIdentifier SCALAR_SQL_SUM_DISTINCT = new FunctionIdentifier( + FunctionConstants.ASTERIX_NS, "sql-sum-distinct", 1); + public static final FunctionIdentifier SQL_AVG_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-sql-avg-distinct", 1); + public static final FunctionIdentifier SCALAR_SQL_AVG_DISTINCT = new FunctionIdentifier( + FunctionConstants.ASTERIX_NS, "sql-avg-distinct", 1); + public static final FunctionIdentifier SQL_MAX_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-sql-max-distinct", 1); + public static final FunctionIdentifier SCALAR_SQL_MAX_DISTINCT = new FunctionIdentifier( + FunctionConstants.ASTERIX_NS, "sql-max-distinct", 1); + public static final FunctionIdentifier SQL_MIN_DISTINCT = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, + "agg-sql-min-distinct", 1); + public static final FunctionIdentifier SCALAR_SQL_MIN_DISTINCT = new FunctionIdentifier( + FunctionConstants.ASTERIX_NS, "sql-min-distinct", 1); public static final FunctionIdentifier SCAN_COLLECTION = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "scan-collection", 1); @@ -899,9 +947,9 @@ addPrivateFunction(GRAM_TOKENS, OrderedListOfAStringTypeComputer.INSTANCE, true); addPrivateFunction(HASHED_GRAM_TOKENS, OrderedListOfAInt32TypeComputer.INSTANCE, true); addPrivateFunction(HASHED_WORD_TOKENS, OrderedListOfAInt32TypeComputer.INSTANCE, true); - addFunction(IF_MISSING_OR_NULL, IfMissingOrNullTypeComputer.INSTANCE, true); - addFunction(IF_MISSING, IfMissingTypeComputer.INSTANCE, true); - addFunction(IF_NULL, IfNullTypeComputer.INSTANCE, true); + addFunction(IF_MISSING_OR_NULL, IfMissingOrNullTypeComputer.INSTANCE, true); + addFunction(IF_MISSING, IfMissingTypeComputer.INSTANCE, true); + addFunction(IF_NULL, IfNullTypeComputer.INSTANCE, true); addPrivateFunction(INDEX_SEARCH, AnyTypeComputer.INSTANCE, true); addFunction(INT8_CONSTRUCTOR, AInt8TypeComputer.INSTANCE, true); addFunction(INT16_CONSTRUCTOR, AInt16TypeComputer.INSTANCE, true); @@ -1055,6 +1103,33 @@ addPrivateFunction(SERIAL_INTERMEDIATE_AVG, LocalAvgTypeComputer.INSTANCE, true); addPrivateFunction(SERIAL_SUM, NumericAggTypeComputer.INSTANCE, true); addPrivateFunction(SERIAL_LOCAL_SUM, NumericAggTypeComputer.INSTANCE, true); + + // Distinct aggregate functions + + addFunction(COUNT_DISTINCT, AInt64TypeComputer.INSTANCE, true); + addFunction(SCALAR_COUNT_DISTINCT, AInt64TypeComputer.INSTANCE, true); + addFunction(SQL_COUNT_DISTINCT, AInt64TypeComputer.INSTANCE, true); + addFunction(SCALAR_SQL_COUNT_DISTINCT, AInt64TypeComputer.INSTANCE, true); + + addFunction(SUM_DISTINCT, NumericAggTypeComputer.INSTANCE, true); + addFunction(SCALAR_SUM_DISTINCT, ScalarVersionOfAggregateResultType.INSTANCE, true); + addFunction(SQL_SUM_DISTINCT, NumericAggTypeComputer.INSTANCE, true); + addFunction(SCALAR_SQL_SUM_DISTINCT, ScalarVersionOfAggregateResultType.INSTANCE, true); + + addFunction(AVG_DISTINCT, NullableDoubleTypeComputer.INSTANCE, true); + addFunction(SCALAR_AVG_DISTINCT, NullableDoubleTypeComputer.INSTANCE, true); + addFunction(SQL_AVG_DISTINCT, NullableDoubleTypeComputer.INSTANCE, true); + addFunction(SCALAR_SQL_AVG_DISTINCT, NullableDoubleTypeComputer.INSTANCE, true); + + addFunction(MAX_DISTINCT, MinMaxAggTypeComputer.INSTANCE, true); + addFunction(SCALAR_MAX_DISTINCT, ScalarVersionOfAggregateResultType.INSTANCE, true); + addFunction(SQL_MAX_DISTINCT, MinMaxAggTypeComputer.INSTANCE, true); + addFunction(SCALAR_SQL_MAX_DISTINCT, ScalarVersionOfAggregateResultType.INSTANCE, true); + + addFunction(MIN_DISTINCT, MinMaxAggTypeComputer.INSTANCE, true); + addFunction(SCALAR_MIN_DISTINCT, ScalarVersionOfAggregateResultType.INSTANCE, true); + addFunction(SQL_MIN_DISTINCT, MinMaxAggTypeComputer.INSTANCE, true); + addFunction(SCALAR_SQL_MIN_DISTINCT, ScalarVersionOfAggregateResultType.INSTANCE, true); // Similarity functions addFunction(EDIT_DISTANCE_CONTAINS, OrderedListOfAnyTypeComputer.INSTANCE, true); @@ -1212,28 +1287,10 @@ } static { - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_AVG), getAsterixFunctionInfo(AVG)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_COUNT), getAsterixFunctionInfo(COUNT)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_GLOBAL_AVG), getAsterixFunctionInfo(GLOBAL_AVG)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_LOCAL_AVG), getAsterixFunctionInfo(LOCAL_AVG)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_MAX), getAsterixFunctionInfo(MAX)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_MIN), getAsterixFunctionInfo(MIN)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_SUM), getAsterixFunctionInfo(SUM)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_FIRST_ELEMENT), - getAsterixFunctionInfo(FIRST_ELEMENT)); - // SQL Aggregate Functions - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_SQL_AVG), getAsterixFunctionInfo(SQL_AVG)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_SQL_COUNT), getAsterixFunctionInfo(SQL_COUNT)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_GLOBAL_SQL_AVG), - getAsterixFunctionInfo(GLOBAL_SQL_AVG)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_LOCAL_SQL_AVG), - getAsterixFunctionInfo(LOCAL_SQL_AVG)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_SQL_MAX), getAsterixFunctionInfo(SQL_MAX)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_SQL_MIN), getAsterixFunctionInfo(SQL_MIN)); - scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(SCALAR_SQL_SUM), getAsterixFunctionInfo(SQL_SUM)); - } + // Aggregate functions - static { + // AVG + addAgg(AVG); addAgg(LOCAL_AVG); addAgg(GLOBAL_AVG); @@ -1243,53 +1300,13 @@ addIntermediateAgg(GLOBAL_AVG, INTERMEDIATE_AVG); addGlobalAgg(AVG, GLOBAL_AVG); - addAgg(COUNT); - addLocalAgg(COUNT, COUNT); - addIntermediateAgg(COUNT, SUM); - addGlobalAgg(COUNT, SUM); + addScalarAgg(AVG, SCALAR_AVG); + addScalarAgg(GLOBAL_AVG, SCALAR_GLOBAL_AVG); + addScalarAgg(LOCAL_AVG, SCALAR_LOCAL_AVG); - addAgg(MAX); - addAgg(LOCAL_MAX); - addLocalAgg(MAX, LOCAL_MAX); - addIntermediateAgg(LOCAL_MAX, MAX); - addIntermediateAgg(MAX, MAX); - addGlobalAgg(MAX, MAX); - - addAgg(SCALAR_FIRST_ELEMENT); - addAgg(LOCAL_FIRST_ELEMENT); - addLocalAgg(FIRST_ELEMENT, LOCAL_FIRST_ELEMENT); - addIntermediateAgg(LOCAL_FIRST_ELEMENT, FIRST_ELEMENT); - addIntermediateAgg(FIRST_ELEMENT, FIRST_ELEMENT); - addGlobalAgg(FIRST_ELEMENT, FIRST_ELEMENT); - - addAgg(MIN); - addLocalAgg(MIN, LOCAL_MIN); - addIntermediateAgg(LOCAL_MIN, MIN); - addIntermediateAgg(MIN, MIN); - addGlobalAgg(MIN, MIN); - - addAgg(SUM); - addAgg(LOCAL_SUM); - addLocalAgg(SUM, LOCAL_SUM); - addIntermediateAgg(LOCAL_SUM, SUM); - addIntermediateAgg(SUM, SUM); - addGlobalAgg(SUM, SUM); - - addAgg(LISTIFY); - - // serializable aggregate functions addSerialAgg(AVG, SERIAL_AVG); - addSerialAgg(COUNT, SERIAL_COUNT); - addSerialAgg(SUM, SERIAL_SUM); - addSerialAgg(LOCAL_SUM, SERIAL_LOCAL_SUM); addSerialAgg(LOCAL_AVG, SERIAL_LOCAL_AVG); addSerialAgg(GLOBAL_AVG, SERIAL_GLOBAL_AVG); - - addAgg(SERIAL_COUNT); - addLocalAgg(SERIAL_COUNT, SERIAL_COUNT); - addIntermediateAgg(SERIAL_COUNT, SERIAL_SUM); - addGlobalAgg(SERIAL_COUNT, SERIAL_SUM); - addAgg(SERIAL_AVG); addAgg(SERIAL_LOCAL_AVG); addAgg(SERIAL_GLOBAL_AVG); @@ -1299,6 +1316,86 @@ addIntermediateAgg(SERIAL_GLOBAL_AVG, SERIAL_INTERMEDIATE_AVG); addGlobalAgg(SERIAL_AVG, SERIAL_GLOBAL_AVG); + // AVG DISTINCT + + addDistinctAgg(AVG_DISTINCT, SCALAR_AVG); + addScalarAgg(AVG_DISTINCT, SCALAR_AVG_DISTINCT); + + // COUNT + + addAgg(COUNT); + addLocalAgg(COUNT, COUNT); + addIntermediateAgg(COUNT, SUM); + addGlobalAgg(COUNT, SUM); + + addScalarAgg(COUNT, SCALAR_COUNT); + + addSerialAgg(COUNT, SERIAL_COUNT); + addAgg(SERIAL_COUNT); + addLocalAgg(SERIAL_COUNT, SERIAL_COUNT); + addIntermediateAgg(SERIAL_COUNT, SERIAL_SUM); + addGlobalAgg(SERIAL_COUNT, SERIAL_SUM); + + // COUNT DISTINCT + + addDistinctAgg(COUNT_DISTINCT, SCALAR_COUNT); + addScalarAgg(COUNT_DISTINCT, SCALAR_COUNT_DISTINCT); + + // MAX + + addAgg(MAX); + addAgg(LOCAL_MAX); + addLocalAgg(MAX, LOCAL_MAX); + addIntermediateAgg(LOCAL_MAX, MAX); + addIntermediateAgg(MAX, MAX); + addGlobalAgg(MAX, MAX); + + addScalarAgg(MAX, SCALAR_MAX); + + // MAX DISTINCT + + addDistinctAgg(MAX_DISTINCT, SCALAR_MAX); + addScalarAgg(MAX_DISTINCT, SCALAR_MAX_DISTINCT); + + // FIRST_ELEMENT + + addAgg(SCALAR_FIRST_ELEMENT); + addAgg(LOCAL_FIRST_ELEMENT); + addLocalAgg(FIRST_ELEMENT, LOCAL_FIRST_ELEMENT); + addIntermediateAgg(LOCAL_FIRST_ELEMENT, FIRST_ELEMENT); + addIntermediateAgg(FIRST_ELEMENT, FIRST_ELEMENT); + addGlobalAgg(FIRST_ELEMENT, FIRST_ELEMENT); + + addScalarAgg(FIRST_ELEMENT, SCALAR_FIRST_ELEMENT); + + // MIN + + addAgg(MIN); + addLocalAgg(MIN, LOCAL_MIN); + addIntermediateAgg(LOCAL_MIN, MIN); + addIntermediateAgg(MIN, MIN); + addGlobalAgg(MIN, MIN); + + addScalarAgg(MIN, SCALAR_MIN); + + // MIN DISTINCT + + addDistinctAgg(MIN_DISTINCT, SCALAR_MIN); + addScalarAgg(MIN_DISTINCT, SCALAR_MIN_DISTINCT); + + // SUM + + addAgg(SUM); + addAgg(LOCAL_SUM); + addLocalAgg(SUM, LOCAL_SUM); + addIntermediateAgg(LOCAL_SUM, SUM); + addIntermediateAgg(SUM, SUM); + addGlobalAgg(SUM, SUM); + + addScalarAgg(SUM, SCALAR_SUM); + + addSerialAgg(SUM, SERIAL_SUM); + addSerialAgg(LOCAL_SUM, SERIAL_LOCAL_SUM); addAgg(SERIAL_SUM); addAgg(SERIAL_LOCAL_SUM); addLocalAgg(SERIAL_SUM, SERIAL_LOCAL_SUM); @@ -1306,7 +1403,19 @@ addIntermediateAgg(SERIAL_LOCAL_SUM, SERIAL_SUM); addGlobalAgg(SERIAL_SUM, SERIAL_SUM); + // SUM DISTINCT + + addDistinctAgg(SUM_DISTINCT, SCALAR_SUM); + addScalarAgg(SUM_DISTINCT, SCALAR_SUM_DISTINCT); + + // LISTIFY + + addAgg(LISTIFY); + // SQL Aggregate Functions + + // SQL AVG + addAgg(SQL_AVG); addAgg(LOCAL_SQL_AVG); addAgg(GLOBAL_SQL_AVG); @@ -1316,44 +1425,13 @@ addIntermediateAgg(GLOBAL_SQL_AVG, INTERMEDIATE_SQL_AVG); addGlobalAgg(SQL_AVG, GLOBAL_SQL_AVG); - addAgg(SQL_COUNT); - addLocalAgg(SQL_COUNT, SQL_COUNT); - addIntermediateAgg(SQL_COUNT, SQL_SUM); - addGlobalAgg(SQL_COUNT, SQL_SUM); + addScalarAgg(SQL_AVG, SCALAR_SQL_AVG); + addScalarAgg(GLOBAL_SQL_AVG, SCALAR_GLOBAL_SQL_AVG); + addScalarAgg(LOCAL_SQL_AVG, SCALAR_LOCAL_SQL_AVG); - addAgg(SQL_MAX); - addAgg(LOCAL_SQL_MAX); - addLocalAgg(SQL_MAX, LOCAL_SQL_MAX); - addIntermediateAgg(LOCAL_SQL_MAX, SQL_MAX); - addIntermediateAgg(SQL_MAX, SQL_MAX); - addGlobalAgg(SQL_MAX, SQL_MAX); - - addAgg(SQL_MIN); - addLocalAgg(SQL_MIN, LOCAL_SQL_MIN); - addIntermediateAgg(LOCAL_SQL_MIN, SQL_MIN); - addIntermediateAgg(SQL_MIN, SQL_MIN); - addGlobalAgg(SQL_MIN, SQL_MIN); - - addAgg(SQL_SUM); - addAgg(LOCAL_SQL_SUM); - addLocalAgg(SQL_SUM, LOCAL_SQL_SUM); - addIntermediateAgg(LOCAL_SQL_SUM, SQL_SUM); - addIntermediateAgg(SQL_SUM, SQL_SUM); - addGlobalAgg(SQL_SUM, SQL_SUM); - - // SQL serializable aggregate functions addSerialAgg(SQL_AVG, SERIAL_SQL_AVG); - addSerialAgg(SQL_COUNT, SERIAL_SQL_COUNT); - addSerialAgg(SQL_SUM, SERIAL_SQL_SUM); - addSerialAgg(LOCAL_SQL_SUM, SERIAL_LOCAL_SQL_SUM); addSerialAgg(LOCAL_SQL_AVG, SERIAL_LOCAL_SQL_AVG); addSerialAgg(GLOBAL_SQL_AVG, SERIAL_GLOBAL_SQL_AVG); - - addAgg(SERIAL_SQL_COUNT); - addLocalAgg(SERIAL_SQL_COUNT, SERIAL_SQL_COUNT); - addIntermediateAgg(SERIAL_SQL_COUNT, SERIAL_SQL_SUM); - addGlobalAgg(SERIAL_SQL_COUNT, SERIAL_SQL_SUM); - addAgg(SERIAL_SQL_AVG); addAgg(SERIAL_LOCAL_SQL_AVG); addAgg(SERIAL_GLOBAL_SQL_AVG); @@ -1363,6 +1441,75 @@ addIntermediateAgg(SERIAL_GLOBAL_SQL_AVG, SERIAL_INTERMEDIATE_SQL_AVG); addGlobalAgg(SERIAL_SQL_AVG, SERIAL_GLOBAL_SQL_AVG); + // SQL AVG DISTINCT + + addDistinctAgg(SQL_AVG_DISTINCT, SCALAR_SQL_AVG); + addScalarAgg(SQL_AVG_DISTINCT, SCALAR_SQL_AVG_DISTINCT); + + // SQL COUNT + + addAgg(SQL_COUNT); + addLocalAgg(SQL_COUNT, SQL_COUNT); + addIntermediateAgg(SQL_COUNT, SQL_SUM); + addGlobalAgg(SQL_COUNT, SQL_SUM); + + addScalarAgg(SQL_COUNT, SCALAR_SQL_COUNT); + + addSerialAgg(SQL_COUNT, SERIAL_SQL_COUNT); + addAgg(SERIAL_SQL_COUNT); + addLocalAgg(SERIAL_SQL_COUNT, SERIAL_SQL_COUNT); + addIntermediateAgg(SERIAL_SQL_COUNT, SERIAL_SQL_SUM); + addGlobalAgg(SERIAL_SQL_COUNT, SERIAL_SQL_SUM); + + // SQL COUNT DISTINCT + + addDistinctAgg(SQL_COUNT_DISTINCT, SCALAR_SQL_COUNT); + addScalarAgg(SQL_COUNT_DISTINCT, SCALAR_SQL_COUNT_DISTINCT); + + // SQL MAX + + addAgg(SQL_MAX); + addAgg(LOCAL_SQL_MAX); + addLocalAgg(SQL_MAX, LOCAL_SQL_MAX); + addIntermediateAgg(LOCAL_SQL_MAX, SQL_MAX); + addIntermediateAgg(SQL_MAX, SQL_MAX); + addGlobalAgg(SQL_MAX, SQL_MAX); + + addScalarAgg(SQL_MAX, SCALAR_SQL_MAX); + + // SQL MAX DISTINCT + + addDistinctAgg(SQL_MAX_DISTINCT, SCALAR_SQL_MAX); + addScalarAgg(SQL_MAX_DISTINCT, SCALAR_SQL_MAX_DISTINCT); + + // SQL MIN + + addAgg(SQL_MIN); + addLocalAgg(SQL_MIN, LOCAL_SQL_MIN); + addIntermediateAgg(LOCAL_SQL_MIN, SQL_MIN); + addIntermediateAgg(SQL_MIN, SQL_MIN); + addGlobalAgg(SQL_MIN, SQL_MIN); + + addScalarAgg(SQL_MIN, SCALAR_SQL_MIN); + + // SQL MIN DISTINCT + + addDistinctAgg(SQL_MIN_DISTINCT, SCALAR_SQL_MIN); + addScalarAgg(SQL_MIN_DISTINCT, SCALAR_SQL_MIN_DISTINCT); + + // SQL SUM + + addAgg(SQL_SUM); + addAgg(LOCAL_SQL_SUM); + addLocalAgg(SQL_SUM, LOCAL_SQL_SUM); + addIntermediateAgg(LOCAL_SQL_SUM, SQL_SUM); + addIntermediateAgg(SQL_SUM, SQL_SUM); + addGlobalAgg(SQL_SUM, SQL_SUM); + + addScalarAgg(SQL_SUM, SCALAR_SQL_SUM); + + addSerialAgg(SQL_SUM, SERIAL_SQL_SUM); + addSerialAgg(LOCAL_SQL_SUM, SERIAL_LOCAL_SQL_SUM); addAgg(SERIAL_SQL_SUM); addAgg(SERIAL_LOCAL_SQL_SUM); addLocalAgg(SERIAL_SQL_SUM, SERIAL_LOCAL_SQL_SUM); @@ -1370,6 +1517,10 @@ addIntermediateAgg(SERIAL_SQL_SUM, SERIAL_SQL_SUM); addGlobalAgg(SERIAL_SQL_SUM, SERIAL_SQL_SUM); + // SQL SUM DISTINCT + + addDistinctAgg(SQL_SUM_DISTINCT, SCALAR_SQL_SUM); + addScalarAgg(SQL_SUM_DISTINCT, SCALAR_SQL_SUM_DISTINCT); } static { @@ -1502,6 +1653,12 @@ return finfo == null ? null : finfo.getFunctionIdentifier(); } + public static FunctionIdentifier getAggregateFunctionForDistinct(FunctionIdentifier distinctVersionOfAggregate) { + IFunctionInfo finfo = + distinctToRegularScalarAggregateFunctionMap.get(getAsterixFunctionInfo(distinctVersionOfAggregate)); + return finfo == null ? null : finfo.getFunctionIdentifier(); + } + public static void addFunction(FunctionIdentifier fi, IResultTypeComputer typeComputer, boolean isFunctional) { addFunctionWithDomain(fi, ATypeHierarchy.Domain.ANY, typeComputer, isFunctional); } @@ -1548,6 +1705,15 @@ aggregateToSerializableAggregate.put(getAsterixFunctionInfo(fi), getAsterixFunctionInfo(serialfi)); } + private static void addScalarAgg(FunctionIdentifier fi, FunctionIdentifier scalarfi) { + scalarToAggregateFunctionMap.put(getAsterixFunctionInfo(scalarfi), getAsterixFunctionInfo(fi)); + } + + private static void addDistinctAgg(FunctionIdentifier distinctfi, FunctionIdentifier regularscalarfi) { + distinctToRegularScalarAggregateFunctionMap.put(getAsterixFunctionInfo(distinctfi), + getAsterixFunctionInfo(regularscalarfi)); + } + static { spatialFilterFunctions.put(getAsterixFunctionInfo(BuiltinFunctions.SPATIAL_INTERSECT), SpatialFilterKind.SI); diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/expressions/AbstractFunctionCallExpression.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/expressions/AbstractFunctionCallExpression.java index 8bc39b8..80d6f95 100644 --- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/expressions/AbstractFunctionCallExpression.java +++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/expressions/AbstractFunctionCallExpression.java @@ -193,9 +193,14 @@ if (!equal) { return false; } - for (int i = 0; i < arguments.size(); i++) { + int argumentCount = arguments.size(); + List<Mutable<ILogicalExpression>> fceArguments = fce.getArguments(); + if (argumentCount != fceArguments.size()) { + return false; + } + for (int i = 0; i < argumentCount; i++) { ILogicalExpression argument = arguments.get(i).getValue(); - ILogicalExpression fceArgument = fce.getArguments().get(i).getValue(); + ILogicalExpression fceArgument = fceArguments.get(i).getValue(); if (!argument.equals(fceArgument)) { return false; } -- To view, visit https://asterix-gerrit.ics.uci.edu/1987 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I52772349cbcbfc68a3a1ff261d610871ca38748d Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Dmitry Lychagin <[email protected]>
