This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit adcab98511809f6780e01134c786b36db25bdefd Author: Dmitry Lychagin <[email protected]> AuthorDate: Mon Apr 27 15:40:21 2020 -0700 [NO ISSUE][COMP] Extract SQL aggregates from CASE expressions - user model changes: no - storage format changes: no - interface changes: no Details: - Extracts SQL-92 aggregate functions from CASE/IF expressions into LET clauses, so they can be pushed into GROUPBY subplans by the optimizer - Refactor AbstractSqlppExpressionExtractionVisitor to improve its extensibility Change-Id: Ia1ae879e845bac5656749966ca57054cbfce6df6 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6044 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> --- .../asterix/test/optimizer/OptimizerTest.java | 96 +++++++++++++++------- .../queries/group-by/gby-case-01.3.sqlpp | 31 +++++++ .../queries/group-by/gby-case-01.4.sqlpp | 36 ++++++++ .../results/group-by/gby-case-01.3.plan | 24 ++++++ .../results/group-by/gby-case-01.4.plan | 24 ++++++ .../group-by/gby-case-01/gby-case-01.1.ddl.sqlpp | 32 ++++++++ .../gby-case-01/gby-case-01.2.update.sqlpp | 60 ++++++++++++++ .../group-by/gby-case-01/gby-case-01.3.query.sqlpp | 26 ++++++ .../group-by/gby-case-01/gby-case-01.4.query.sqlpp | 24 ++++++ .../results/group-by/gby-case-01/gby-case-01.3.adm | 2 + .../results/group-by/gby-case-01/gby-case-01.4.adm | 2 + .../test/resources/runtimets/testsuite_sqlpp.xml | 5 ++ .../sqlpp/rewrites/SqlppFunctionBodyRewriter.java | 3 + .../lang/sqlpp/rewrites/SqlppQueryRewriter.java | 11 ++- .../AbstractSqlppExpressionExtractionVisitor.java | 86 +++++++++++++------ .../rewrites/visitor/SqlppCaseRewriteVisitor.java | 95 +++++++++++++++++++++ .../visitor/SqlppWindowRewriteVisitor.java | 25 +++--- 17 files changed, 516 insertions(+), 66 deletions(-) diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java index 6e0413c..c3dc821 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/optimizer/OptimizerTest.java @@ -18,17 +18,19 @@ */ package org.apache.asterix.test.optimizer; -import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; -import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.StringReader; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.asterix.api.common.AsterixHyracksIntegrationUtil; import org.apache.asterix.api.java.AsterixJavaClient; @@ -93,6 +95,9 @@ public class OptimizerTest { protected static AsterixHyracksIntegrationUtil integrationUtil = new AsterixHyracksIntegrationUtil(); + private static final String PATTERN_VAR_ID_PREFIX = "\\$\\$"; + private static final Pattern PATTERN_VAR_ID = Pattern.compile(PATTERN_VAR_ID_PREFIX + "(\\d+)"); + @BeforeClass public static void setUp() throws Exception { final File outdir = new File(PATH_ACTUAL); @@ -205,37 +210,36 @@ public class OptimizerTest { throw new Exception("Compile ERROR for " + queryFile + ": " + e.getMessage(), e); } - BufferedReader readerExpected = - new BufferedReader(new InputStreamReader(new FileInputStream(expectedFile), "UTF-8")); - BufferedReader readerActual = - new BufferedReader(new InputStreamReader(new FileInputStream(actualFile), "UTF-8")); + List<String> linesExpected = Files.readAllLines(expectedFile.toPath(), StandardCharsets.UTF_8); + List<String> linesActual = Files.readAllLines(actualFile.toPath(), StandardCharsets.UTF_8); + + int varBaseExpected = findBaseVarId(linesExpected); + int varBaseActual = findBaseVarId(linesActual); + Iterator<String> readerExpected = linesExpected.iterator(); + Iterator<String> readerActual = linesActual.iterator(); String lineExpected, lineActual; int num = 1; - try { - while ((lineExpected = readerExpected.readLine()) != null) { - lineActual = readerActual.readLine(); - if (lineActual == null) { - throw new Exception("Result for " + queryFile + " changed at line " + num + ":\n< " - + lineExpected + "\n> "); - } - if (!lineExpected.equals(lineActual)) { - throw new Exception("Result for " + queryFile + " changed at line " + num + ":\n< " - + lineExpected + "\n> " + lineActual); - } - ++num; - } - lineActual = readerActual.readLine(); - if (lineActual != null) { + while (readerExpected.hasNext()) { + lineExpected = readerExpected.next(); + if (!readerActual.hasNext()) { throw new Exception( - "Result for " + queryFile + " changed at line " + num + ":\n< \n> " + lineActual); + "Result for " + queryFile + " changed at line " + num + ":\n< " + lineExpected + "\n> "); + } + lineActual = readerActual.next(); + + if (!planLineEquals(lineExpected, varBaseExpected, lineActual, varBaseActual)) { + throw new Exception("Result for " + queryFile + " changed at line " + num + ":\n< " + lineExpected + + "\n> " + lineActual); } - LOGGER.info("Test \"" + queryFile.getPath() + "\" PASSED!"); - actualFile.delete(); - } finally { - readerExpected.close(); - readerActual.close(); + ++num; + } + if (readerActual.hasNext()) { + throw new Exception( + "Result for " + queryFile + " changed at line " + num + ":\n< \n> " + readerActual.next()); } + LOGGER.info("Test \"" + queryFile.getPath() + "\" PASSED!"); + actualFile.delete(); } catch (Exception e) { if (!(e instanceof AssumptionViolatedException)) { LOGGER.error("Test \"" + queryFile.getPath() + "\" FAILED!"); @@ -245,4 +249,40 @@ public class OptimizerTest { } } } + + private boolean planLineEquals(String lineExpected, int varIdBaseExpected, String lineActual, int varIdBaseActual) { + String lineExpectedNorm = normalizePlanLine(lineExpected, varIdBaseExpected); + String lineActualNorm = normalizePlanLine(lineActual, varIdBaseActual); + return lineExpectedNorm.equals(lineActualNorm); + } + + // rewrite variable ids in given plan line: $$varId -> $$(varId-varIdBase) + private String normalizePlanLine(String line, int varIdBase) { + if (varIdBase == Integer.MAX_VALUE) { + // plan did not contain any variables -> no rewriting necessary + return line; + } + Matcher m = PATTERN_VAR_ID.matcher(line); + StringBuffer sb = new StringBuffer(line.length()); + while (m.find()) { + int varId = Integer.parseInt(m.group(1)); + int newVarId = varId - varIdBase; + m.appendReplacement(sb, PATTERN_VAR_ID_PREFIX + newVarId); + } + m.appendTail(sb); + return sb.toString(); + } + + private int findBaseVarId(Collection<String> plan) { + int varIdBase = Integer.MAX_VALUE; + Matcher m = PATTERN_VAR_ID.matcher(""); + for (String line : plan) { + m.reset(line); + while (m.find()) { + int varId = Integer.parseInt(m.group(1)); + varIdBase = Math.min(varIdBase, varId); + } + } + return varIdBase; + } } diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/group-by/gby-case-01.3.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/group-by/gby-case-01.3.sqlpp new file mode 100644 index 0000000..37679f7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/group-by/gby-case-01.3.sqlpp @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + +create dataset t1(id integer not null) open type primary key id; + +select x, + case when sum(z)=0 then 0 else sum(y*z)/sum(z) end as res +from t1 +group by x +order by x; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/group-by/gby-case-01.4.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/group-by/gby-case-01.4.sqlpp new file mode 100644 index 0000000..f9d5b77 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/group-by/gby-case-01.4.sqlpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + +create dataset t1(id integer not null) open type primary key id; + +create function f1() { + select x, + case when sum(z)=0 then 0 else sum(y*z)/sum(z) end as res + from t1 + group by x +}; + +select x, res +from f1() f +order by x; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/group-by/gby-case-01.3.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/group-by/gby-case-01.3.plan new file mode 100644 index 0000000..643e12f --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/group-by/gby-case-01.3.plan @@ -0,0 +1,24 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- SORT_MERGE_EXCHANGE [$$x(ASC) ] |PARTITIONED| + -- SORT_GROUP_BY[$$93] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- HASH_PARTITION_EXCHANGE [$$93] |PARTITIONED| + -- SORT_GROUP_BY[$$81] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results/group-by/gby-case-01.4.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results/group-by/gby-case-01.4.plan new file mode 100644 index 0000000..27432c6 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results/group-by/gby-case-01.4.plan @@ -0,0 +1,24 @@ +-- DISTRIBUTE_RESULT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- SORT_MERGE_EXCHANGE [$$x(ASC) ] |PARTITIONED| + -- SORT_GROUP_BY[$$120] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- HASH_PARTITION_EXCHANGE [$$120] |PARTITIONED| + -- SORT_GROUP_BY[$$105] |PARTITIONED| + { + -- AGGREGATE |LOCAL| + -- NESTED_TUPLE_SOURCE |LOCAL| + } + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.1.ddl.sqlpp new file mode 100644 index 0000000..4c6addb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.1.ddl.sqlpp @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + +create dataset t1(id integer not null) open type primary key id; + +create function f1() { + select x, + case when sum(z)=0 then 0 else sum(y*z)/sum(z) end as res + from t1 + group by x +}; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.2.update.sqlpp new file mode 100644 index 0000000..9c37cf5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.2.update.sqlpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +insert into t1 +([ + { + "id": 10, + "x": 1, + "y": 2, + "z": 2 + }, + { + "id": 11, + "x": 1, + "y": 4, + "z": 4 + }, + { + "id": 12, + "x": 1, + "y": 8, + "z": 8 + }, + { + "id": 20, + "x": 2, + "y": 2, + "z": 0 + }, + { + "id": 21, + "x": 2, + "y": 4, + "z": 0 + }, + { + "id": 22, + "x": 2, + "y": 8, + "z": 0 + } +]); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.3.query.sqlpp new file mode 100644 index 0000000..90a432c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.3.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select x, + case when sum(z)=0 then 0 else sum(y*z)/sum(z) end as res +from t1 +group by x +order by x; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.4.query.sqlpp new file mode 100644 index 0000000..6cf2ac9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/gby-case-01/gby-case-01.4.query.sqlpp @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select x, res +from f1() f +order by x; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/gby-case-01/gby-case-01.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/gby-case-01/gby-case-01.3.adm new file mode 100644 index 0000000..11331e8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/gby-case-01/gby-case-01.3.adm @@ -0,0 +1,2 @@ +{ "x": 1, "res": 6.0 } +{ "x": 2, "res": 0 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/gby-case-01/gby-case-01.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/gby-case-01/gby-case-01.4.adm new file mode 100644 index 0000000..11331e8 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/gby-case-01/gby-case-01.4.adm @@ -0,0 +1,2 @@ +{ "x": 1, "res": 6.0 } +{ "x": 2, "res": 0 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index fc5ca89..7dac28d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -5206,6 +5206,11 @@ </compilation-unit> </test-case> <test-case FilePath="group-by"> + <compilation-unit name="gby-case-01"> + <output-dir compare="Text">gby-case-01</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="group-by"> <compilation-unit name="gby-nested-01"> <output-dir compare="Text">gby-nested-01</output-dir> </compilation-unit> diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java index bde40de..db1485c 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppFunctionBodyRewriter.java @@ -58,6 +58,9 @@ class SqlppFunctionBodyRewriter extends SqlppQueryRewriter { // Generate ids for variables (considering scopes) and replace global variable access with the dataset function. variableCheckAndRewrite(); + // Extracts SQL-92 aggregate functions from CASE/IF expressions into LET clauses + rewriteCaseExpressions(); + // Rewrites SQL-92 global aggregations. rewriteGroupByAggregationSugar(); diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java index 8bc319f..8b3e12d 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/SqlppQueryRewriter.java @@ -64,6 +64,7 @@ import org.apache.asterix.lang.sqlpp.rewrites.visitor.InlineWithExpressionVisito import org.apache.asterix.lang.sqlpp.rewrites.visitor.OperatorExpressionVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SetOperationVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppBuiltinFunctionRewriteVisitor; +import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppCaseRewriteVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppGroupByAggregationSugarVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppGroupByVisitor; import org.apache.asterix.lang.sqlpp.rewrites.visitor.SqlppInlineUdfsVisitor; @@ -141,10 +142,13 @@ public class SqlppQueryRewriter implements IQueryRewriter { // Generate ids for variables (considering scopes) and replace global variable access with the dataset function. variableCheckAndRewrite(); + // Extracts SQL-92 aggregate functions from CASE/IF expressions into LET clauses + rewriteCaseExpressions(); + // Rewrites SQL-92 aggregate functions rewriteGroupByAggregationSugar(); - // Rewrite window expression aggregations. + // Rewrites window expression aggregations. rewriteWindowAggregationSugar(); // Rewrites like/not-like expressions. @@ -246,6 +250,11 @@ public class SqlppQueryRewriter implements IQueryRewriter { rewriteTopExpr(windowVisitor, null); } + protected void rewriteCaseExpressions() throws CompilationException { + SqlppCaseRewriteVisitor visitor = new SqlppCaseRewriteVisitor(context); + rewriteTopExpr(visitor, null); + } + protected void inlineDeclaredUdfs(boolean inlineUdfs) throws CompilationException { List<FunctionSignature> funIds = new ArrayList<FunctionSignature>(); for (FunctionDecl fdecl : declaredFunctions) { diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/AbstractSqlppExpressionExtractionVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/AbstractSqlppExpressionExtractionVisitor.java index 57bfad5..b132ea0 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/AbstractSqlppExpressionExtractionVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/AbstractSqlppExpressionExtractionVisitor.java @@ -23,23 +23,27 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Deque; import java.util.List; +import java.util.function.Predicate; import org.apache.asterix.common.exceptions.CompilationException; import org.apache.asterix.lang.common.base.AbstractClause; import org.apache.asterix.lang.common.base.Expression; import org.apache.asterix.lang.common.base.ILangExpression; +import org.apache.asterix.lang.common.clause.GroupbyClause; import org.apache.asterix.lang.common.clause.LetClause; import org.apache.asterix.lang.common.expression.VariableExpr; import org.apache.asterix.lang.common.rewrites.LangRewritingContext; import org.apache.asterix.lang.common.struct.VarIdentifier; import org.apache.asterix.lang.sqlpp.clause.FromClause; import org.apache.asterix.lang.sqlpp.clause.SelectBlock; +import org.apache.asterix.lang.sqlpp.clause.SelectClause; import org.apache.asterix.lang.sqlpp.visitor.base.AbstractSqlppSimpleExpressionVisitor; import org.apache.hyracks.algebricks.common.utils.Pair; /** * Base class for visitors that extract expressions into LET clauses. - * Subclasses should call {@link #extractExpressions(List, int)} to perform the extraction. + * Subclasses should call {@link #extractExpressionsFromList(List, int, Predicate)} or + * {@link #extractExpression(Expression)} to perform the extraction. */ abstract class AbstractSqlppExpressionExtractionVisitor extends AbstractSqlppSimpleExpressionVisitor { @@ -57,32 +61,60 @@ abstract class AbstractSqlppExpressionExtractionVisitor extends AbstractSqlppSim stack.push(extractionList); if (selectBlock.hasFromClause()) { - FromClause clause = selectBlock.getFromClause(); - clause.accept(this, arg); - if (!extractionList.isEmpty()) { - handleUnsupportedClause(clause, extractionList); - } + visitFromClause(selectBlock.getFromClause(), arg, extractionList); } List<AbstractClause> letWhereList = selectBlock.getLetWhereList(); if (!letWhereList.isEmpty()) { - visitLetWhereClauses(letWhereList, extractionList, arg); + visitLetWhereClauses(letWhereList, arg, extractionList); } + GroupbyClause groupbyClause = null; if (selectBlock.hasGroupbyClause()) { - selectBlock.getGroupbyClause().accept(this, arg); - introduceLetClauses(extractionList, letWhereList); + groupbyClause = selectBlock.getGroupbyClause(); + visitGroupByClause(groupbyClause, arg, extractionList, letWhereList); } List<AbstractClause> letHavingListAfterGby = selectBlock.getLetHavingListAfterGroupby(); if (!letHavingListAfterGby.isEmpty()) { - visitLetWhereClauses(letHavingListAfterGby, extractionList, arg); + visitLetHavingClausesAfterGby(arg, extractionList, letHavingListAfterGby, groupbyClause); } - selectBlock.getSelectClause().accept(this, arg); - introduceLetClauses(extractionList, selectBlock.hasGroupbyClause() ? letHavingListAfterGby : letWhereList); + visitSelectClause(selectBlock.getSelectClause(), arg, extractionList, + selectBlock.hasGroupbyClause() ? letHavingListAfterGby : letWhereList, groupbyClause); stack.pop(); return null; } - private void visitLetWhereClauses(List<AbstractClause> clauseList, + protected void visitFromClause(FromClause clause, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList) throws CompilationException { + // Skip extraction because we won't be able to perform it as there are no LET clauses yet. + // Subclasses can override and fail if necessary + } + + protected void visitLetWhereClauses(List<AbstractClause> letWhereList, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList) throws CompilationException { + visitLetWhereClausesImpl(letWhereList, extractionList, arg); + } + + protected void visitGroupByClause(GroupbyClause groupbyClause, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList, List<AbstractClause> letWhereList) + throws CompilationException { + groupbyClause.accept(this, arg); + introduceLetClauses(extractionList, letWhereList); + } + + protected void visitLetHavingClausesAfterGby(ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList, List<AbstractClause> letHavingListAfterGby, + GroupbyClause groupbyClause) throws CompilationException { + visitLetWhereClausesImpl(letHavingListAfterGby, extractionList, arg); + } + + protected void visitSelectClause(SelectClause selectClause, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList, List<AbstractClause> letWhereList, + GroupbyClause groupbyClause) throws CompilationException { + selectClause.accept(this, arg); + introduceLetClauses(extractionList, letWhereList); + } + + private void visitLetWhereClausesImpl(List<AbstractClause> clauseList, List<Pair<Expression, VarIdentifier>> extractionList, ILangExpression arg) throws CompilationException { List<AbstractClause> newClauseList = new ArrayList<>(clauseList.size()); for (AbstractClause letWhereClause : clauseList) { @@ -108,7 +140,8 @@ abstract class AbstractSqlppExpressionExtractionVisitor extends AbstractSqlppSim fromBindingList.clear(); } - List<Expression> extractExpressions(List<Expression> exprList, int limit) { + protected List<Expression> extractExpressionsFromList(List<Expression> exprList, int limit, + Predicate<Expression> exprTest) { List<Pair<Expression, VarIdentifier>> outLetList = stack.peek(); if (outLetList == null) { return null; @@ -117,23 +150,22 @@ abstract class AbstractSqlppExpressionExtractionVisitor extends AbstractSqlppSim List<Expression> newExprList = new ArrayList<>(n); for (int i = 0; i < n; i++) { Expression expr = exprList.get(i); - Expression newExpr; - if (i < limit && isExtractableExpression(expr)) { - VarIdentifier v = context.newVariable(); - VariableExpr vExpr = new VariableExpr(v); - vExpr.setSourceLocation(expr.getSourceLocation()); - outLetList.add(new Pair<>(expr, v)); - newExpr = vExpr; - } else { - newExpr = expr; - } + Expression newExpr = i < limit && exprTest.test(expr) ? extractExpressionImpl(expr, outLetList) : expr; newExprList.add(newExpr); } return newExprList; } - abstract boolean isExtractableExpression(Expression expr); + protected Expression extractExpression(Expression expr) { + List<Pair<Expression, VarIdentifier>> outLetList = stack.peek(); + return outLetList != null ? extractExpressionImpl(expr, outLetList) : null; + } - abstract void handleUnsupportedClause(FromClause clause, List<Pair<Expression, VarIdentifier>> extractionList) - throws CompilationException; + private VariableExpr extractExpressionImpl(Expression expr, List<Pair<Expression, VarIdentifier>> outLetList) { + VarIdentifier v = context.newVariable(); + VariableExpr vExpr = new VariableExpr(v); + vExpr.setSourceLocation(expr.getSourceLocation()); + outLetList.add(new Pair<>(expr, v)); + return vExpr; + } } diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppCaseRewriteVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppCaseRewriteVisitor.java new file mode 100644 index 0000000..5752aef --- /dev/null +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppCaseRewriteVisitor.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.lang.sqlpp.rewrites.visitor; + +import java.util.List; + +import org.apache.asterix.common.exceptions.CompilationException; +import org.apache.asterix.lang.common.base.AbstractClause; +import org.apache.asterix.lang.common.base.Expression; +import org.apache.asterix.lang.common.base.ILangExpression; +import org.apache.asterix.lang.common.clause.GroupbyClause; +import org.apache.asterix.lang.common.expression.CallExpr; +import org.apache.asterix.lang.common.expression.IfExpr; +import org.apache.asterix.lang.common.rewrites.LangRewritingContext; +import org.apache.asterix.lang.common.struct.VarIdentifier; +import org.apache.asterix.lang.sqlpp.expression.CaseExpression; +import org.apache.asterix.lang.sqlpp.expression.SelectExpression; +import org.apache.asterix.lang.sqlpp.util.FunctionMapUtil; +import org.apache.asterix.lang.sqlpp.visitor.base.AbstractSqlppSimpleExpressionVisitor; +import org.apache.hyracks.algebricks.common.utils.Pair; + +/** + * Extracts SQL-92 aggregate functions from CASE/IF expressions into LET clauses, + * so they can be pushed into GROUPBY subplans by the optimizer. + */ +public final class SqlppCaseRewriteVisitor extends AbstractSqlppExpressionExtractionVisitor { + + public SqlppCaseRewriteVisitor(LangRewritingContext context) { + super(context); + } + + @Override + protected void visitLetWhereClauses(List<AbstractClause> letWhereList, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList) { + // do not perform the extraction + } + + @Override + protected void visitGroupByClause(GroupbyClause groupbyClause, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList, List<AbstractClause> letWhereList) { + // do not perform the extraction + } + + @Override + public Expression visit(CaseExpression caseExpr, ILangExpression arg) throws CompilationException { + Expression resultExpr = super.visit(caseExpr, arg); + resultExpr.accept(new Sql92AggregateExtractionVisitor(), arg); + return resultExpr; + } + + @Override + public Expression visit(IfExpr ifExpr, ILangExpression arg) throws CompilationException { + Expression resultExpr = super.visit(ifExpr, arg); + resultExpr.accept(new Sql92AggregateExtractionVisitor(), arg); + return resultExpr; + } + + private final class Sql92AggregateExtractionVisitor extends AbstractSqlppSimpleExpressionVisitor { + + @Override + public Expression visit(CallExpr callExpr, ILangExpression arg) throws CompilationException { + CallExpr resultExpr = (CallExpr) super.visit(callExpr, arg); + if (FunctionMapUtil.isSql92AggregateFunction(resultExpr.getFunctionSignature())) { + Expression newExpr = extractExpression(resultExpr); + if (newExpr != null) { + return newExpr; + } + } + return resultExpr; + } + + @Override + public Expression visit(SelectExpression selectExpression, ILangExpression arg) { + // don't visit sub-queries + return selectExpression; + } + } +} diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppWindowRewriteVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppWindowRewriteVisitor.java index ba1e7f9..9167ce8 100644 --- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppWindowRewriteVisitor.java +++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/SqlppWindowRewriteVisitor.java @@ -53,6 +53,16 @@ public final class SqlppWindowRewriteVisitor extends AbstractSqlppExpressionExtr } @Override + protected void visitFromClause(FromClause clause, ILangExpression arg, + List<Pair<Expression, VarIdentifier>> extractionList) throws CompilationException { + clause.accept(this, arg); + if (!extractionList.isEmpty()) { + throw new CompilationException(ErrorCode.COMPILATION_UNEXPECTED_WINDOW_EXPRESSION, + clause.getSourceLocation()); + } + } + + @Override public Expression visit(WindowExpression winExpr, ILangExpression arg) throws CompilationException { super.visit(winExpr, arg); @@ -68,14 +78,16 @@ public final class SqlppWindowRewriteVisitor extends AbstractSqlppExpressionExtr rewriteSpecificWindowFunctions(winfi, winExpr); if (BuiltinFunctions.builtinFunctionHasProperty(winfi, BuiltinFunctions.WindowFunctionProperty.HAS_LIST_ARG)) { - List<Expression> newExprList = extractExpressions(winExpr.getExprList(), 1); + List<Expression> newExprList = extractExpressionsFromList(winExpr.getExprList(), 1, + SqlppWindowRewriteVisitor::isExtractableExpression); if (newExprList == null) { throw new CompilationException(ErrorCode.COMPILATION_ERROR, winExpr.getSourceLocation(), ""); } winExpr.setExprList(newExprList); } } else if (FunctionMapUtil.isSql92AggregateFunction(signature)) { - List<Expression> newExprList = extractExpressions(winExpr.getExprList(), winExpr.getExprList().size()); + List<Expression> newExprList = extractExpressionsFromList(winExpr.getExprList(), + winExpr.getExprList().size(), SqlppWindowRewriteVisitor::isExtractableExpression); if (newExprList == null) { throw new CompilationException(ErrorCode.COMPILATION_ERROR, winExpr.getSourceLocation(), ""); } @@ -88,8 +100,7 @@ public final class SqlppWindowRewriteVisitor extends AbstractSqlppExpressionExtr return winExpr; } - @Override - protected boolean isExtractableExpression(Expression expr) { + protected static boolean isExtractableExpression(Expression expr) { switch (expr.getKind()) { case LITERAL_EXPRESSION: case VARIABLE_EXPRESSION: @@ -99,12 +110,6 @@ public final class SqlppWindowRewriteVisitor extends AbstractSqlppExpressionExtr } } - @Override - void handleUnsupportedClause(FromClause clause, List<Pair<Expression, VarIdentifier>> extractionList) - throws CompilationException { - throw new CompilationException(ErrorCode.COMPILATION_UNEXPECTED_WINDOW_EXPRESSION, clause.getSourceLocation()); - } - /** * Apply rewritings for specific window functions: * <ul>
