yangzhg commented on a change in pull request #3150: Support non-correlated 
subquery in having clause
URL: https://github.com/apache/incubator-doris/pull/3150#discussion_r395505446
 
 

 ##########
 File path: fe/src/main/java/org/apache/doris/analysis/StmtRewriter.java
 ##########
 @@ -97,8 +103,128 @@ private static void rewriteSelectStatement(SelectStmt 
stmt, Analyzer analyzer)
             }
             rewriteWhereClauseSubqueries(stmt, analyzer);
         }
+        // Rewrite all subquery in the having clause
+        if (stmt.getHavingPred() != null && stmt.getHavingPred().getSubquery() 
!= null) {
+            stmt = rewriteHavingClauseSubqueries(stmt, analyzer);
+        }
         stmt.sqlString_ = null;
         if (LOG.isDebugEnabled()) LOG.debug("rewritten stmt: " + stmt.toSql());
+        return stmt;
+    }
+
+    /**
+     * Rewrite having subquery.
+     * Step1: rewrite having subquery to where subquery
+     * Step2: rewrite where subquery
+     * <p>
+     * For example:
+     * select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by 
cs_item_sk
+     * having sum(cs_sales_price) >
+     *        (select min(cs_sales_price) from catalog_sales b where 
a.cs_item_sk = b.cs_item_sk);
+     * <p>
+     * Step1: rewrite having subquery to where subquery
+     * Outer query is changed to inline view in rewritten query
+     * Inline view of outer query:
+     *     from (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price 
from catalog_sales group by cs_item_sk) a
+     * Rewritten subquery of expr:
+     *     where a.sum_cs_sales_price >
+     *           (select min(cs_sales_price) from catalog_sales b where 
a.cs_item_sk = b.cs_item_sk)
+     * Rewritten query:
+     *     select cs_item_sk, a.sum_cs_sales_price from
+     *     (select cs_item_sk, sum(cs_sales_price) sum_cs_sales_price from 
catalog_sales group by cs_item_sk) a
+     *     where a.sum_cs_sales_price >
+     *           (select min(cs_sales_price) from catalog_sales b where 
a.cs_item_sk = b.cs_item_sk)
+     * <p>
+     * Step2: rewrite where subquery
+     * Inline view of subquery:
+     *     from (select b.cs_item_sk, min(cs_sales_price) from catalog_sales b 
group by cs_item_sk) c
+     * Rewritten correlated predicate:
+     *     where c.cs_item_sk = a.cs_item_sk and a.sum_cs_sales_price > 
c.min(cs_sales_price)
+     *
+     * @param stmt
+     * @param analyzer
+     */
+    private static SelectStmt rewriteHavingClauseSubqueries(SelectStmt stmt, 
Analyzer analyzer) throws AnalysisException {
+        // extract having predicate
+        Expr havingPredicate = stmt.getHavingPred();
+        Preconditions.checkState(havingPredicate != null);
+        Preconditions.checkState(havingPredicate.getSubquery() != null);
+        // extract result of stmt
+        List<Expr> leftExprList = stmt.getResultExprs();
+        // extract table alias generator
+        TableAliasGenerator tableAliasGenerator = 
stmt.getTableAliasGenerator();
+
+        /*
+         * The outer query is changed to inline view without having predicate
+         * For example:
+         * Query: select cs_item_sk, sum(cs_sales_price) from catalog_sales a 
group by cs_item_sk having ...;
+         * Inline view:
+         *     from (select cs_item_sk $ColumnA, sum(cs_sales_price) $ColumnB 
from catalog_sales a group by cs_item_sk) $TableA
+         */
+        SelectStmt inlineViewQuery = (SelectStmt) stmt.clone();
+        inlineViewQuery.reset();
+        inlineViewQuery.removeHavingClause();
+        // add a new alias for all of columns in subquery
+        List<String> colAliasOfInlineView = Lists.newArrayList();
+        for (int i = 0; i < inlineViewQuery.getSelectList().getItems().size(); 
++i) {
+            
colAliasOfInlineView.add(inlineViewQuery.getColumnAliasGenerator().getNextAlias());
+        }
+        InlineViewRef inlineViewRef = new 
InlineViewRef(tableAliasGenerator.getNextAlias(), inlineViewQuery,
+                colAliasOfInlineView);
+        try {
+            inlineViewRef.analyze(analyzer);
+        } catch (UserException e) {
+            throw new AnalysisException(e.getMessage());
+        }
+        LOG.debug("Outer query is changed to " + 
inlineViewRef.tableRefToSql());
+
+        /*
+         * Columns which belong to outer query can substitute for output 
columns of inline view
+         * For example:
+         * Having predicate: sum(cs_sales_price) >
+         *                   (select min(cs_sales_price) from catalog_sales b 
where a.cs_item_sk = b.cs_item_sk);
+         * Columns which belong to outer query: sum(cs_sales_price), 
a.cs_item_sk
+         * SMap: <cs_item_sk $ColumnA> <sum(cs_sales_price) $ColumnB>
+         * After substitute: $ColumnB >
+         *                   (select min(cs_sales_price) from catalog_sales b 
where $ColumnA = b.cs_item_sk)
+         */
+        /*
+         * Prepare select list of new query.
+         * Generate a new select item for each original columns in select list
+         */
+        havingPredicate.reset();
+        ExprSubstitutionMap smap = new ExprSubstitutionMap();
+        SelectList newSelectList = new SelectList();
+        for (int i = 0; i < inlineViewQuery.getSelectList().getItems().size(); 
i++) {
+            Expr leftExpr = leftExprList.get(i);
+            Expr rightExpr = new SlotRef(inlineViewRef.getAliasAsName(), 
colAliasOfInlineView.get(i));
+            rightExpr.analyze(analyzer);
+            smap.put(leftExpr, rightExpr);
+            // construct outer query select list
+            SelectListItem selectListItem = new SelectListItem(rightExpr, 
stmt.getColLabels().get(i));
+            newSelectList.addItem(selectListItem);
+        }
+        Expr newWherePredicate = havingPredicate.substitute(smap, 
analyzer,false);
+        LOG.debug("Having predicate is changed to " + 
newWherePredicate.toSql());
+
+        // construct rewritten query
+        List<TableRef> newTableRefList = Lists.newArrayList();
+        newTableRefList.add(inlineViewRef);
+        FromClause newFromClause = new FromClause(newTableRefList);
+        SelectStmt result = new SelectStmt(newSelectList, newFromClause, 
newWherePredicate, null, null, null,
+                LimitElement.NO_LIMIT);
 
 Review comment:
   what if  the origin query is 
   ```
   select cs_item_sk, sum(cs_sales_price) from catalog_sales a group by 
cs_item_sk
        having sum(cs_sales_price) >
        (select min(cs_sales_price) from catalog_sales b where a.cs_item_sk = 
b.cs_item_sk) limit 1;
   ```
   on limit after rewrite
   what about order by ?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to