This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b9f7f63c81 [Fix](planner) Fix wrong planner with count(*) optmizer for 
cross join optimization (#11569)
b9f7f63c81 is described below

commit b9f7f63c81781f2dd756874a09f137bd8176b17e
Author: Kikyou1997 <[email protected]>
AuthorDate: Tue Aug 9 09:01:25 2022 +0800

    [Fix](planner) Fix wrong planner with count(*) optmizer for cross join 
optimization (#11569)
---
 .../org/apache/doris/analysis/AggregateInfo.java   |  6 --
 .../apache/doris/analysis/FunctionCallExpr.java    | 17 -----
 .../org/apache/doris/analysis/InlineViewRef.java   |  2 +-
 .../org/apache/doris/analysis/SlotDescriptor.java  | 15 ++++
 .../apache/doris/planner/SingleNodePlanner.java    | 31 ++++++++
 .../java/org/apache/doris/planner/SortNode.java    | 11 ++-
 .../analysis/CreateTableAsSelectStmtTest.java      |  2 +-
 .../data/query/aggregate/aggregate_count1.out      |  4 ++
 .../suites/query/aggregate/aggregate_count1.groovy | 82 ++++++++++++++++++++++
 9 files changed, 142 insertions(+), 28 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
index cfba7226da..2e8681f2c6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
@@ -110,11 +110,6 @@ public final class AggregateInfo extends AggregateInfoBase 
{
     // if set, a subset of groupingExprs_; set and used during planning
     private List<Expr> partitionExprs;
 
-    // indices into aggregateExprs for those that need to be materialized;
-    // shared between this, mergeAggInfo and secondPhaseDistinctAggInfo
-    private ArrayList<Integer> materializedAggregateSlots = 
Lists.newArrayList();
-    // if true, this AggregateInfo is the first phase of a 2-phase DISTINCT 
computation
-    private boolean isDistinctAgg = false;
     private boolean isUsingSetForDistinct;
 
     // the multi distinct's begin pos  and end pos in groupby exprs
@@ -363,7 +358,6 @@ public final class AggregateInfo extends AggregateInfoBase {
         }
 
         this.isUsingSetForDistinct = 
estimateIfUsingSetForDistinct(distinctAggExprs);
-        isDistinctAgg = true;
 
         // add DISTINCT parameters to grouping exprs
         if (!isUsingSetForDistinct) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index a2376f1d36..6bf620e9c4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -438,23 +438,6 @@ public class FunctionCallExpr extends Expr {
         return fnParams.isDistinct();
     }
 
-    public boolean isCountStar() {
-        if (fnName.getFunction().equalsIgnoreCase(FunctionSet.COUNT)) {
-            if (fnParams.isStar()) {
-                return true;
-            } else if (fnParams.exprs() == null || fnParams.exprs().isEmpty()) 
{
-                return true;
-            } else {
-                for (Expr expr : fnParams.exprs()) {
-                    if (expr.isConstant()) {
-                        return true;
-                    }
-                }
-            }
-        }
-        return false;
-    }
-
     public boolean isCountDistinctBitmapOrHLL() {
         if (!fnParams.isDistinct()) {
             return false;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
index 0fe2b691af..478f76998b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
@@ -206,7 +206,6 @@ public class InlineViewRef extends TableRef {
             desc.setIsMaterialized(true);
             materializedTupleIds.add(desc.getId());
         }
-
         // create sMap and baseTblSmap and register auxiliary eq predicates 
between our
         // tuple descriptor's slots and our *unresolved* select list exprs;
         // we create these auxiliary predicates so that the analyzer can 
compute the value
@@ -222,6 +221,7 @@ public class InlineViewRef extends TableRef {
             String colName = getColLabels().get(i);
             SlotDescriptor slotDesc = 
analyzer.registerColumnRef(getAliasAsName(), colName);
             Expr colExpr = queryStmt.getResultExprs().get(i);
+            slotDesc.setSourceExpr(colExpr);
             SlotRef slotRef = new SlotRef(slotDesc);
             sMap.put(slotRef, colExpr);
             baseTblSmap.put(slotRef, queryStmt.getBaseTblResultExprs().get(i));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
index 8c405011ae..22eae97c1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
@@ -162,6 +162,21 @@ public class SlotDescriptor {
         isMaterialized = value;
     }
 
+    public void materializeSrcExpr() {
+        if (sourceExprs == null) {
+            return;
+        }
+        for (Expr expr : sourceExprs) {
+            if (!(expr instanceof SlotRef)) {
+                continue;
+            }
+            SlotRef slotRef = (SlotRef) expr;
+            SlotDescriptor slotDesc = slotRef.getDesc();
+            slotDesc.setIsMaterialized(true);
+            slotDesc.materializeSrcExpr();
+        }
+    }
+
     public boolean getIsNullable() {
         return isNullable;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 85d9dc99ee..2e257d8884 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -1031,9 +1031,39 @@ public class SingleNodePlanner {
 
             for (int i = 1; i < selectStmt.getTableRefs().size(); ++i) {
                 TableRef innerRef = selectStmt.getTableRefs().get(i);
+                // Optimization of CountStar would change the materialization 
of some outputSlot,
+                // it would cause the materilization inconsistent of SlotRef 
between outputTupleDesc
+                // and agg/groupBy expr. So if some not materialized slots in 
outputTuple of aggInfo
+                // is set to materialized after the optimization of CountStar, 
we need to call
+                // aggregateInfo.materializeRequiredSlots again to make sure 
all required slots is
+                // materialized.
+                boolean aggOutputNotHaveMaterializedSlot = false;
+                AggregateInfo aggregateInfo = null;
+                TupleDescriptor output = null;
+                if (innerRef instanceof InlineViewRef) {
+                    InlineViewRef inlineViewRef = (InlineViewRef) innerRef;
+                    QueryStmt queryStmt = inlineViewRef.getViewStmt();
+                    if (queryStmt instanceof SelectStmt) {
+                        aggregateInfo = ((SelectStmt) queryStmt).getAggInfo();
+                        if (aggregateInfo != null) {
+                            output = aggregateInfo.getOutputTupleDesc();
+                            aggOutputNotHaveMaterializedSlot =
+                                    
output.getSlots().stream().noneMatch(SlotDescriptor::isMaterialized);
+                        }
+                    }
+                }
+
                 root = createJoinNode(analyzer, root, innerRef, selectStmt);
                 // Have the build side of a join copy data to a compact 
representation
                 // in the tuple buffer.
+                if (aggOutputNotHaveMaterializedSlot
+                        && aggregateInfo
+                        .getOutputTupleDesc()
+                        .getSlots()
+                        .stream()
+                        .anyMatch(SlotDescriptor::isMaterialized)) {
+                    aggregateInfo.materializeRequiredSlots(analyzer, null);
+                }
                 root.getChildren().get(1).setCompactData(true);
                 root.assignConjuncts(analyzer);
             }
@@ -2288,6 +2318,7 @@ public class SingleNodePlanner {
         for (SlotId id : slotIds) {
             final SlotDescriptor slot = analyzer.getDescTbl().getSlotDesc(id);
             slot.setIsMaterialized(true);
+            slot.materializeSrcExpr();
         }
         for (TupleId id : tupleIds) {
             final TupleDescriptor tuple = 
analyzer.getDescTbl().getTupleDesc(id);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index 113f08760d..8395f749b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -52,6 +52,8 @@ import java.util.Set;
 
 /**
  * Sorting.
+ * Attention, in some corner case, we need enable projection planner to 
promise the correctness of the Plan.
+ * Please refer to this regression 
test:regression-test/suites/query/aggregate/aggregate_count1.groovy.
  */
 public class SortNode extends PlanNode {
     private static final Logger LOG = LogManager.getLogger(SortNode.class);
@@ -211,9 +213,6 @@ public class SortNode extends PlanNode {
         outputSmap = new ExprSubstitutionMap();
 
         for (int i = 0; i < slotExprs.size(); ++i) {
-            if (!sortTupleSlots.get(i).isMaterialized()) {
-                continue;
-            }
             resolvedTupleExprs.add(slotExprs.get(i));
             outputSmap.put(slotExprs.get(i), new 
SlotRef(sortTupleSlots.get(i)));
         }
@@ -274,6 +273,12 @@ public class SortNode extends PlanNode {
 
     @Override
     public Set<SlotId> computeInputSlotIds(Analyzer analyzer) throws 
NotImplementedException {
+        List<SlotDescriptor> slotDescriptorList = 
this.info.getSortTupleDescriptor().getSlots();
+        for (int i = 0; i < slotDescriptorList.size(); i++) {
+            if (!slotDescriptorList.get(i).isMaterialized()) {
+                resolvedTupleExprs.remove(i);
+            }
+        }
         List<SlotId> result = Lists.newArrayList();
         Expr.getIds(resolvedTupleExprs, null, result);
         return new HashSet<>(result);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
index 44cde9ec1a..bb6dcc1497 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
@@ -237,7 +237,7 @@ public class CreateTableAsSelectStmtTest extends 
TestWithFeService {
         createTableAsSelect(selectFromCteAndUnion);
         ShowResultSet showResultSet1 = 
showCreateTableByName("select_cte_union");
         Assertions.assertEquals(
-                "CREATE TABLE `select_cte_union` (\n" + "  `id` tinyint(4) NOT 
NULL\n" + ") ENGINE=OLAP\n"
+                "CREATE TABLE `select_cte_union` (\n" + "  `id` tinyint(4) 
NULL\n" + ") ENGINE=OLAP\n"
                         + "DUPLICATE KEY(`id`)\n" + "COMMENT 'OLAP'\n" + 
"DISTRIBUTED BY HASH(`id`) BUCKETS 10\n"
                         + "PROPERTIES (\n" + "\"replication_allocation\" = 
\"tag.location.default: 1\",\n"
                         + "\"in_memory\" = \"false\",\n" + "\"storage_format\" 
= \"V2\"\n" + ")",
diff --git a/regression-test/data/query/aggregate/aggregate_count1.out 
b/regression-test/data/query/aggregate/aggregate_count1.out
new file mode 100644
index 0000000000..84f6fe8329
--- /dev/null
+++ b/regression-test/data/query/aggregate/aggregate_count1.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+8
+
diff --git a/regression-test/suites/query/aggregate/aggregate_count1.groovy 
b/regression-test/suites/query/aggregate/aggregate_count1.groovy
new file mode 100644
index 0000000000..eaf4efb62f
--- /dev/null
+++ b/regression-test/suites/query/aggregate/aggregate_count1.groovy
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("aggregate_count1", "query") {
+    sql "create table aggregate_count1 (\n" +
+            "\n" +
+            "    name varchar(128) ,\n" +
+            "\n" +
+            "    age INT ,\n" +
+            "\n" +
+            "    identityCode  varchar(128) ,\n" +
+            "\n" +
+            "    cardNo String ,\n" +
+            "\n" +
+            "    number String ,\n" +
+            "\n" +
+            "    birthday DATETIME ,\n" +
+            "\n" +
+            "    country String ,\n" +
+            "\n" +
+            "    gender String ,\n" +
+            "\n" +
+            "    covid BOOLEAN \n" +
+            "\n" +
+            ")UNIQUE KEY(name,age,`identityCode`)\n" +
+            "\n" +
+            "DISTRIBUTED BY HASH(`identityCode`) BUCKETS 10\n" +
+            "\n" +
+            "PROPERTIES(\"replication_num\" = \"1\");\n" +
+            "\n"
+    sql "insert into aggregate_count1 values 
('张三0',11,'1234567','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三1',11,'12345678','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三2',11,'12345671','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三3',11,'12345673','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三4',11,'123456711','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三5',11,'1232134567','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三6',11,'124314567','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三7',11,'123445167','123','321312','1998-02-13','中国','男',false);"
+    qt_select "SELECT count(1) FROM (WITH t1 AS (\n" +
+            "     WITH t AS (\n" +
+            "                SELECT * FROM aggregate_count1\n" +
+            "        )\n" +
+            "        SELECT\n" +
+            "                identityCode,\n" +
+            "                COUNT(1) as dataAmount,\n" +
+            "                ROUND(COUNT(1) / tableWithSum.sumResult,4) as 
proportion,\n" +
+            "               MD5(identityCode) as virtuleUniqKey\n" +
+            "        FROM t,(SELECT COUNT(1) as sumResult from t) 
tableWithSum\n" +
+            "        GROUP BY identityCode ,tableWithSum.sumResult\n" +
+            ")\n" +
+            "SELECT\n" +
+            "        identityCode,dataAmount,\n" +
+            "        (\n" +
+            "                CASE\n" +
+            "                WHEN t1.virtuleUniqKey = 
tableWithMaxId.max_virtuleUniqKey THEN\n" +
+            "                        ROUND(proportion + calcTheTail, 4)\n" +
+            "                ELSE\n" +
+            "                        proportion\n" +
+            "              END\n" +
+            "        ) proportion\n" +
+            "FROM t1,\n" +
+            "        (SELECT (1 - sum(t1.proportion)) as calcTheTail FROM t1 ) 
tableWithTail,\n" +
+            "        (SELECT virtuleUniqKey as  max_virtuleUniqKey FROM t1 
ORDER BY proportion DESC LIMIT 1 ) tableWithMaxId\n" +
+            "ORDER BY identityCode) t_a76fe3e829ddb51;"
+    sql "drop table aggregate_count1"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to