This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b9f7f63c81 [Fix](planner) Fix wrong planner with count(*) optmizer for
cross join optimization (#11569)
b9f7f63c81 is described below
commit b9f7f63c81781f2dd756874a09f137bd8176b17e
Author: Kikyou1997 <[email protected]>
AuthorDate: Tue Aug 9 09:01:25 2022 +0800
[Fix](planner) Fix wrong planner with count(*) optmizer for cross join
optimization (#11569)
---
.../org/apache/doris/analysis/AggregateInfo.java | 6 --
.../apache/doris/analysis/FunctionCallExpr.java | 17 -----
.../org/apache/doris/analysis/InlineViewRef.java | 2 +-
.../org/apache/doris/analysis/SlotDescriptor.java | 15 ++++
.../apache/doris/planner/SingleNodePlanner.java | 31 ++++++++
.../java/org/apache/doris/planner/SortNode.java | 11 ++-
.../analysis/CreateTableAsSelectStmtTest.java | 2 +-
.../data/query/aggregate/aggregate_count1.out | 4 ++
.../suites/query/aggregate/aggregate_count1.groovy | 82 ++++++++++++++++++++++
9 files changed, 142 insertions(+), 28 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
index cfba7226da..2e8681f2c6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
@@ -110,11 +110,6 @@ public final class AggregateInfo extends AggregateInfoBase
{
// if set, a subset of groupingExprs_; set and used during planning
private List<Expr> partitionExprs;
- // indices into aggregateExprs for those that need to be materialized;
- // shared between this, mergeAggInfo and secondPhaseDistinctAggInfo
- private ArrayList<Integer> materializedAggregateSlots =
Lists.newArrayList();
- // if true, this AggregateInfo is the first phase of a 2-phase DISTINCT
computation
- private boolean isDistinctAgg = false;
private boolean isUsingSetForDistinct;
// the multi distinct's begin pos and end pos in groupby exprs
@@ -363,7 +358,6 @@ public final class AggregateInfo extends AggregateInfoBase {
}
this.isUsingSetForDistinct =
estimateIfUsingSetForDistinct(distinctAggExprs);
- isDistinctAgg = true;
// add DISTINCT parameters to grouping exprs
if (!isUsingSetForDistinct) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index a2376f1d36..6bf620e9c4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -438,23 +438,6 @@ public class FunctionCallExpr extends Expr {
return fnParams.isDistinct();
}
- public boolean isCountStar() {
- if (fnName.getFunction().equalsIgnoreCase(FunctionSet.COUNT)) {
- if (fnParams.isStar()) {
- return true;
- } else if (fnParams.exprs() == null || fnParams.exprs().isEmpty())
{
- return true;
- } else {
- for (Expr expr : fnParams.exprs()) {
- if (expr.isConstant()) {
- return true;
- }
- }
- }
- }
- return false;
- }
-
public boolean isCountDistinctBitmapOrHLL() {
if (!fnParams.isDistinct()) {
return false;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
index 0fe2b691af..478f76998b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
@@ -206,7 +206,6 @@ public class InlineViewRef extends TableRef {
desc.setIsMaterialized(true);
materializedTupleIds.add(desc.getId());
}
-
// create sMap and baseTblSmap and register auxiliary eq predicates
between our
// tuple descriptor's slots and our *unresolved* select list exprs;
// we create these auxiliary predicates so that the analyzer can
compute the value
@@ -222,6 +221,7 @@ public class InlineViewRef extends TableRef {
String colName = getColLabels().get(i);
SlotDescriptor slotDesc =
analyzer.registerColumnRef(getAliasAsName(), colName);
Expr colExpr = queryStmt.getResultExprs().get(i);
+ slotDesc.setSourceExpr(colExpr);
SlotRef slotRef = new SlotRef(slotDesc);
sMap.put(slotRef, colExpr);
baseTblSmap.put(slotRef, queryStmt.getBaseTblResultExprs().get(i));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
index 8c405011ae..22eae97c1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
@@ -162,6 +162,21 @@ public class SlotDescriptor {
isMaterialized = value;
}
+ public void materializeSrcExpr() {
+ if (sourceExprs == null) {
+ return;
+ }
+ for (Expr expr : sourceExprs) {
+ if (!(expr instanceof SlotRef)) {
+ continue;
+ }
+ SlotRef slotRef = (SlotRef) expr;
+ SlotDescriptor slotDesc = slotRef.getDesc();
+ slotDesc.setIsMaterialized(true);
+ slotDesc.materializeSrcExpr();
+ }
+ }
+
public boolean getIsNullable() {
return isNullable;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 85d9dc99ee..2e257d8884 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -1031,9 +1031,39 @@ public class SingleNodePlanner {
for (int i = 1; i < selectStmt.getTableRefs().size(); ++i) {
TableRef innerRef = selectStmt.getTableRefs().get(i);
+ // Optimization of CountStar would change the materialization
of some outputSlot,
+ // it would cause the materilization inconsistent of SlotRef
between outputTupleDesc
+ // and agg/groupBy expr. So if some not materialized slots in
outputTuple of aggInfo
+ // is set to materialized after the optimization of CountStar,
we need to call
+ // aggregateInfo.materializeRequiredSlots again to make sure
all required slots is
+ // materialized.
+ boolean aggOutputNotHaveMaterializedSlot = false;
+ AggregateInfo aggregateInfo = null;
+ TupleDescriptor output = null;
+ if (innerRef instanceof InlineViewRef) {
+ InlineViewRef inlineViewRef = (InlineViewRef) innerRef;
+ QueryStmt queryStmt = inlineViewRef.getViewStmt();
+ if (queryStmt instanceof SelectStmt) {
+ aggregateInfo = ((SelectStmt) queryStmt).getAggInfo();
+ if (aggregateInfo != null) {
+ output = aggregateInfo.getOutputTupleDesc();
+ aggOutputNotHaveMaterializedSlot =
+
output.getSlots().stream().noneMatch(SlotDescriptor::isMaterialized);
+ }
+ }
+ }
+
root = createJoinNode(analyzer, root, innerRef, selectStmt);
// Have the build side of a join copy data to a compact
representation
// in the tuple buffer.
+ if (aggOutputNotHaveMaterializedSlot
+ && aggregateInfo
+ .getOutputTupleDesc()
+ .getSlots()
+ .stream()
+ .anyMatch(SlotDescriptor::isMaterialized)) {
+ aggregateInfo.materializeRequiredSlots(analyzer, null);
+ }
root.getChildren().get(1).setCompactData(true);
root.assignConjuncts(analyzer);
}
@@ -2288,6 +2318,7 @@ public class SingleNodePlanner {
for (SlotId id : slotIds) {
final SlotDescriptor slot = analyzer.getDescTbl().getSlotDesc(id);
slot.setIsMaterialized(true);
+ slot.materializeSrcExpr();
}
for (TupleId id : tupleIds) {
final TupleDescriptor tuple =
analyzer.getDescTbl().getTupleDesc(id);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index 113f08760d..8395f749b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -52,6 +52,8 @@ import java.util.Set;
/**
* Sorting.
+ * Attention, in some corner case, we need enable projection planner to
promise the correctness of the Plan.
+ * Please refer to this regression
test:regression-test/suites/query/aggregate/aggregate_count1.groovy.
*/
public class SortNode extends PlanNode {
private static final Logger LOG = LogManager.getLogger(SortNode.class);
@@ -211,9 +213,6 @@ public class SortNode extends PlanNode {
outputSmap = new ExprSubstitutionMap();
for (int i = 0; i < slotExprs.size(); ++i) {
- if (!sortTupleSlots.get(i).isMaterialized()) {
- continue;
- }
resolvedTupleExprs.add(slotExprs.get(i));
outputSmap.put(slotExprs.get(i), new
SlotRef(sortTupleSlots.get(i)));
}
@@ -274,6 +273,12 @@ public class SortNode extends PlanNode {
@Override
public Set<SlotId> computeInputSlotIds(Analyzer analyzer) throws
NotImplementedException {
+ List<SlotDescriptor> slotDescriptorList =
this.info.getSortTupleDescriptor().getSlots();
+ for (int i = 0; i < slotDescriptorList.size(); i++) {
+ if (!slotDescriptorList.get(i).isMaterialized()) {
+ resolvedTupleExprs.remove(i);
+ }
+ }
List<SlotId> result = Lists.newArrayList();
Expr.getIds(resolvedTupleExprs, null, result);
return new HashSet<>(result);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
index 44cde9ec1a..bb6dcc1497 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/analysis/CreateTableAsSelectStmtTest.java
@@ -237,7 +237,7 @@ public class CreateTableAsSelectStmtTest extends
TestWithFeService {
createTableAsSelect(selectFromCteAndUnion);
ShowResultSet showResultSet1 =
showCreateTableByName("select_cte_union");
Assertions.assertEquals(
- "CREATE TABLE `select_cte_union` (\n" + " `id` tinyint(4) NOT
NULL\n" + ") ENGINE=OLAP\n"
+ "CREATE TABLE `select_cte_union` (\n" + " `id` tinyint(4)
NULL\n" + ") ENGINE=OLAP\n"
+ "DUPLICATE KEY(`id`)\n" + "COMMENT 'OLAP'\n" +
"DISTRIBUTED BY HASH(`id`) BUCKETS 10\n"
+ "PROPERTIES (\n" + "\"replication_allocation\" =
\"tag.location.default: 1\",\n"
+ "\"in_memory\" = \"false\",\n" + "\"storage_format\"
= \"V2\"\n" + ")",
diff --git a/regression-test/data/query/aggregate/aggregate_count1.out
b/regression-test/data/query/aggregate/aggregate_count1.out
new file mode 100644
index 0000000000..84f6fe8329
--- /dev/null
+++ b/regression-test/data/query/aggregate/aggregate_count1.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+8
+
diff --git a/regression-test/suites/query/aggregate/aggregate_count1.groovy
b/regression-test/suites/query/aggregate/aggregate_count1.groovy
new file mode 100644
index 0000000000..eaf4efb62f
--- /dev/null
+++ b/regression-test/suites/query/aggregate/aggregate_count1.groovy
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("aggregate_count1", "query") {
+ sql "create table aggregate_count1 (\n" +
+ "\n" +
+ " name varchar(128) ,\n" +
+ "\n" +
+ " age INT ,\n" +
+ "\n" +
+ " identityCode varchar(128) ,\n" +
+ "\n" +
+ " cardNo String ,\n" +
+ "\n" +
+ " number String ,\n" +
+ "\n" +
+ " birthday DATETIME ,\n" +
+ "\n" +
+ " country String ,\n" +
+ "\n" +
+ " gender String ,\n" +
+ "\n" +
+ " covid BOOLEAN \n" +
+ "\n" +
+ ")UNIQUE KEY(name,age,`identityCode`)\n" +
+ "\n" +
+ "DISTRIBUTED BY HASH(`identityCode`) BUCKETS 10\n" +
+ "\n" +
+ "PROPERTIES(\"replication_num\" = \"1\");\n" +
+ "\n"
+ sql "insert into aggregate_count1 values
('张三0',11,'1234567','123','321312','1999-02-13','中国','男',false)," +
+
"('张三1',11,'12345678','123','321312','1999-02-13','中国','男',false)," +
+
"('张三2',11,'12345671','123','321312','1999-02-13','中国','男',false)," +
+
"('张三3',11,'12345673','123','321312','1999-02-13','中国','男',false)," +
+
"('张三4',11,'123456711','123','321312','1999-02-13','中国','男',false)," +
+
"('张三5',11,'1232134567','123','321312','1999-02-13','中国','男',false)," +
+
"('张三6',11,'124314567','123','321312','1999-02-13','中国','男',false)," +
+
"('张三7',11,'123445167','123','321312','1998-02-13','中国','男',false);"
+ qt_select "SELECT count(1) FROM (WITH t1 AS (\n" +
+ " WITH t AS (\n" +
+ " SELECT * FROM aggregate_count1\n" +
+ " )\n" +
+ " SELECT\n" +
+ " identityCode,\n" +
+ " COUNT(1) as dataAmount,\n" +
+ " ROUND(COUNT(1) / tableWithSum.sumResult,4) as
proportion,\n" +
+ " MD5(identityCode) as virtuleUniqKey\n" +
+ " FROM t,(SELECT COUNT(1) as sumResult from t)
tableWithSum\n" +
+ " GROUP BY identityCode ,tableWithSum.sumResult\n" +
+ ")\n" +
+ "SELECT\n" +
+ " identityCode,dataAmount,\n" +
+ " (\n" +
+ " CASE\n" +
+ " WHEN t1.virtuleUniqKey =
tableWithMaxId.max_virtuleUniqKey THEN\n" +
+ " ROUND(proportion + calcTheTail, 4)\n" +
+ " ELSE\n" +
+ " proportion\n" +
+ " END\n" +
+ " ) proportion\n" +
+ "FROM t1,\n" +
+ " (SELECT (1 - sum(t1.proportion)) as calcTheTail FROM t1 )
tableWithTail,\n" +
+ " (SELECT virtuleUniqKey as max_virtuleUniqKey FROM t1
ORDER BY proportion DESC LIMIT 1 ) tableWithMaxId\n" +
+ "ORDER BY identityCode) t_a76fe3e829ddb51;"
+ sql "drop table aggregate_count1"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]