This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 66fa1bef6d [refactor](Nereids): avoid useless groupByColStats Map (#22000)
66fa1bef6d is described below
commit 66fa1bef6dec5a8b62416d1fa9d4bcecf2f98ebc
Author: jakevin <[email protected]>
AuthorDate: Mon Jul 24 12:13:52 2023 +0800
[refactor](Nereids): avoid useless groupByColStats Map (#22000)
---
.../doris/nereids/stats/StatsCalculator.java | 50 +++++++++++-----------
1 file changed, 24 insertions(+), 26 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index a7cf8648fe..2dda0cfb03 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -130,8 +130,8 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.AbstractMap.SimpleEntry;
+import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -689,38 +689,36 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
private double estimateGroupByRowCount(List<Expression>
groupByExpressions, Statistics childStats) {
double rowCount = 1;
- Map<Expression, ColumnStatistic> groupByColStats = new HashMap<>();
+ // if there is group-bys, output row count is childStats.getRowCount()
* DEFAULT_AGGREGATE_RATIO,
+ // o.w. output row count is 1
+ // example: select sum(A) from T;
+ if (groupByExpressions.isEmpty()) {
+ return 1;
+ }
+ List<Double> groupByNdvs = new ArrayList<>();
for (Expression groupByExpr : groupByExpressions) {
ColumnStatistic colStats =
childStats.findColumnStatistics(groupByExpr);
if (colStats == null) {
colStats = ExpressionEstimation.estimate(groupByExpr,
childStats);
}
- groupByColStats.put(groupByExpr, colStats);
- }
- int groupByCount = groupByExpressions.size();
- if
(groupByColStats.values().stream().anyMatch(ColumnStatistic::isUnKnown)) {
- // if there is group-bys, output row count is
childStats.getRowCount() * DEFAULT_AGGREGATE_RATIO,
- // o.w. output row count is 1
- // example: select sum(A) from T;
- if (groupByCount > 0) {
+ if (colStats.isUnKnown()) {
rowCount = childStats.getRowCount() * DEFAULT_AGGREGATE_RATIO;
- } else {
- rowCount = 1;
+ rowCount = Math.max(1, rowCount);
+ rowCount = Math.min(rowCount, childStats.getRowCount());
+ return rowCount;
}
- } else {
- if (groupByCount > 0) {
- List<Double> groupByNdvs = groupByColStats.values().stream()
- .map(colStats -> colStats.ndv)
-
.sorted(Comparator.reverseOrder()).collect(Collectors.toList());
- rowCount = groupByNdvs.get(0);
- for (int groupByIndex = 1; groupByIndex < groupByCount;
++groupByIndex) {
- rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) *
Math.pow(
- AGGREGATE_COLUMN_CORRELATION_COEFFICIENT,
groupByIndex + 1D));
- if (rowCount > childStats.getRowCount()) {
- rowCount = childStats.getRowCount();
- break;
- }
- }
+ double ndv = colStats.ndv;
+ groupByNdvs.add(ndv);
+ }
+ groupByNdvs.sort(Collections.reverseOrder());
+
+ rowCount = groupByNdvs.get(0);
+ for (int groupByIndex = 1; groupByIndex < groupByExpressions.size();
++groupByIndex) {
+ rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) * Math.pow(
+ AGGREGATE_COLUMN_CORRELATION_COEFFICIENT, groupByIndex +
1D));
+ if (rowCount > childStats.getRowCount()) {
+ rowCount = childStats.getRowCount();
+ break;
}
}
rowCount = Math.max(1, rowCount);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]