This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 80482c5fc4f [opt](nereids) clean count usage in ColumnStatistic during
stats deriving (#40654)
80482c5fc4f is described below
commit 80482c5fc4f0d4098fc8a7c06d1c1f5c07b73833
Author: xzj7019 <[email protected]>
AuthorDate: Thu Sep 19 20:56:45 2024 +0800
[opt](nereids) clean count usage in ColumnStatistic during stats deriving
(#40654)
## Proposed changes
Stats deriving refinement step 1: clean up count usage in
ColumnStatistic during stats deriving (mainly for the stats-available case), to
avoid serious stats deriving problems.
a. use the rowCount in Statistics instead of the count in ColumnStatistic during
stats deriving, since these two values may be inconsistent and lead to stats
deriving problems.
b. remove the setCount interface to avoid unexpectedly using this count field
during deriving in the future.
c. refine the notNullSelectivity computation and the corresponding estimation.
Benchmark plan shape change:
- tpcds query74: no performance impact.
---------
Co-authored-by: zhongjian.xzj
<[email protected]>
---
.../doris/datasource/hive/HMSExternalTable.java | 3 +-
.../doris/nereids/stats/ExpressionEstimation.java | 19 +-
.../doris/nereids/stats/FilterEstimation.java | 40 ++---
.../doris/nereids/stats/StatsCalculator.java | 27 ++-
.../trees/expressions/functions/table/Numbers.java | 2 +-
.../org/apache/doris/statistics/ColStatsData.java | 3 +-
.../apache/doris/statistics/ColumnStatistic.java | 33 +---
.../doris/statistics/ColumnStatisticBuilder.java | 19 +-
.../doris/statistics/OlapScanStatsDerive.java | 3 -
.../doris/statistics/PartitionColumnStatistic.java | 11 +-
.../PartitionColumnStatisticBuilder.java | 4 +-
.../org/apache/doris/statistics/Statistics.java | 9 -
.../doris/statistics/StatisticsRepository.java | 3 +-
.../apache/doris/statistics/StatsDeriveResult.java | 21 ---
.../doris/statistics/util/StatisticsUtil.java | 32 ++--
.../doris/nereids/stats/FilterEstimationTest.java | 192 +++++++++------------
.../doris/nereids/stats/JoinEstimateTest.java | 15 +-
.../doris/nereids/stats/StatsCalculatorTest.java | 8 +-
.../doris/statistics/StatsDeriveResultTest.java | 58 -------
.../data/nereids_hint_tpcds_p0/shape/query74.out | 14 +-
.../shape/query74.out | 14 +-
.../noStatsRfPrune/query74.out | 14 +-
.../no_stats_shape/query74.out | 14 +-
.../tpcds_sf100/noStatsRfPrune/query74.out | 14 +-
.../tpcds_sf100/no_stats_shape/query74.out | 14 +-
.../new_shapes_p0/tpcds_sf1000/shape/query74.out | 14 +-
26 files changed, 220 insertions(+), 380 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 3968c61c3f3..1c1a28242f4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -603,9 +603,8 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
if (!parameters.containsKey(NUM_ROWS) ||
Long.parseLong(parameters.get(NUM_ROWS)) == 0) {
return Optional.empty();
}
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
long count = Long.parseLong(parameters.get(NUM_ROWS));
- columnStatisticBuilder.setCount(count);
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(count);
// The tableStats length is at most 1.
for (ColumnStatisticsObj tableStat : tableStats) {
if (!tableStat.isSetStatsData()) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
index 4068fc6b064..126e9041721 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
@@ -243,7 +243,7 @@ public class ExpressionEstimation extends
ExpressionVisitor<ColumnStatistic, Sta
.setMaxValue(literalVal)
.setMinValue(literalVal)
.setNdv(1)
- .setNumNulls(1)
+ .setNumNulls(literal.isNullLiteral() ? 1 : 0)
.setAvgSizeByte(1)
.setMinExpr(literal.toLegacyLiteral())
.setMaxExpr(literal.toLegacyLiteral())
@@ -274,13 +274,13 @@ public class ExpressionEstimation extends
ExpressionVisitor<ColumnStatistic, Sta
int exprResultTypeWidth = binaryArithmetic.getDataType().width();
double dataSize = exprResultTypeWidth * rowCount;
if (binaryArithmetic instanceof Add) {
- return new
ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
+ return new
ColumnStatisticBuilder().setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
.setNumNulls(numNulls).setDataSize(dataSize).setMinValue(leftMin + rightMin)
.setMaxValue(leftMax + rightMax)
.setMinExpr(null).setMaxExpr(null).build();
}
if (binaryArithmetic instanceof Subtract) {
- return new
ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
+ return new
ColumnStatisticBuilder().setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
.setNumNulls(numNulls).setDataSize(dataSize).setMinValue(leftMin - rightMax)
.setMaxValue(leftMax - rightMin).setMinExpr(null)
.setMaxExpr(null).build();
@@ -297,7 +297,7 @@ public class ExpressionEstimation extends
ExpressionVisitor<ColumnStatistic, Sta
Math.max(leftMin * rightMin, leftMin * rightMax),
leftMax * rightMin),
leftMax * rightMax);
- return new
ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
+ return new
ColumnStatisticBuilder().setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
.setNumNulls(numNulls).setDataSize(dataSize).setMinValue(min).setMaxValue(max)
.setMaxExpr(null).setMinExpr(null).build();
}
@@ -312,14 +312,14 @@ public class ExpressionEstimation extends
ExpressionVisitor<ColumnStatistic, Sta
Math.max(leftMin / noneZeroDivisor(rightMin),
leftMin / noneZeroDivisor(rightMax)),
leftMax / noneZeroDivisor(rightMin)),
leftMax / noneZeroDivisor(rightMax));
- return new
ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
+ return new
ColumnStatisticBuilder().setNdv(ndv).setAvgSizeByte(leftColStats.avgSizeByte)
.setNumNulls(numNulls).setDataSize(binaryArithmetic.getDataType().width()).setMinValue(min)
.setMaxValue(max).build();
}
if (binaryArithmetic instanceof Mod) {
double min = -Math.max(Math.abs(rightMin), Math.abs(rightMax));
double max = -min;
- return new ColumnStatisticBuilder().setCount(rowCount).setNdv(ndv)
+ return new ColumnStatisticBuilder().setNdv(ndv)
.setAvgSizeByte(exprResultTypeWidth)
.setDataSize(dataSize)
.setNumNulls(numNulls)
@@ -363,8 +363,7 @@ public class ExpressionEstimation extends
ExpressionVisitor<ColumnStatistic, Sta
public ColumnStatistic visitCount(Count count, Statistics context) {
double width = count.getDataType().width();
// for scalar agg, ndv and row count will be normalized by 1 in
StatsCalculator.computeAggregate()
- return new
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(context.getRowCount())
- .setAvgSizeByte(width).build();
+ return new
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setAvgSizeByte(width).build();
}
// TODO: return a proper estimated stat after supports histogram
@@ -382,14 +381,14 @@ public class ExpressionEstimation extends
ExpressionVisitor<ColumnStatistic, Sta
@Override
public ColumnStatistic visitYear(Year year, Statistics context) {
ColumnStatistic childStat = year.child().accept(this, context);
+ double rowCount = context.getRowCount();
long minYear = 1970;
long maxYear = 2038;
return new ColumnStatisticBuilder()
- .setCount(childStat.count)
.setNdv(maxYear - minYear + 1)
.setAvgSizeByte(4)
.setNumNulls(childStat.numNulls)
- .setDataSize(4 * childStat.count)
+ .setDataSize(4 * rowCount)
.setMinValue(minYear)
.setMaxValue(maxYear).setMinExpr(null).build();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index a65a07fea30..b3576a0e58e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -323,6 +323,8 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
} else {
double ndv = statsForLeft.ndv;
+ double numNulls = statsForLeft.numNulls;
+ double rowCount = context.statistics.getRowCount();
if (statsForRight.isUnKnown) {
if (ndv >= 1.0) {
selectivity = 1.0 / ndv;
@@ -338,7 +340,7 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
} else {
selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
}
- selectivity = getNotNullSelectivity(statsForLeft, selectivity);
+ selectivity = getNotNullSelectivity(numNulls, rowCount, ndv,
selectivity);
}
}
Statistics equalStats = context.statistics.withSel(selectivity);
@@ -451,7 +453,8 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
compareExprStatsBuilder.setNumNulls(0);
Statistics estimated = new
StatisticsBuilder(context.statistics).build();
ColumnStatistic stats = compareExprStatsBuilder.build();
- selectivity = getNotNullSelectivity(stats, selectivity);
+ selectivity = getNotNullSelectivity(compareExprStats.numNulls,
estimated.getRowCount(),
+ compareExprStats.ndv, selectivity);
estimated = estimated.withSel(selectivity);
estimated.addColumnStats(compareExpr, stats);
context.addKeyIfSlot(compareExpr);
@@ -546,7 +549,7 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
outputRowCount = Math.max(outputRowCount, 1);
}
ColumnStatisticBuilder colBuilder = new
ColumnStatisticBuilder(childColStats);
- colBuilder.setCount(outputRowCount).setNumNulls(outputRowCount)
+ colBuilder.setNumNulls(outputRowCount)
.setMaxValue(Double.POSITIVE_INFINITY)
.setMinValue(Double.NEGATIVE_INFINITY)
.setNdv(0);
@@ -597,7 +600,6 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
.setMaxValue(Double.POSITIVE_INFINITY)
.setMaxExpr(null)
.setNdv(0)
- .setCount(0)
.setNumNulls(0);
} else {
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
@@ -615,9 +617,8 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
} else {
sel = Math.max(sel, RANGE_SELECTIVITY_THRESHOLD);
}
- sel = getNotNullSelectivity(leftStats, sel);
+ sel = getNotNullSelectivity(leftStats.numNulls,
context.statistics.getRowCount(), leftStats.ndv, sel);
updatedStatistics = context.statistics.withSel(sel);
-
leftColumnStatisticBuilder.setCount(updatedStatistics.getRowCount());
}
updatedStatistics.addColumnStats(leftExpr,
leftColumnStatisticBuilder.build());
context.addKeyIfSlot(leftExpr);
@@ -720,36 +721,27 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
@Override
public Statistics visitLike(Like like, EstimationContext context) {
StatisticsBuilder statsBuilder = new
StatisticsBuilder(context.statistics);
- statsBuilder.setRowCount(context.statistics.getRowCount() *
DEFAULT_LIKE_COMPARISON_SELECTIVITY);
+ double rowCount = context.statistics.getRowCount() *
DEFAULT_LIKE_COMPARISON_SELECTIVITY;
+ statsBuilder.setRowCount(rowCount);
if (like.left() instanceof Slot) {
ColumnStatistic origin =
context.statistics.findColumnStatistics(like.left());
Preconditions.checkArgument(origin != null,
"col stats not found. slot=%s in %s",
like.left().toSql(), like.toSql());
ColumnStatisticBuilder colBuilder = new
ColumnStatisticBuilder(origin);
- double selectivity =
StatsMathUtil.divide(DEFAULT_LIKE_COMPARISON_SELECTIVITY, origin.ndv);
- double notNullSel = getNotNullSelectivity(origin, selectivity);
- colBuilder.setNdv(origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY)
- .setCount(notNullSel *
context.statistics.getRowCount()).setNumNulls(0);
+ colBuilder.setNdv(origin.ndv *
DEFAULT_LIKE_COMPARISON_SELECTIVITY).setNumNulls(0);
statsBuilder.putColumnStatistics(like.left(), colBuilder.build());
context.addKeyIfSlot(like.left());
}
return statsBuilder.build();
}
- private double getNotNullSelectivity(ColumnStatistic stats, double
origSel) {
- double rowCount = stats.count;
- double numNulls = stats.numNulls;
-
- // comment following check since current rowCount and ndv may be
inconsistant
- // e.g, rowCount has been reduced by one filter but another filter
column's
- // ndv and numNull remains originally, which will unexpectedly go into
the following
- // normalization.
-
- //if (numNulls > rowCount - ndv) {
- // numNulls = rowCount - ndv > 0 ? rowCount - ndv : 0;
- //}
- double notNullSel = rowCount <= 1.0 ? 1.0 : 1 -
Statistics.getValidSelectivity(numNulls / rowCount);
+ private double getNotNullSelectivity(double origNumNulls, double
origRowCount, double origNdv, double origSel) {
+ if (origNumNulls > origRowCount - origNdv) {
+ origNumNulls = origRowCount - origNdv > 0 ? origRowCount - origNdv
: 0;
+ }
+ double notNullSel = origRowCount <= 1.0 ? 1.0 : 1 - Statistics
+ .getValidSelectivity(origNumNulls / origRowCount);
double validSel = origSel * notNullSel;
return Statistics.getValidSelectivity(validSel);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 5946192a27e..346d1ae029f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -403,8 +403,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
for (Slot slot : ((Relation) olapScan).getOutput()) {
if (derivedStats.findColumnStatistics(slot) == null) {
derivedStats.addColumnStats(slot,
- new
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN)
-
.setCount(derivedRowCount).build());
+ new
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN, derivedRowCount).build());
}
}
return derivedStats;
@@ -431,7 +430,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
// get row count from any visible slotReference's colStats
for (Slot slot : ((Plan) olapScan).getOutput()) {
builder.putColumnStatistics(slot,
- new
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(tableRowCount).build());
+ new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN,
tableRowCount).build());
}
setHasUnknownColStatsInStatementContext();
return builder.setRowCount(tableRowCount).build();
@@ -463,8 +462,8 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
});
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache =
getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
- ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache);
- colStatsBuilder.setCount(selectedPartitionsRowCount);
+ ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache,
+ selectedPartitionsRowCount);
colStatsBuilder.normalizeAvgSizeByte(slot);
builder.putColumnStatistics(slot, colStatsBuilder.build());
}
@@ -478,8 +477,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
// get table level stats
for (SlotReference slot : visibleOutputSlots) {
ColumnStatistic cache =
getColumnStatsFromTableCache((CatalogRelation) olapScan, slot);
- ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache);
- colStatsBuilder.setCount(tableRowCount);
+ ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache, tableRowCount);
colStatsBuilder.normalizeAvgSizeByte(slot);
builder.putColumnStatistics(slot, colStatsBuilder.build());
}
@@ -1062,8 +1060,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
} else {
cache = getColumnStatsFromTableCache(catalogRelation, slot);
}
- ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache);
- colStatsBuilder.setCount(tableRowCount);
+ ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder(cache, tableRowCount);
builder.putColumnStatistics(slot, colStatsBuilder.build());
}
checkIfUnknownStatsUsedAsKey(builder);
@@ -1187,7 +1184,6 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
ColumnStatistic stats = kv.getValue();
ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(stats);
columnStatisticBuilder
- .setCount(stats.count < 0 ? stats.count :
stats.count * groupingSetNum)
.setNumNulls(stats.numNulls < 0 ? stats.numNulls :
stats.numNulls * groupingSetNum)
.setDataSize(stats.dataSize < 0 ? stats.dataSize :
stats.dataSize * groupingSetNum);
return Pair.of(kv.getKey(),
columnStatisticBuilder.build());
@@ -1322,12 +1318,11 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
double count = stats.getRowCount() *
generate.getGeneratorOutput().size() * statsFactor;
Map<Expression, ColumnStatistic> columnStatsMap = Maps.newHashMap();
for (Map.Entry<Expression, ColumnStatistic> entry :
stats.columnStatistics().entrySet()) {
- ColumnStatistic columnStatistic = new
ColumnStatisticBuilder(entry.getValue()).setCount(count).build();
+ ColumnStatistic columnStatistic = new
ColumnStatisticBuilder(entry.getValue()).build();
columnStatsMap.put(entry.getKey(), columnStatistic);
}
for (Slot output : generate.getGeneratorOutput()) {
ColumnStatistic columnStatistic = new ColumnStatisticBuilder()
- .setCount(count)
.setMinValue(Double.NEGATIVE_INFINITY)
.setMaxValue(Double.POSITIVE_INFINITY)
.setNdv(count)
@@ -1349,8 +1344,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
"need WindowExpression, but we meet " + expr);
WindowExpression windExpr = (WindowExpression)
expr.child(0);
ColumnStatisticBuilder colStatsBuilder = new
ColumnStatisticBuilder();
- colStatsBuilder.setCount(childStats.getRowCount())
- .setOriginal(null);
+ colStatsBuilder.setOriginal(null);
Double partitionCount =
windExpr.getPartitionKeys().stream().map(key -> {
ColumnStatistic keyStats =
childStats.findColumnStatistics(key);
@@ -1365,8 +1359,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
if (partitionCount == -1.0) {
// partition key stats are all unknown
- colStatsBuilder.setCount(childStats.getRowCount())
- .setNdv(1)
+ colStatsBuilder.setNdv(1)
.setMinValue(Double.NEGATIVE_INFINITY)
.setMaxValue(Double.POSITIVE_INFINITY);
} else {
@@ -1411,7 +1404,7 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
private ColumnStatistic unionColumn(ColumnStatistic leftStats, double
leftRowCount, ColumnStatistic rightStats,
double rightRowCount, DataType dataType) {
if (leftStats.isUnKnown() || rightStats.isUnKnown()) {
- return new ColumnStatisticBuilder(leftStats).setCount(leftRowCount
+ rightRowCount).build();
+ return new ColumnStatisticBuilder(leftStats).build();
}
ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
columnStatisticBuilder.setMaxValue(Math.max(leftStats.maxValue,
rightStats.maxValue));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
index 845baa045cc..26027d10499 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
@@ -70,7 +70,7 @@ public class Numbers extends TableValuedFunction {
Map<Expression, ColumnStatistic> columnToStatistics =
Maps.newHashMap();
ColumnStatisticBuilder statBuilder = new ColumnStatisticBuilder()
-
.setCount(rowNum).setAvgSizeByte(8).setNumNulls(0).setDataSize(8);
+ .setAvgSizeByte(8).setNumNulls(0).setDataSize(8);
if (numberTvf.getUseConst()) { // a column of const value
long value = numberTvf.getConstValue();
statBuilder =
statBuilder.setNdv(1).setMinValue(value).setMaxValue(value)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index 7cf75462fee..25a825cbdaa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -137,8 +137,7 @@ public class ColStatsData {
return ColumnStatistic.UNKNOWN;
}
try {
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
- columnStatisticBuilder.setCount(count);
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(count);
columnStatisticBuilder.setNdv(ndv);
columnStatisticBuilder.setNumNulls(nullCount);
columnStatisticBuilder.setDataSize(dataSizeInBytes);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 9713d2d30e1..3edc14577d9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -46,20 +46,18 @@ public class ColumnStatistic {
private static final Logger LOG =
LogManager.getLogger(ColumnStatistic.class);
- public static ColumnStatistic UNKNOWN = new
ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
-
.setNumNulls(1).setCount(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY)
+ public static ColumnStatistic UNKNOWN = new
ColumnStatisticBuilder(1).setAvgSizeByte(1).setNdv(1)
+
.setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY).setMinValue(Double.NEGATIVE_INFINITY)
.setIsUnknown(true).setUpdatedTime("")
.build();
- public static ColumnStatistic ZERO = new
ColumnStatisticBuilder().setAvgSizeByte(0).setNdv(0)
-
.setNumNulls(0).setCount(0).setMaxValue(Double.NaN).setMinValue(Double.NaN)
- .build();
-
public static final Set<Type> UNSUPPORTED_TYPE = Sets.newHashSet(
Type.HLL, Type.BITMAP, Type.ARRAY, Type.STRUCT, Type.MAP,
Type.QUANTILE_STATE, Type.JSONB,
Type.VARIANT, Type.TIME, Type.TIMEV2, Type.LAMBDA_FUNCTION
);
+ // ATTENTION: Stats deriving WILL NOT use 'count' field any longer.
+ // Use 'rowCount' field in Statistics if needed.
@SerializedName("count")
public final double count;
@SerializedName("ndv")
@@ -122,9 +120,8 @@ public class ColumnStatistic {
// TODO: use thrift
public static ColumnStatistic fromResultRow(ResultRow row) {
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
double count = Double.parseDouble(row.get(7));
- columnStatisticBuilder.setCount(count);
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(count);
double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
columnStatisticBuilder.setNdv(ndv);
String nullCount = row.getWithDefault(9, "0");
@@ -190,26 +187,6 @@ public class ColumnStatistic {
return rowCount * ALMOST_UNIQUE_FACTOR < ndv;
}
- public ColumnStatistic updateByLimit(long limit, double rowCount) {
- double ratio = 0;
- if (rowCount != 0) {
- ratio = limit / rowCount;
- }
- double newNdv = Math.ceil(Math.min(ndv, limit));
- return new ColumnStatisticBuilder()
- .setCount(Math.ceil(limit))
- .setNdv(newNdv)
- .setAvgSizeByte(Math.ceil(avgSizeByte))
- .setNumNulls(Math.ceil(numNulls * ratio))
- .setDataSize(Math.ceil(dataSize * ratio))
- .setMinValue(minValue)
- .setMaxValue(maxValue)
- .setMinExpr(minExpr)
- .setMaxExpr(maxExpr)
- .setIsUnknown(isUnKnown)
- .build();
- }
-
public boolean hasIntersect(ColumnStatistic other) {
return Math.max(this.minValue, other.minValue) <=
Math.min(this.maxValue, other.maxValue);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
index 4c8df0bf677..47002355de9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
@@ -56,9 +56,24 @@ public class ColumnStatisticBuilder {
this.updatedTime = columnStatistic.updatedTime;
}
- public ColumnStatisticBuilder setCount(double count) {
+ // ATTENTION: DON'T USE FOLLOWING TWO DURING STATS DERIVING EXCEPT FOR
INITIALIZATION
+ public ColumnStatisticBuilder(double count) {
this.count = count;
- return this;
+ }
+
+ public ColumnStatisticBuilder(ColumnStatistic columnStatistic, double
count) {
+ this.count = count;
+ this.ndv = columnStatistic.ndv;
+ this.avgSizeByte = columnStatistic.avgSizeByte;
+ this.numNulls = columnStatistic.numNulls;
+ this.dataSize = columnStatistic.dataSize;
+ this.minValue = columnStatistic.minValue;
+ this.maxValue = columnStatistic.maxValue;
+ this.minExpr = columnStatistic.minExpr;
+ this.maxExpr = columnStatistic.maxExpr;
+ this.isUnknown = columnStatistic.isUnKnown;
+ this.original = columnStatistic.original;
+ this.updatedTime = columnStatistic.updatedTime;
}
public ColumnStatisticBuilder setNdv(double ndv) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
index 7ac4b95d484..753167fb442 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
@@ -67,9 +67,6 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
table.getDatabase().getCatalog().getId(),
table.getDatabase().getId(), table.getId(), -1,
colName);
- if (!statistic.isUnKnown) {
- rowCount = statistic.count;
- }
columnStatisticMap.put(entry.getKey(), statistic);
}
return new StatsDeriveResult(rowCount, columnStatisticMap);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
index eebe910d8b0..7222dc88258 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
@@ -38,16 +38,12 @@ public class PartitionColumnStatistic {
private static final Logger LOG =
LogManager.getLogger(PartitionColumnStatistic.class);
- public static PartitionColumnStatistic UNKNOWN = new
PartitionColumnStatisticBuilder().setAvgSizeByte(1)
- .setNdv(new
Hll128()).setNumNulls(1).setCount(1).setMaxValue(Double.POSITIVE_INFINITY)
+ public static PartitionColumnStatistic UNKNOWN = new
PartitionColumnStatisticBuilder(1).setAvgSizeByte(1)
+ .setNdv(new
Hll128()).setNumNulls(1).setMaxValue(Double.POSITIVE_INFINITY)
.setMinValue(Double.NEGATIVE_INFINITY)
.setIsUnknown(true).setUpdatedTime("")
.build();
- public static PartitionColumnStatistic ZERO = new
PartitionColumnStatisticBuilder().setAvgSizeByte(0)
- .setNdv(new
Hll128()).setNumNulls(0).setCount(0).setMaxValue(Double.NaN).setMinValue(Double.NaN)
- .build();
-
public final double count;
public final Hll128 ndv;
public final double numNulls;
@@ -109,9 +105,8 @@ public class PartitionColumnStatistic {
return PartitionColumnStatistic.UNKNOWN;
}
- PartitionColumnStatisticBuilder partitionStatisticBuilder = new
PartitionColumnStatisticBuilder();
double count = Double.parseDouble(row.get(6));
- partitionStatisticBuilder.setCount(count);
+ PartitionColumnStatisticBuilder partitionStatisticBuilder = new
PartitionColumnStatisticBuilder(count);
String ndv = row.get(7);
Base64.Decoder decoder = Base64.getDecoder();
DataInputStream dis = new DataInputStream(new
ByteArrayInputStream(decoder.decode(ndv)));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
index fe26396f212..b1dc7cdd001 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticBuilder.java
@@ -50,9 +50,9 @@ public class PartitionColumnStatisticBuilder {
this.updatedTime = statistic.updatedTime;
}
- public PartitionColumnStatisticBuilder setCount(double count) {
+ // ATTENTION: DON'T USE FOLLOWING TWO DURING STATS DERIVING EXCEPT FOR
INITIALIZATION
+ public PartitionColumnStatisticBuilder(double count) {
this.count = count;
- return this;
}
public PartitionColumnStatisticBuilder setNdv(Hll128 ndv) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 6883eb0b542..e18dc097920 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -110,7 +110,6 @@ public class Statistics {
ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(columnStatistic);
columnStatisticBuilder.setNdv(ndv);
columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls, rowCount
- ndv));
- columnStatisticBuilder.setCount(rowCount);
columnStatistic = columnStatisticBuilder.build();
expressionToColumnStats.put(entry.getKey(), columnStatistic);
}
@@ -228,14 +227,6 @@ public class Statistics {
return 1;
}
- public static Statistics zero(Statistics statistics) {
- Statistics zero = new Statistics(0, new HashMap<>());
- for (Map.Entry<Expression, ColumnStatistic> entry :
statistics.expressionToColumnStats.entrySet()) {
- zero.addColumnStats(entry.getKey(), ColumnStatistic.ZERO);
- }
- return zero;
- }
-
public static double getValidSelectivity(double nullSel) {
return nullSel < 0 ? 0 : (nullSel > 1 ? 1 : nullSel);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 0d9fa367462..ba23ab84dc7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -326,10 +326,9 @@ public class StatisticsRepository {
if (rowCount == null) {
throw new RuntimeException("Row count is null.");
}
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
+ ColumnStatisticBuilder builder = new
ColumnStatisticBuilder(Double.parseDouble(rowCount));
String colName = alterColumnStatsStmt.getColumnName();
Column column = objects.table.getColumn(colName);
- builder.setCount(Double.parseDouble(rowCount));
if (ndv != null) {
double dNdv = Double.parseDouble(ndv);
builder.setNdv(dNdv);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
index 8c301f911be..977518d47ed 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
@@ -20,8 +20,6 @@ package org.apache.doris.statistics;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.trees.expressions.Slot;
-import com.google.common.base.Preconditions;
-
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -125,25 +123,6 @@ public class StatsDeriveResult {
return statsDeriveResult;
}
- public StatsDeriveResult updateByLimit(long limit) {
- Preconditions.checkArgument(limit >= 0);
- limit = Math.min(limit, (long) rowCount);
- StatsDeriveResult statsDeriveResult = new StatsDeriveResult(limit,
width, penalty);
- for (Entry<Id, ColumnStatistic> entry :
slotIdToColumnStats.entrySet()) {
- statsDeriveResult.addColumnStats(entry.getKey(),
entry.getValue().updateByLimit(limit, rowCount));
- }
- // When the table is first created, rowCount is empty.
- // This leads to NPE if there is SetOperation outside the limit.
- // Therefore, when rowCount is empty, slotIdToColumnStats is also
imported,
- // but the possible problem is that the first query statistics are not
derived accurately.
- if (statsDeriveResult.slotIdToColumnStats.isEmpty()) {
- for (Entry<Id, ColumnStatistic> entry :
slotIdToColumnStats.entrySet()) {
- statsDeriveResult.addColumnStats(entry.getKey(),
entry.getValue());
- }
- }
- return statsDeriveResult;
- }
-
public StatsDeriveResult copy() {
return new StatsDeriveResult(this);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 61e92ee3f3c..405b1882e74 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -83,7 +83,6 @@ import com.google.common.base.Preconditions;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
-import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.TableScan;
@@ -656,20 +655,25 @@ public class StatisticsUtil {
*/
public static Optional<ColumnStatistic> getIcebergColumnStats(String
colName, org.apache.iceberg.Table table) {
TableScan tableScan = table.newScan().includeColumnStats();
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder();
- columnStatisticBuilder.setCount(0);
- columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
- columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
- columnStatisticBuilder.setDataSize(0);
- columnStatisticBuilder.setAvgSizeByte(0);
- columnStatisticBuilder.setNumNulls(0);
+ double totalDataSize = 0;
+ double totalDataCount = 0;
+ double totalNumNull = 0;
try (CloseableIterable<FileScanTask> fileScanTasks =
tableScan.planFiles()) {
for (FileScanTask task : fileScanTasks) {
- processDataFile(task.file(), task.spec(), colName,
columnStatisticBuilder);
+ int colId = getColId(task.spec(), colName);
+ totalDataSize += task.file().columnSizes().get(colId);
+ totalDataCount += task.file().recordCount();
+ totalNumNull += task.file().nullValueCounts().get(colId);
}
} catch (IOException e) {
LOG.warn("Error to close FileScanTask.", e);
}
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(totalDataCount);
+ columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
+ columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+ columnStatisticBuilder.setDataSize(totalDataSize);
+ columnStatisticBuilder.setAvgSizeByte(0);
+ columnStatisticBuilder.setNumNulls(totalNumNull);
if (columnStatisticBuilder.getCount() > 0) {
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getDataSize()
/ columnStatisticBuilder.getCount());
@@ -677,8 +681,7 @@ public class StatisticsUtil {
return Optional.of(columnStatisticBuilder.build());
}
- private static void processDataFile(DataFile dataFile, PartitionSpec
partitionSpec,
- String colName, ColumnStatisticBuilder columnStatisticBuilder) {
+ private static int getColId(PartitionSpec partitionSpec, String colName) {
int colId = -1;
for (Types.NestedField column : partitionSpec.schema().columns()) {
if (column.name().equals(colName)) {
@@ -689,12 +692,7 @@ public class StatisticsUtil {
if (colId == -1) {
throw new RuntimeException(String.format("Column %s not exist.",
colName));
}
- // Update the data size, count and num of nulls in
columnStatisticBuilder.
- // TODO: Get min max value.
-
columnStatisticBuilder.setDataSize(columnStatisticBuilder.getDataSize() +
dataFile.columnSizes().get(colId));
- columnStatisticBuilder.setCount(columnStatisticBuilder.getCount() +
dataFile.recordCount());
- columnStatisticBuilder.setNumNulls(columnStatisticBuilder.getNumNulls()
- + dataFile.nullValueCounts().get(colId));
+ return colId;
}
public static boolean isUnsupportedType(Type type) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 6e76c3f6a33..15f2f4d7e9c 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -70,10 +70,10 @@ class FilterEstimationTest {
LessThan lessThan = new LessThan(b, int100);
Or or = new Or(greaterThan1, lessThan);
Map<Expression, ColumnStatistic> columnStat = new HashMap<>();
- ColumnStatistic aStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
+ ColumnStatistic aStats = new
ColumnStatisticBuilder(500).setNdv(500).setAvgSizeByte(4)
.setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
- ColumnStatistic bStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500).setAvgSizeByte(4)
+ ColumnStatistic bStats = new
ColumnStatisticBuilder(500).setNdv(500).setAvgSizeByte(4)
.setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
columnStat.put(a, aStats);
@@ -99,10 +99,10 @@ class FilterEstimationTest {
LessThan lessThan = new LessThan(b, int100);
And and = new And(greaterThan1, lessThan);
Map<Expression, ColumnStatistic> columnStat = new HashMap<>();
- ColumnStatistic aStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500)
+ ColumnStatistic aStats = new ColumnStatisticBuilder(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
- ColumnStatistic bStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500)
+ ColumnStatistic bStats = new ColumnStatisticBuilder(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).setIsUnknown(true).build();
columnStat.put(a, aStats);
@@ -165,7 +165,7 @@ class FilterEstimationTest {
LessThan le = new LessThan(a, int200);
And and = new And(ge, le);
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
- ColumnStatistic aStats = new
ColumnStatisticBuilder().setCount(300).setNdv(30)
+ ColumnStatistic aStats = new ColumnStatisticBuilder(300).setNdv(30)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(300).build();
slotToColumnStat.put(a, aStats);
@@ -184,8 +184,7 @@ class FilterEstimationTest {
SlotReference ym = new SlotReference("a", new VarcharType(7));
double rowCount = 404962.0;
double ndv = 14.0;
- ColumnStatistic ymStats = new ColumnStatisticBuilder()
- .setCount(rowCount)
+ ColumnStatistic ymStats = new ColumnStatisticBuilder(rowCount)
.setNdv(ndv)
.setMinExpr(new StringLiteral("2023-07"))
.setMinValue(14126741000630328.000000)
@@ -211,8 +210,7 @@ class FilterEstimationTest {
SlotReference ym = new SlotReference("a", new VarcharType(7));
double rowCount = 404962.0;
double ndv = 0.5;
- ColumnStatistic ymStats = new ColumnStatisticBuilder()
- .setCount(rowCount)
+ ColumnStatistic ymStats = new ColumnStatisticBuilder(rowCount)
.setNdv(ndv)
.setMinExpr(new StringLiteral("2023-07"))
.setMinValue(14126741000630328.000000)
@@ -267,13 +265,13 @@ class FilterEstimationTest {
And and = new And(greaterThan1, lessThan);
Or or = new Or(and, equalTo);
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
- ColumnStatistic aStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500)
+ ColumnStatistic aStats = new ColumnStatisticBuilder(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
- ColumnStatistic bStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500)
+ ColumnStatistic bStats = new ColumnStatisticBuilder(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
- ColumnStatistic cStats = new
ColumnStatisticBuilder().setCount(500).setNdv(500)
+ ColumnStatistic cStats = new ColumnStatisticBuilder(500).setNdv(500)
.setAvgSizeByte(4).setNumNulls(0).setDataSize(0)
.setMinValue(0).setMaxValue(1000).setMinExpr(null).build();
slotToColumnStat.put(a, aStats);
@@ -312,7 +310,7 @@ class FilterEstimationTest {
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(or, stat);
Assertions.assertTrue(
- Precision.equals(512.5,
+ Precision.equals(503.12,
expected.getRowCount(), 0.01));
}
@@ -334,7 +332,7 @@ class FilterEstimationTest {
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(ge, stat);
- Assertions.assertEquals(1000 * 1.0 / 500, expected.getRowCount());
+ Assertions.assertEquals(1000 * (500.0 / 1000) * (1.0 / 500),
expected.getRowCount());
}
// a <= 500
@@ -355,7 +353,7 @@ class FilterEstimationTest {
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(le, stat);
- Assertions.assertEquals(1000 * 1.0 / 500, expected.getRowCount());
+ Assertions.assertEquals(1000 * (500.0 / 1000) * (1.0 / 500),
expected.getRowCount());
}
// a < 500
@@ -376,7 +374,7 @@ class FilterEstimationTest {
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(less, stat);
- Assertions.assertEquals(2, expected.getRowCount());
+ Assertions.assertEquals(1, expected.getRowCount());
}
// a > 1000
@@ -397,7 +395,7 @@ class FilterEstimationTest {
Statistics stat = new Statistics(1000, slotToColumnStat);
FilterEstimation filterEstimation = new FilterEstimation();
Statistics expected = filterEstimation.estimate(ge, stat);
- Assertions.assertEquals(2, expected.getRowCount());
+ Assertions.assertEquals(1, expected.getRowCount());
}
// a > b
@@ -659,27 +657,24 @@ class FilterEstimationTest {
IntegerLiteral i300 = new IntegerLiteral(300);
GreaterThan ge = new GreaterThan(c, i300);
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
- ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builderA = new ColumnStatisticBuilder(1000)
.setNdv(1000)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(1000)
- .setMaxValue(10000)
- .setCount(1000);
- ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
+ .setMaxValue(10000);
+ ColumnStatisticBuilder builderB = new ColumnStatisticBuilder(1000)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
- .setMaxValue(500)
- .setCount(1000);
- ColumnStatisticBuilder builderC = new ColumnStatisticBuilder()
+ .setMaxValue(500);
+ ColumnStatisticBuilder builderC = new ColumnStatisticBuilder(1000)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
- .setMaxValue(200)
- .setCount(1000);
+ .setMaxValue(200);
slotToColumnStat.put(a, builderA.build());
slotToColumnStat.put(b, builderB.build());
slotToColumnStat.put(c, builderC.build());
@@ -799,22 +794,19 @@ class FilterEstimationTest {
IntegerLiteral i200 = new IntegerLiteral(200);
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
- ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builderA = new ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
- .setMaxValue(100)
- .setCount(100);
- ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
- .setCount(100)
+ .setMaxValue(100);
+ ColumnStatisticBuilder builderB = new ColumnStatisticBuilder(100)
.setNdv(20)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
.setMaxValue(500);
- ColumnStatisticBuilder builderC = new ColumnStatisticBuilder()
- .setCount(100)
+ ColumnStatisticBuilder builderC = new ColumnStatisticBuilder(100)
.setNdv(40)
.setAvgSizeByte(4)
.setNumNulls(0)
@@ -868,22 +860,19 @@ class FilterEstimationTest {
IntegerLiteral i10 = new IntegerLiteral(10);
Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
- ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builderA = new ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
- .setMaxValue(100)
- .setCount(100);
- ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
- .setCount(100)
+ .setMaxValue(100);
+ ColumnStatisticBuilder builderB = new ColumnStatisticBuilder(100)
.setNdv(20)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMinValue(0)
.setMaxValue(500);
- ColumnStatisticBuilder builderC = new ColumnStatisticBuilder()
- .setCount(100)
+ ColumnStatisticBuilder builderC = new ColumnStatisticBuilder(100)
.setNdv(40)
.setAvgSizeByte(4)
.setNumNulls(0)
@@ -914,15 +903,14 @@ class FilterEstimationTest {
@Test
public void testBetweenCastFilter() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMaxExpr(new IntLiteral(100))
.setMaxValue(100)
.setMinExpr(new IntLiteral(0))
- .setMinValue(0)
- .setCount(100);
+ .setMinValue(0);
DoubleLiteral begin = new DoubleLiteral(40.0);
DoubleLiteral end = new DoubleLiteral(50.0);
LessThan less = new LessThan(new Cast(a, DoubleType.INSTANCE), end);
@@ -943,13 +931,12 @@ class FilterEstimationTest {
DateLiteral from = new DateLiteral("1990-01-01");
DateLiteral to = new DateLiteral("2000-01-01");
SlotReference a = new SlotReference("a", DateType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMaxValue(to.getDouble())
- .setMinValue(from.getDouble())
- .setCount(100);
+ .setMinValue(from.getDouble());
DateLiteral mid = new DateLiteral("1999-01-01");
GreaterThan greaterThan = new GreaterThan(a, mid);
Statistics stats = new Statistics(100, new HashMap<>());
@@ -962,13 +949,12 @@ class FilterEstimationTest {
@Test
public void testIsNull() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(10)
.setMaxValue(100)
- .setMinValue(0)
- .setCount(100);
+ .setMinValue(0);
IsNull isNull = new IsNull(a);
Statistics stats = new Statistics(100, new HashMap<>());
stats.addColumnStats(a, builder.build());
@@ -980,13 +966,12 @@ class FilterEstimationTest {
@Test
public void testIsNotNull() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(10)
.setMaxValue(100)
- .setMinValue(0)
- .setCount(100);
+ .setMinValue(0);
IsNull isNull = new IsNull(a);
Not not = new Not(isNull);
Statistics stats = new Statistics(100, new HashMap<>());
@@ -1002,13 +987,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsEqualTo() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
EqualTo equalTo = new EqualTo(a, int1);
Statistics stats = new Statistics(10, new HashMap<>());
@@ -1024,13 +1008,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsComparable() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
GreaterThan greaterThan = new GreaterThan(a, int1);
Statistics stats = new Statistics(10, new HashMap<>());
@@ -1046,13 +1029,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsIn() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
IntegerLiteral int2 = new IntegerLiteral(2);
InPredicate in = new InPredicate(a, Lists.newArrayList(int1, int2));
@@ -1060,7 +1042,7 @@ class FilterEstimationTest {
stats.addColumnStats(a, builder.build());
FilterEstimation filterEstimation = new FilterEstimation();
Statistics result = filterEstimation.estimate(in, stats);
- Assertions.assertEquals(result.getRowCount(), 10.0, 0.01);
+ Assertions.assertEquals(result.getRowCount(), 2.0, 0.01);
}
/**
@@ -1069,13 +1051,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsNotEqualTo() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
EqualTo equalTo = new EqualTo(a, int1);
Not not = new Not(equalTo);
@@ -1092,13 +1073,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsNotIn() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
IntegerLiteral int2 = new IntegerLiteral(2);
InPredicate in = new InPredicate(a, Lists.newArrayList(int1, int2));
@@ -1116,13 +1096,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsAnd() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
IntegerLiteral int2 = new IntegerLiteral(2);
GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int1);
@@ -1141,23 +1120,21 @@ class FilterEstimationTest {
@Test
public void testNumNullsAndTwoCol() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builderA = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builderA = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
EqualTo equalTo = new EqualTo(a, int1);
SlotReference b = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builderB = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
Not isNotNull = new Not(new IsNull(b));
And and = new And(equalTo, isNotNull);
Statistics stats = new Statistics(10, new HashMap<>());
@@ -1174,13 +1151,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsOr() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
IntegerLiteral int2 = new IntegerLiteral(2);
GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int2);
@@ -1199,13 +1175,12 @@ class FilterEstimationTest {
@Test
public void testNumNullsOrIsNull() {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
+ ColumnStatisticBuilder builder = new ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
IntegerLiteral int1 = new IntegerLiteral(1);
GreaterThanEqual greaterThanEqual = new GreaterThanEqual(a, int1);
IsNull isNull = new IsNull(a);
@@ -1219,23 +1194,22 @@ class FilterEstimationTest {
@Test
public void testNullSafeEqual() {
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(10)
.setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(8)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
+ .setMinValue(1);
ColumnStatistic aStats = columnStatisticBuilder.build();
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
- columnStatisticBuilder.setNdv(2)
+ ColumnStatisticBuilder columnStatisticBuilder2 = new
ColumnStatisticBuilder(10)
+ .setNdv(2)
.setAvgSizeByte(4)
.setNumNulls(7)
.setMaxValue(2)
- .setMinValue(1)
- .setCount(10);
- ColumnStatistic bStats = columnStatisticBuilder.build();
+ .setMinValue(1);
+ ColumnStatistic bStats = columnStatisticBuilder2.build();
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
StatisticsBuilder statsBuilder = new StatisticsBuilder();
@@ -1258,15 +1232,14 @@ class FilterEstimationTest {
@Test
public void testStringRangeColToLiteral() {
SlotReference a = new SlotReference("a", new VarcharType(25));
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(25)
.setNumNulls(0)
.setMaxExpr(new StringLiteral("200"))
.setMaxValue(new VarcharLiteral("200").getDouble())
.setMinExpr(new StringLiteral("100"))
- .setMinValue(new VarcharLiteral("100").getDouble())
- .setCount(100);
+ .setMinValue(new VarcharLiteral("100").getDouble());
StatisticsBuilder statsBuilder = new StatisticsBuilder();
statsBuilder.setRowCount(100);
statsBuilder.putColumnStatistics(a, columnStatisticBuilder.build());
@@ -1287,15 +1260,14 @@ class FilterEstimationTest {
@Test
public void testStringRangeColToDateLiteral() {
SlotReference a = new SlotReference("a", new VarcharType(25));
- ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilder = new
ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(25)
.setNumNulls(0)
.setMaxExpr(new StringLiteral("2022-01-01"))
.setMaxValue(new VarcharLiteral("2022-01-01").getDouble())
.setMinExpr(new StringLiteral("2020-01-01"))
- .setMinValue(new VarcharLiteral("2020-01-01").getDouble())
- .setCount(100);
+ .setMinValue(new VarcharLiteral("2020-01-01").getDouble());
StatisticsBuilder statsBuilder = new StatisticsBuilder();
statsBuilder.setRowCount(100);
statsBuilder.putColumnStatistics(a, columnStatisticBuilder.build());
@@ -1316,37 +1288,34 @@ class FilterEstimationTest {
@Test
public void testStringRangeColToCol() {
SlotReference a = new SlotReference("a", new VarcharType(25));
- ColumnStatisticBuilder columnStatisticBuilderA = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilderA = new
ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(25)
.setNumNulls(0)
.setMaxExpr(new StringLiteral("2022-01-01"))
.setMaxValue(new VarcharLiteral("2022-01-01").getDouble())
.setMinExpr(new StringLiteral("2020-01-01"))
- .setMinValue(new VarcharLiteral("2020-01-01").getDouble())
- .setCount(100);
+ .setMinValue(new VarcharLiteral("2020-01-01").getDouble());
SlotReference b = new SlotReference("b", new VarcharType(25));
- ColumnStatisticBuilder columnStatisticBuilderB = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilderB = new
ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(25)
.setNumNulls(0)
.setMaxExpr(new StringLiteral("2012-01-01"))
.setMaxValue(new VarcharLiteral("2012-01-01").getDouble())
.setMinExpr(new StringLiteral("2010-01-01"))
- .setMinValue(new VarcharLiteral("2010-01-01").getDouble())
- .setCount(100);
+ .setMinValue(new VarcharLiteral("2010-01-01").getDouble());
SlotReference c = new SlotReference("c", new VarcharType(25));
- ColumnStatisticBuilder columnStatisticBuilderC = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilderC = new
ColumnStatisticBuilder(100)
.setNdv(100)
.setAvgSizeByte(25)
.setNumNulls(0)
.setMaxExpr(new StringLiteral("2021-01-01"))
.setMaxValue(new VarcharLiteral("2021-01-01").getDouble())
.setMinExpr(new StringLiteral("2010-01-01"))
- .setMinValue(new VarcharLiteral("2010-01-01").getDouble())
- .setCount(100);
+ .setMinValue(new VarcharLiteral("2010-01-01").getDouble());
StatisticsBuilder statsBuilder = new StatisticsBuilder();
statsBuilder.setRowCount(100);
@@ -1372,9 +1341,8 @@ class FilterEstimationTest {
SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
long tenB = 1000000000;
long row = 1600000000;
- ColumnStatistic colStats = new ColumnStatisticBuilder()
+ ColumnStatistic colStats = new ColumnStatisticBuilder(row)
.setAvgSizeByte(10)
- .setCount(row)
.setNdv(10000)
.setMinExpr(new IntLiteral(0))
.setMinValue(0)
@@ -1399,18 +1367,16 @@ class FilterEstimationTest {
void testAndWithInfinity() {
Double row = 1000.0;
SlotReference a = new SlotReference("a", new VarcharType(25));
- ColumnStatisticBuilder columnStatisticBuilderA = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilderA = new
ColumnStatisticBuilder(row)
.setNdv(10)
.setAvgSizeByte(4)
- .setNumNulls(0)
- .setCount(row);
+ .setNumNulls(0);
SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
- ColumnStatisticBuilder columnStatisticBuilderB = new
ColumnStatisticBuilder()
+ ColumnStatisticBuilder columnStatisticBuilderB = new
ColumnStatisticBuilder(row)
.setNdv(488)
.setAvgSizeByte(25)
- .setNumNulls(0)
- .setCount(row);
+ .setNumNulls(0);
StatisticsBuilder statsBuilder = new StatisticsBuilder();
statsBuilder.setRowCount(row);
statsBuilder.putColumnStatistics(a, columnStatisticBuilderA.build());
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/JoinEstimateTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/JoinEstimateTest.java
index 168650c6351..8e37234a0c6 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/JoinEstimateTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/JoinEstimateTest.java
@@ -55,15 +55,13 @@ public class JoinEstimateTest {
EqualTo eq = new EqualTo(a, b);
Statistics leftStats = new
StatisticsBuilder().setRowCount(100).build();
leftStats.addColumnStats(a,
- new ColumnStatisticBuilder()
- .setCount(100)
+ new ColumnStatisticBuilder(100)
.setNdv(10)
.build()
);
Statistics rightStats = new
StatisticsBuilder().setRowCount(80).build();
rightStats.addColumnStats(b,
- new ColumnStatisticBuilder()
- .setCount(80)
+ new ColumnStatisticBuilder(80)
.setNdv(5)
.build()
);
@@ -101,20 +99,17 @@ public class JoinEstimateTest {
EqualTo eq = new EqualTo(a, b);
Statistics leftStats = new
StatisticsBuilder().setRowCount(100).build();
leftStats.addColumnStats(a,
- new ColumnStatisticBuilder()
- .setCount(100)
+ new ColumnStatisticBuilder(100)
.setNdv(10)
.build()
);
Statistics rightStats = new
StatisticsBuilder().setRowCount(80).build();
rightStats.addColumnStats(b,
- new ColumnStatisticBuilder()
- .setCount(80)
+ new ColumnStatisticBuilder(80)
.setNdv(0)
.build()
).addColumnStats(c,
- new ColumnStatisticBuilder()
- .setCount(80)
+ new ColumnStatisticBuilder(80)
.setNdv(20)
.build()
);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index 84c162ac9cf..9fc1d3e1a22 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -145,13 +145,17 @@ public class StatsCalculatorTest {
GroupExpression groupExpression = new GroupExpression(logicalFilter,
ImmutableList.of(childGroup));
Group ownerGroup = new Group(null, groupExpression, null);
StatsCalculator.estimate(groupExpression, null);
- Assertions.assertEquals((10000 * 0.1 * 0.05),
ownerGroup.getStatistics().getRowCount(), 0.001);
+ // consider the nonNullSelectivity
+ // TODO: current normalization of numNulls by ratio will be refined in
the future
+ Assertions.assertEquals(49.90005,
ownerGroup.getStatistics().getRowCount(), 0.001);
LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or,
groupPlan);
GroupExpression groupExpressionOr = new
GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
Group ownerGroupOr = new Group(null, groupExpressionOr, null);
StatsCalculator.estimate(groupExpressionOr, null);
- Assertions.assertEquals((long) (10000 * (0.1 + 0.05 - 0.1 * 0.05)),
+ // consider the nonNullSelectivity
+ // TODO: current normalization of numNulls by ratio will be refined in
the future
+ Assertions.assertEquals(1448.59995,
ownerGroupOr.getStatistics().getRowCount(), 0.001);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
deleted file mode 100644
index c3f04bccfc8..00000000000
---
a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.common.Id;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-import java.util.Date;
-
-public class StatsDeriveResultTest {
- @Test
- public void testUpdateRowCountByLimit() {
- StatsDeriveResult stats = new StatsDeriveResult(100);
- ColumnStatistic a = new ColumnStatistic(100, 10, null, 1, 5, 10,
- 1, 100, null, null, false,
- new Date().toString());
- Id id = new Id(1);
- stats.addColumnStats(id, a);
- StatsDeriveResult res = stats.updateByLimit(0);
- Assertions.assertEquals(0, res.getRowCount());
- Assertions.assertEquals(1, res.getSlotIdToColumnStats().size());
- ColumnStatistic resColStats = res.getColumnStatsBySlotId(id);
- Assertions.assertEquals(0, resColStats.ndv);
- Assertions.assertEquals(1, resColStats.avgSizeByte);
- Assertions.assertEquals(0, resColStats.numNulls);
- Assertions.assertEquals(1, resColStats.dataSize);
- Assertions.assertEquals(1, resColStats.minValue);
- Assertions.assertEquals(100, resColStats.maxValue);
- Assertions.assertEquals(false, resColStats.isUnKnown);
-
- res = stats.updateByLimit(1);
- resColStats = res.getColumnStatsBySlotId(id);
- Assertions.assertEquals(1, resColStats.ndv);
- Assertions.assertEquals(1, resColStats.avgSizeByte);
- Assertions.assertEquals(1, resColStats.numNulls);
- Assertions.assertEquals(1, resColStats.dataSize);
- Assertions.assertEquals(1, resColStats.minValue);
- Assertions.assertEquals(100, resColStats.maxValue);
- Assertions.assertEquals(false, resColStats.isUnKnown);
- }
-}
diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query74.out
b/regression-test/data/nereids_hint_tpcds_p0/shape/query74.out
index e6f3e10d22f..8b171914ebd 100644
--- a/regression-test/data/nereids_hint_tpcds_p0/shape/query74.out
+++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8))
/ year_total), NULL) > if((year_total > 0.00), (cast(year_total as
DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8))
/ year_total), NULL) > if((year_total > 0.00), (cast(year_total as
DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.00))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.00))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query74.out
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query74.out
index e6f3e10d22f..8b171914ebd 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query74.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8))
/ year_total), NULL) > if((year_total > 0.00), (cast(year_total as
DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8))
/ year_total), NULL) > if((year_total > 0.00), (cast(year_total as
DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.00))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.00))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query74.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query74.out
index 84771c7fe6a..d8a82ca998a 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query74.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.0))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.0))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query74.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query74.out
index e48fc87588c..64a56e4e850 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query74.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.0))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.0))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git
a/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query74.out
b/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query74.out
index 84771c7fe6a..d8a82ca998a 100644
--- a/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query74.out
+++ b/regression-test/data/new_shapes_p0/tpcds_sf100/noStatsRfPrune/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.0))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.0))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git
a/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query74.out
b/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query74.out
index e48fc87588c..64a56e4e850 100644
--- a/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query74.out
+++ b/regression-test/data/new_shapes_p0/tpcds_sf100/no_stats_shape/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) >
if((year_total > 0.0), (year_total / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.0))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.0))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
diff --git a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query74.out
b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query74.out
index e6f3e10d22f..8b171914ebd 100644
--- a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query74.out
+++ b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query74.out
@@ -35,20 +35,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
-------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8))
/ year_total), NULL) > if((year_total > 0.00), (cast(year_total as
DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF5
customer_id->[customer_id,customer_id,customer_id]
+------------hashJoin[INNER_JOIN shuffleBucket]
hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id))
otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8))
/ year_total), NULL) > if((year_total > 0.00), (cast(year_total as
DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF5
customer_id->[customer_id]
+--------------PhysicalProject
+----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
+------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id))
otherCondition=() build RFs:RF4 customer_id->[customer_id,customer_id]
------------------hashJoin[INNER_JOIN shuffle]
hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id))
otherCondition=() build RFs:RF3 customer_id->[customer_id]
--------------------PhysicalProject
----------------------filter((t_s_secyear.sale_type = 's') and
(t_s_secyear.year = 2000))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4 RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3
RF4
--------------------PhysicalProject
----------------------filter((t_s_firstyear.sale_type = 's') and
(t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.00))
-------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
RF5
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------PhysicalProject
--------------------filter((t_w_firstyear.sale_type = 'w') and
(t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.00))
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5
---------------PhysicalProject
-----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year =
2000))
-------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]