Kikyou1997 commented on code in PR #18129:
URL: https://github.com/apache/doris/pull/18129#discussion_r1149980436
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java:
##########
@@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics
crossJoinStats, List<Expr
for (int i = 0; i < sortedJoinConditions.size(); i++) {
sel *= Math.pow(sortedJoinConditions.get(i).second, 1 /
Math.pow(2, i));
}
- return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount()
* sel);
+ Statistics innerJoinStats =
crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel);
+
+ if (!join.getOtherJoinConjuncts().isEmpty()) {
+ FilterEstimation filterEstimation = new FilterEstimation();
+ innerJoinStats = filterEstimation.estimate(
+ ExpressionUtils.and(join.getOtherJoinConjuncts()),
innerJoinStats);
+ }
+ return innerJoinStats;
}
private static double estimateJoinConditionSel(Statistics crossJoinStats,
Expression joinCond) {
Statistics statistics = new FilterEstimation().estimate(joinCond,
crossJoinStats);
return statistics.getRowCount() / crossJoinStats.getRowCount();
}
+ private static double adjustSemiOrAntiByOtherJoinConditions(Join join) {
+ final double non_equal_ratio = 0.5;
+ int otherConditionCount = join.getOtherJoinConjuncts().size();
+ double sel = 1.0;
+ for (int i = 0; i < otherConditionCount; i++) {
+ sel *= Math.pow(non_equal_ratio, 1 / Math.pow(2, i));
+ }
+ return sel;
+ }
+
+ private static double estimateSemiOrAntiRowCountByEqual(Statistics
leftStats,
+ Statistics rightStats, Join join, EqualTo equalTo) {
+ Expression eqLeft = equalTo.left();
+ Expression eqRight = equalTo.right();
+ ColumnStatistic probColStats = leftStats.findColumnStatistics(eqLeft);
+ ColumnStatistic buildColStats;
+ if (probColStats == null) {
+ probColStats = leftStats.findColumnStatistics(eqRight);
+ buildColStats = rightStats.findColumnStatistics(eqLeft);
+ } else {
+ buildColStats = rightStats.findColumnStatistics(eqRight);
+ }
+ if (probColStats == null || buildColStats == null) {
+ return Double.POSITIVE_INFINITY;
+ }
+
+ double rowCount;
+ if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+ rowCount = leftStats.getRowCount() * buildColStats.ndv /
buildColStats.originalNdv;
+ } else {
+ //right semi or anti
+ rowCount = rightStats.getRowCount() * probColStats.ndv /
probColStats.originalNdv;
+ }
+ return rowCount;
+ }
+
+ private static Statistics estimateSemiOrAnti(Statistics leftStats,
Statistics rightStats, Join join) {
+ // primaryConjunct is the most effective conjunct.
+ double rowCount = Double.POSITIVE_INFINITY;
+ for (Expression conjunct : join.getHashJoinConjuncts()) {
+ double eqRowCount = estimateSemiOrAntiRowCountByEqual(leftStats,
rightStats, join, (EqualTo) conjunct);
+ if (rowCount > eqRowCount) {
+ rowCount = eqRowCount;
+ }
+ }
+ if (rowCount == Double.POSITIVE_INFINITY) {
+ //fall back to original alg.
+ return null;
+ }
+ rowCount = rowCount * adjustSemiOrAntiByOtherJoinConditions(join);
+
+ StatisticsBuilder builder;
+ if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+ leftStats.fix(rowCount, leftStats.getRowCount());
Review Comment:
The `fix` method here actually modifies the child's statistics (`leftStats`)
in place, which should be avoided as much as possible.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java:
##########
@@ -52,37 +55,105 @@ private static Statistics estimateInnerJoin(Statistics
crossJoinStats, List<Expr
for (int i = 0; i < sortedJoinConditions.size(); i++) {
sel *= Math.pow(sortedJoinConditions.get(i).second, 1 /
Math.pow(2, i));
}
- return crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount()
* sel);
+ Statistics innerJoinStats =
crossJoinStats.updateRowCountOnly(crossJoinStats.getRowCount() * sel);
+
+ if (!join.getOtherJoinConjuncts().isEmpty()) {
+ FilterEstimation filterEstimation = new FilterEstimation();
+ innerJoinStats = filterEstimation.estimate(
+ ExpressionUtils.and(join.getOtherJoinConjuncts()),
innerJoinStats);
+ }
+ return innerJoinStats;
}
private static double estimateJoinConditionSel(Statistics crossJoinStats,
Expression joinCond) {
Statistics statistics = new FilterEstimation().estimate(joinCond,
crossJoinStats);
return statistics.getRowCount() / crossJoinStats.getRowCount();
}
+ private static double adjustSemiOrAntiByOtherJoinConditions(Join join) {
+ final double non_equal_ratio = 0.5;
+ int otherConditionCount = join.getOtherJoinConjuncts().size();
+ double sel = 1.0;
+ for (int i = 0; i < otherConditionCount; i++) {
+ sel *= Math.pow(non_equal_ratio, 1 / Math.pow(2, i));
+ }
+ return sel;
+ }
+
+ private static double estimateSemiOrAntiRowCountByEqual(Statistics
leftStats,
+ Statistics rightStats, Join join, EqualTo equalTo) {
+ Expression eqLeft = equalTo.left();
+ Expression eqRight = equalTo.right();
+ ColumnStatistic probColStats = leftStats.findColumnStatistics(eqLeft);
+ ColumnStatistic buildColStats;
+ if (probColStats == null) {
+ probColStats = leftStats.findColumnStatistics(eqRight);
+ buildColStats = rightStats.findColumnStatistics(eqLeft);
+ } else {
+ buildColStats = rightStats.findColumnStatistics(eqRight);
+ }
+ if (probColStats == null || buildColStats == null) {
+ return Double.POSITIVE_INFINITY;
+ }
+
+ double rowCount;
+ if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+ rowCount = leftStats.getRowCount() * buildColStats.ndv /
buildColStats.originalNdv;
+ } else {
+ //right semi or anti
+ rowCount = rightStats.getRowCount() * probColStats.ndv /
probColStats.originalNdv;
+ }
+ return rowCount;
+ }
+
+ private static Statistics estimateSemiOrAnti(Statistics leftStats,
Statistics rightStats, Join join) {
+ // primaryConjunct is the most effective conjunct.
+ double rowCount = Double.POSITIVE_INFINITY;
+ for (Expression conjunct : join.getHashJoinConjuncts()) {
+ double eqRowCount = estimateSemiOrAntiRowCountByEqual(leftStats,
rightStats, join, (EqualTo) conjunct);
+ if (rowCount > eqRowCount) {
+ rowCount = eqRowCount;
+ }
+ }
+ if (rowCount == Double.POSITIVE_INFINITY) {
+ //fall back to original alg.
+ return null;
+ }
+ rowCount = rowCount * adjustSemiOrAntiByOtherJoinConditions(join);
+
+ StatisticsBuilder builder;
+ if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+ leftStats.fix(rowCount, leftStats.getRowCount());
+ builder = new StatisticsBuilder(leftStats);
+ builder.setRowCount(rowCount);
+ } else {
+ //right semi or anti
+ rightStats.fix(rowCount, rightStats.getRowCount());
Review Comment:
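Ditto: the `fix` call here modifies the child's statistics (`rightStats`) in place, which should be avoided as much as possible.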
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]