korlov42 commented on code in PR #6543: URL: https://github.com/apache/ignite-3/pull/6543#discussion_r2320974538
########## modules/sql-engine/src/test/resources/tpcds/plan/q5.plan: ########## @@ -1,205 +1,148 @@ Sort collation: [CHANNEL ASC, ID ASC] fetch: 100 - est: (rows=100) Review Comment: please update `.plan` files with full version of a plan including estimates. There is semiautomatic way to do this: find `updateQueryPlan` in test class and specify `targetDirectory` variable at the very first line; then update `@TpcSuiteInfo` with `planUpdater = "updateQueryPlan"` and just run the test ########## modules/sql-engine/src/test/java/org/apache/ignite/internal/sql/engine/planner/JoinRowCountEstimationTest.java: ########## @@ -84,7 +103,6 @@ void joinByPrimaryKeys() { .check(); // Defined by IgniteMdSelectivity.guessSelectivity. Review Comment: let's remove comment as well here and below ########## modules/sql-engine/src/main/java/org/apache/ignite/internal/sql/engine/metadata/IgniteMdRowCount.java: ########## @@ -231,37 +237,91 @@ public double getRowCount(IgniteLimit rel, RelMetadataQuery mq) { return RelMdUtil.getJoinRowCount(mq, rel, rel.getCondition()); } - double postFiltrationAdjustment = joinContexts.size() == 1 && joinInfo.isEqui() ? 1.0 + double postFiltrationAdjustment = 1.0; + + switch (rel.getJoinType()) { + case INNER: + case SEMI: // Extra join keys as well as non-equi conditions serves as post-filtration, // therefore we need to adjust final result with a little factor. - : 0.7; + postFiltrationAdjustment = joinContexts.size() == 1 && joinInfo.isEqui() ? 1.0 : NON_EQUI_COEFF; + + break; + default: + break; + } - double baseRowCount; - Double percentageAdjustment; + double baseRowCount = 0.0; + Double percentageAdjustment = null; if (context.joinType() == JoiningRelationType.PK_ON_PK) { - // Assume we have two fact tables SALES and RETURNS sharing the same primary key. Every item - // can be sold, but only items which were sold can be returned back, therefore - // size(SALES) > size(RETURNS). When joining SALES on RETURNS by primary key, the estimated - // result size will be the same as the size of the smallest table (RETURNS in this case), - // adjusted by the percentage of rows of the biggest table (SALES in this case; percentage - // adjustment is required to account for predicates pushed down to the table, e.g. we are - // interested in returns of items with certain category) - if (leftRowCount > rightRowCount) { - baseRowCount = rightRowCount; - percentageAdjustment = mq.getPercentageOriginalRows(rel.getLeft()); - } else { + if (rel.getJoinType() == JoinRelType.INNER || rel.getJoinType() == JoinRelType.SEMI) { + // Assume we have two fact tables SALES and RETURNS sharing the same primary key. Every item + // can be sold, but only items which were sold can be returned back, therefore + // size(SALES) > size(RETURNS). When joining SALES on RETURNS by primary key, the estimated + // result size will be the same as the size of the smallest table (RETURNS in this case), + // adjusted by the percentage of rows of the biggest table (SALES in this case; percentage + // adjustment is required to account for predicates pushed down to the table, e.g. we are + // interested in returns of items with certain category) + if (leftRowCount > rightRowCount) { + baseRowCount = rightRowCount; + percentageAdjustment = mq.getPercentageOriginalRows(rel.getLeft()); + } else { + baseRowCount = leftRowCount; + percentageAdjustment = mq.getPercentageOriginalRows(rel.getRight()); + } + } else if (rel.getJoinType() == JoinRelType.LEFT) { baseRowCount = leftRowCount; - percentageAdjustment = mq.getPercentageOriginalRows(rel.getRight()); + } else if (rel.getJoinType() == JoinRelType.RIGHT) { + baseRowCount = rightRowCount; + } else if (rel.getJoinType() == JoinRelType.FULL) { + Double selectivity = mq.getSelectivity(rel, rel.getCondition()); + + // Fall-back to calcite's implementation. + if (selectivity == null) { + return RelMdUtil.getJoinRowCount(mq, rel, rel.getCondition()); + } + + baseRowCount = rightRowCount + leftRowCount; + percentageAdjustment = 1.0 - selectivity; } - } else { + } else if (context.joinType() == JoiningRelationType.FK_ON_PK) { // For foreign key joins the base table is the one which is joined by non-primary key columns. - if (context.joinType() == JoiningRelationType.FK_ON_PK) { + if (rel.getJoinType() == JoinRelType.INNER || rel.getJoinType() == JoinRelType.SEMI) { baseRowCount = leftRowCount; percentageAdjustment = mq.getPercentageOriginalRows(rel.getRight()); - } else { - assert context.joinType() == JoiningRelationType.PK_ON_FK : context.joinType(); + } else if (rel.getJoinType() == JoinRelType.LEFT) { + baseRowCount = leftRowCount; + } else if (rel.getJoinType() == JoinRelType.RIGHT) { + baseRowCount = rightRowCount; Review Comment: this estimation is not quite correct. Even though it's right join, I believe FK table should be a major factor in estimation. Here is an example: Assume we have tables of JIRA's and dictionary called Priorities. Every JIRA has priority, and there are only 5 priorities: minor, medium, major, critical, blocker. The number of JIRA's in the table is huge (like hundreds of thousands). FK_ON_PK type of relation is `FROM jiras j JOIN priorities p ON j.p_id = p.id`. The fact that we use RIGHT join instead of INNER doesn't change the size of the result set significantly: the only change will be inclusion of non-used priorities, which I believe is not a major factor in the estimation. The similar problem is in PK_ON_FK branch ########## modules/sql-engine/src/main/java/org/apache/ignite/internal/sql/engine/metadata/IgniteMdRowCount.java: ########## @@ -231,37 +237,91 @@ public double getRowCount(IgniteLimit rel, RelMetadataQuery mq) { return RelMdUtil.getJoinRowCount(mq, rel, rel.getCondition()); } - double postFiltrationAdjustment = joinContexts.size() == 1 && joinInfo.isEqui() ? 1.0 + double postFiltrationAdjustment = 1.0; + + switch (rel.getJoinType()) { + case INNER: + case SEMI: // Extra join keys as well as non-equi conditions serves as post-filtration, // therefore we need to adjust final result with a little factor. - : 0.7; + postFiltrationAdjustment = joinContexts.size() == 1 && joinInfo.isEqui() ? 1.0 : NON_EQUI_COEFF; + + break; + default: + break; + } - double baseRowCount; - Double percentageAdjustment; + double baseRowCount = 0.0; + Double percentageAdjustment = null; if (context.joinType() == JoiningRelationType.PK_ON_PK) { - // Assume we have two fact tables SALES and RETURNS sharing the same primary key. Every item - // can be sold, but only items which were sold can be returned back, therefore - // size(SALES) > size(RETURNS). When joining SALES on RETURNS by primary key, the estimated - // result size will be the same as the size of the smallest table (RETURNS in this case), - // adjusted by the percentage of rows of the biggest table (SALES in this case; percentage - // adjustment is required to account for predicates pushed down to the table, e.g. we are - // interested in returns of items with certain category) - if (leftRowCount > rightRowCount) { - baseRowCount = rightRowCount; - percentageAdjustment = mq.getPercentageOriginalRows(rel.getLeft()); - } else { + if (rel.getJoinType() == JoinRelType.INNER || rel.getJoinType() == JoinRelType.SEMI) { + // Assume we have two fact tables SALES and RETURNS sharing the same primary key. Every item + // can be sold, but only items which were sold can be returned back, therefore + // size(SALES) > size(RETURNS). When joining SALES on RETURNS by primary key, the estimated + // result size will be the same as the size of the smallest table (RETURNS in this case), + // adjusted by the percentage of rows of the biggest table (SALES in this case; percentage + // adjustment is required to account for predicates pushed down to the table, e.g. we are + // interested in returns of items with certain category) + if (leftRowCount > rightRowCount) { + baseRowCount = rightRowCount; + percentageAdjustment = mq.getPercentageOriginalRows(rel.getLeft()); + } else { + baseRowCount = leftRowCount; + percentageAdjustment = mq.getPercentageOriginalRows(rel.getRight()); + } + } else if (rel.getJoinType() == JoinRelType.LEFT) { baseRowCount = leftRowCount; - percentageAdjustment = mq.getPercentageOriginalRows(rel.getRight()); + } else if (rel.getJoinType() == JoinRelType.RIGHT) { + baseRowCount = rightRowCount; + } else if (rel.getJoinType() == JoinRelType.FULL) { + Double selectivity = mq.getSelectivity(rel, rel.getCondition()); + + // Fall-back to calcite's implementation. + if (selectivity == null) { + return RelMdUtil.getJoinRowCount(mq, rel, rel.getCondition()); + } + + baseRowCount = rightRowCount + leftRowCount; + percentageAdjustment = 1.0 - selectivity; } - } else { + } else if (context.joinType() == JoiningRelationType.FK_ON_PK) { // For foreign key joins the base table is the one which is joined by non-primary key columns. - if (context.joinType() == JoiningRelationType.FK_ON_PK) { + if (rel.getJoinType() == JoinRelType.INNER || rel.getJoinType() == JoinRelType.SEMI) { baseRowCount = leftRowCount; percentageAdjustment = mq.getPercentageOriginalRows(rel.getRight()); - } else { - assert context.joinType() == JoiningRelationType.PK_ON_FK : context.joinType(); + } else if (rel.getJoinType() == JoinRelType.LEFT) { + baseRowCount = leftRowCount; + } else if (rel.getJoinType() == JoinRelType.RIGHT) { + baseRowCount = rightRowCount; + } else if (rel.getJoinType() == JoinRelType.FULL) { + Double selectivity = mq.getSelectivity(rel, rel.getCondition()); + + // Fall-back to calcite's implementation. + if (selectivity == null) { + return RelMdUtil.getJoinRowCount(mq, rel, rel.getCondition()); + } + + baseRowCount = rightRowCount + leftRowCount; + percentageAdjustment = 1.0 - selectivity; + } + } else { // PK_ON_FK + if (rel.getJoinType() == JoinRelType.INNER || rel.getJoinType() == JoinRelType.SEMI) { baseRowCount = rightRowCount; percentageAdjustment = mq.getPercentageOriginalRows(rel.getLeft()); + } else if (rel.getJoinType() == JoinRelType.LEFT) { + baseRowCount = leftRowCount; + } else if (rel.getJoinType() == JoinRelType.RIGHT) { + baseRowCount = rightRowCount; + } else if (rel.getJoinType() == JoinRelType.FULL) { + Double selectivity = mq.getSelectivity(rel, rel.getCondition()); + + /// Fall-back to calcite's implementation. Review Comment: ```suggestion // Fall-back to calcite's implementation. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org