This is an automated email from the ASF dual-hosted git repository. jhyde pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
commit 70d59fedfdb9fc956f3b1d1764833cbded7ae44d Author: liyafan82 <[email protected]> AuthorDate: Tue Feb 23 13:52:16 2021 +0800 [CALCITE-4511] Distinct row count and population size for constant columns should be 1 Close apache/calcite#2355 --- .../rel/metadata/RelMdDistinctRowCount.java | 15 ++++++++ .../calcite/rel/metadata/RelMdPopulationSize.java | 15 ++++++++ .../main/java/org/apache/calcite/rex/RexUtil.java | 23 ++++++++++++ .../org/apache/calcite/test/RelMetadataTest.java | 41 ++++++++++++++++++++++ 4 files changed, 94 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java index 9596342..a8b42cf 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java @@ -227,6 +227,21 @@ public class RelMdDistinctRowCount return 1D; } } + + // try to remove const columns from the group keys, as they do not + // affect the distinct row count + ImmutableBitSet nonConstCols = RexUtil.getNonConstColumns(groupKey, rel.getProjects()); + if (nonConstCols.cardinality() == 0) { + // all columns are constants, the distinct row count should be 1 + return 1D; + } + + if (nonConstCols.cardinality() < groupKey.cardinality()) { + // some const columns can be removed, call the method recursively + // with the trimmed columns + return getDistinctRowCount(rel, mq, nonConstCols, predicate); + } + ImmutableBitSet.Builder baseCols = ImmutableBitSet.builder(); ImmutableBitSet.Builder projCols = ImmutableBitSet.builder(); List<RexNode> projExprs = rel.getProjects(); diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java index ca39cc8..7c34639 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java @@ -27,6 +27,7 @@ import org.apache.calcite.rel.core.TableModify; import org.apache.calcite.rel.core.Union; import org.apache.calcite.rel.core.Values; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; @@ -107,6 +108,20 @@ public class RelMdPopulationSize public @Nullable Double getPopulationSize(Project rel, RelMetadataQuery mq, ImmutableBitSet groupKey) { + // try to remove const columns from the group keys, as they do not + // affect the population size + ImmutableBitSet nonConstCols = RexUtil.getNonConstColumns(groupKey, rel.getProjects()); + if (nonConstCols.cardinality() == 0) { + // all columns are constants, the population size should be 1 + return 1D; + } + + if (nonConstCols.cardinality() < groupKey.cardinality()) { + // some const columns can be removed, call the method recursively + // with the trimmed columns + return getPopulationSize(rel, mq, nonConstCols); + } + ImmutableBitSet.Builder baseCols = ImmutableBitSet.builder(); ImmutableBitSet.Builder projCols = ImmutableBitSet.builder(); List<RexNode> projExprs = rel.getProjects(); diff --git a/core/src/main/java/org/apache/calcite/rex/RexUtil.java b/core/src/main/java/org/apache/calcite/rex/RexUtil.java index cfe0a8f..4870a82 100644 --- a/core/src/main/java/org/apache/calcite/rex/RexUtil.java +++ b/core/src/main/java/org/apache/calcite/rex/RexUtil.java @@ -40,6 +40,7 @@ import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.util.ControlFlowException; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Litmus; import org.apache.calcite.util.Pair; import org.apache.calcite.util.RangeSets; @@ -2332,6 +2333,28 @@ public class RexUtil { return occurrences; } + /** + * Given some expressions, gets the indices of the non-constant ones. + */ + public static ImmutableBitSet getNonConstColumns(List<RexNode> expressions) { + ImmutableBitSet cols = ImmutableBitSet.range(0, expressions.size()); + return getNonConstColumns(cols, expressions); + } + + /** + * Given some expressions and columns, gets the indices of the non-constant ones. + */ + public static ImmutableBitSet getNonConstColumns( + ImmutableBitSet columns, List<RexNode> expressions) { + ImmutableBitSet.Builder nonConstCols = ImmutableBitSet.builder(); + for (int col : columns) { + if (!isLiteral(expressions.get(col), true)) { + nonConstCols.set(col); + } + } + return nonConstCols.build(); + } + //~ Inner Classes ---------------------------------------------------------- /** diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java index 7d84dbb..55b2217 100644 --- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java @@ -92,6 +92,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexProgram; import org.apache.calcite.rex.RexTableInputRef; import org.apache.calcite.rex.RexTableInputRef.RelTableRef; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.runtime.SqlFunctions; import org.apache.calcite.sql.SqlExplainLevel; import org.apache.calcite.sql.SqlKind; @@ -3240,6 +3241,46 @@ public class RelMetadataTest extends SqlToRelTestBase { checkNodeTypeCount(sql, expected); } + @Test void testConstColumnsNdv() { + final String sql = "select ename, 100, 200 from emp"; + final RelNode rel = convertSql(sql); + RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); + + assertThat(rel, instanceOf(Project.class)); + + Project project = (Project) rel; + assertThat(project.getProjects().size(), is(3)); + + // a non-const column, followed by two constant columns. + assertThat(RexUtil.isLiteral(project.getProjects().get(0), true), is(false)); + assertThat(RexUtil.isLiteral(project.getProjects().get(1), true), is(true)); + assertThat(RexUtil.isLiteral(project.getProjects().get(2), true), is(true)); + + // the distinct row count of const columns should be 1 + assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(), null), is(1.0)); + assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(1), null), is(1.0)); + assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(1, 2), null), is(1.0)); + + // the population size of const columns should be 1 + assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of()), is(1.0)); + assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(1)), is(1.0)); + assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(1, 2)), is(1.0)); + + // the distinct row count of mixed columns depends on the distinct row + // count of non-const columns + assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0, 1), null), + is(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0), null))); + assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0, 1, 2), null), + is(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0), null))); + + // the population size of mixed columns depends on the population size of + // non-const columns + assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(0, 1)), + is(mq.getPopulationSize(rel, ImmutableBitSet.of(0)))); + assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(0, 1, 2)), + is(mq.getPopulationSize(rel, ImmutableBitSet.of(0)))); + } + private static final SqlOperator NONDETERMINISTIC_OP = new SqlSpecialOperator( "NDC", SqlKind.OTHER_FUNCTION,
