This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new f7069cc524 [CALCITE-5647] RelMdPopulationSize should use mq.getRowCount(rel) instead of rel.estimateRowCount(mq) f7069cc524 is described below commit f7069cc5245c22f816c565669f52b4f30b046f4d Author: Adam Kennedy <adamkennedybac...@gmail.com> AuthorDate: Fri Apr 14 15:40:06 2023 -0700 [CALCITE-5647] RelMdPopulationSize should use mq.getRowCount(rel) instead of rel.estimateRowCount(mq) Use RelMetadataQuery#getRowCount() instead of estimateRowCount() when calculating RelMdPopulationSize#getPopulationSize() --- .../calcite/rel/metadata/RelMdPopulationSize.java | 2 +- .../org/apache/calcite/test/RelMetadataTest.java | 39 ++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java index 963c3bac5f..bbd4c0fa82 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java @@ -113,7 +113,7 @@ public class RelMdPopulationSize public Double getPopulationSize(Values rel, RelMetadataQuery mq, ImmutableBitSet groupKey) { // assume half the rows are duplicates - return rel.estimateRowCount(mq) / 2; + return mq.getRowCount(rel) / 2; } public @Nullable Double getPopulationSize(Project rel, RelMetadataQuery mq, diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java index 24155414e6..db1f65c242 100644 --- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java @@ -15,6 +15,7 @@ * limitations under the License. 
*/ package org.apache.calcite.test; + import org.apache.calcite.adapter.enumerable.EnumerableConvention; import org.apache.calcite.adapter.enumerable.EnumerableLimit; import org.apache.calcite.adapter.enumerable.EnumerableMergeJoin; @@ -76,6 +77,7 @@ import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelColumnOrigin; import org.apache.calcite.rel.metadata.RelMdCollation; import org.apache.calcite.rel.metadata.RelMdColumnUniqueness; +import org.apache.calcite.rel.metadata.RelMdPopulationSize; import org.apache.calcite.rel.metadata.RelMdUtil; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; @@ -3639,6 +3641,43 @@ public class RelMetadataTest { is(mq.getPopulationSize(rel, bitSetOf(0)))); } + /** + * Test that RelMdPopulationSize is calculated based on the RelMetadataQuery#getRowCount(). + * + * @see <a href="https://issues.apache.org/jira/browse/CALCITE-5647">[CALCITE-5647]</a> + */ + @Test public void testPopulationSizeFromValues() { + final String sql = "values(1,2,3),(1,2,3),(1,2,3),(1,2,3)"; + final RelNode rel = sql(sql).toRel(); + assertThat(rel, instanceOf(Values.class)); + + RelMetadataProvider provider = RelMdPopulationSize.SOURCE; + + List<MetadataHandler<?>> handlers = + provider.handlers(BuiltInMetadata.PopulationSize.Handler.class); + + // The population size is calculated to be half the row count. (The assumption is that half + // the rows are duplicated.) With the default handler it should evaluate to 2 since there + // are 4 rows. + RelMdPopulationSize populationSize = (RelMdPopulationSize) handlers.get(0); + Double popSize = + populationSize.getPopulationSize((Values) rel, rel.getCluster().getMetadataQuery(), + bitSetOf(0, 1, 2)); + assertEquals(2.0, popSize); + + // If we use a custom RelMetadataQuery and override the row count, the population size + // should be half the reported row count. 
In this case we will have the RelMetadataQuery say + // the row count is 12 for testing purposes, so we should expect a population size of 6. + RelMetadataQuery customQuery = new RelMetadataQuery() { + @Override public Double getRowCount(RelNode rel) { + return 12.0; + } + }; + + popSize = populationSize.getPopulationSize((Values) rel, customQuery, bitSetOf(0, 1, 2)); + assertEquals(6.0, popSize); + } + private static final SqlOperator NONDETERMINISTIC_OP = SqlBasicFunction.create("NDC", ReturnTypes.BOOLEAN, OperandTypes.VARIADIC) .withDeterministic(false);