This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new f7069cc524 [CALCITE-5647] RelMdPopulationSize should use 
mq.getRowCount(rel) instead of rel.estimateRowCount(mq)
f7069cc524 is described below

commit f7069cc5245c22f816c565669f52b4f30b046f4d
Author: Adam Kennedy <adamkennedybac...@gmail.com>
AuthorDate: Fri Apr 14 15:40:06 2023 -0700

    [CALCITE-5647] RelMdPopulationSize should use mq.getRowCount(rel) instead 
of rel.estimateRowCount(mq)
    
    Use RelMetadataQuery#getRowCount() instead of estimateRowCount()
    when calculating RelMdPopulationSize#getPopulationSize()
---
 .../calcite/rel/metadata/RelMdPopulationSize.java  |  2 +-
 .../org/apache/calcite/test/RelMetadataTest.java   | 39 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java 
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java
index 963c3bac5f..bbd4c0fa82 100644
--- 
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java
+++ 
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java
@@ -113,7 +113,7 @@ public class RelMdPopulationSize
   public Double getPopulationSize(Values rel, RelMetadataQuery mq,
       ImmutableBitSet groupKey) {
     // assume half the rows are duplicates
-    return rel.estimateRowCount(mq) / 2;
+    return mq.getRowCount(rel) / 2;
   }
 
   public @Nullable Double getPopulationSize(Project rel, RelMetadataQuery mq,
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java 
b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index 24155414e6..db1f65c242 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.calcite.test;
+
 import org.apache.calcite.adapter.enumerable.EnumerableConvention;
 import org.apache.calcite.adapter.enumerable.EnumerableLimit;
 import org.apache.calcite.adapter.enumerable.EnumerableMergeJoin;
@@ -76,6 +77,7 @@ import 
org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelColumnOrigin;
 import org.apache.calcite.rel.metadata.RelMdCollation;
 import org.apache.calcite.rel.metadata.RelMdColumnUniqueness;
+import org.apache.calcite.rel.metadata.RelMdPopulationSize;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
@@ -3639,6 +3641,43 @@ public class RelMetadataTest {
         is(mq.getPopulationSize(rel, bitSetOf(0))));
   }
 
+  /**
+   * Test that RelMdPopulationSize is calculated based on the 
RelMetadataQuery#getRowCount().
+   *
+   * @see <a 
href="https://issues.apache.org/jira/browse/CALCITE-5647">[CALCITE-5647]</a>
+   */
+  @Test public void testPopulationSizeFromValues() {
+    final String sql = "values(1,2,3),(1,2,3),(1,2,3),(1,2,3)";
+    final RelNode rel = sql(sql).toRel();
+    assertThat(rel, instanceOf(Values.class));
+
+    RelMetadataProvider provider = RelMdPopulationSize.SOURCE;
+
+    List<MetadataHandler<?>> handlers =
+        provider.handlers(BuiltInMetadata.PopulationSize.Handler.class);
+
+    // The population size is calculated to be half the row count. (The 
assumption is that half
+    // the rows are duplicated.) With the default handler it should evaluate 
to 2 since there
+    // are 4 rows.
+    RelMdPopulationSize populationSize = (RelMdPopulationSize) handlers.get(0);
+    Double popSize =
+        populationSize.getPopulationSize((Values) rel, 
rel.getCluster().getMetadataQuery(),
+            bitSetOf(0, 1, 2));
+    assertEquals(2.0, popSize);
+
+    // If we use a custom RelMetadataQuery and override the row count, the 
population size
+    // should be half the reported row count. In this case we will have the 
RelMetadataQuery say
+    // the row count is 12 for testing purposes, so we should expect a 
population size of 6.
+    RelMetadataQuery customQuery = new RelMetadataQuery() {
+      @Override public Double getRowCount(RelNode rel) {
+        return 12.0;
+      }
+    };
+
+    popSize = populationSize.getPopulationSize((Values) rel, customQuery, 
bitSetOf(0, 1, 2));
+    assertEquals(6.0, popSize);
+  }
+
   private static final SqlOperator NONDETERMINISTIC_OP =
       SqlBasicFunction.create("NDC", ReturnTypes.BOOLEAN, 
OperandTypes.VARIADIC)
           .withDeterministic(false);

Reply via email to