This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git

commit 70d59fedfdb9fc956f3b1d1764833cbded7ae44d
Author: liyafan82 <[email protected]>
AuthorDate: Tue Feb 23 13:52:16 2021 +0800

    [CALCITE-4511] Distinct row count and population size for constant columns should be 1
    
    Close apache/calcite#2355
---
 .../rel/metadata/RelMdDistinctRowCount.java        | 15 ++++++++
 .../calcite/rel/metadata/RelMdPopulationSize.java  | 15 ++++++++
 .../main/java/org/apache/calcite/rex/RexUtil.java  | 23 ++++++++++++
 .../org/apache/calcite/test/RelMetadataTest.java   | 41 ++++++++++++++++++++++
 4 files changed, 94 insertions(+)

diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
index 9596342..a8b42cf 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
@@ -227,6 +227,21 @@ public class RelMdDistinctRowCount
         return 1D;
       }
     }
+
+    // try to remove const columns from the group keys, as they do not
+    // affect the distinct row count
+    ImmutableBitSet nonConstCols = RexUtil.getNonConstColumns(groupKey, rel.getProjects());
+    if (nonConstCols.cardinality() == 0) {
+      // all columns are constants, the distinct row count should be 1
+      return 1D;
+    }
+
+    if (nonConstCols.cardinality() < groupKey.cardinality()) {
+      // some const columns can be removed, call the method recursively
+      // with the trimmed columns
+      return getDistinctRowCount(rel, mq, nonConstCols, predicate);
+    }
+
     ImmutableBitSet.Builder baseCols = ImmutableBitSet.builder();
     ImmutableBitSet.Builder projCols = ImmutableBitSet.builder();
     List<RexNode> projExprs = rel.getProjects();
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java
index ca39cc8..7c34639 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdPopulationSize.java
@@ -27,6 +27,7 @@ import org.apache.calcite.rel.core.TableModify;
 import org.apache.calcite.rel.core.Union;
 import org.apache.calcite.rel.core.Values;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.util.BuiltInMethod;
 import org.apache.calcite.util.ImmutableBitSet;
 
@@ -107,6 +108,20 @@ public class RelMdPopulationSize
 
   public @Nullable Double getPopulationSize(Project rel, RelMetadataQuery mq,
       ImmutableBitSet groupKey) {
+    // try to remove const columns from the group keys, as they do not
+    // affect the population size
+    ImmutableBitSet nonConstCols = RexUtil.getNonConstColumns(groupKey, rel.getProjects());
+    if (nonConstCols.cardinality() == 0) {
+      // all columns are constants, the population size should be 1
+      return 1D;
+    }
+
+    if (nonConstCols.cardinality() < groupKey.cardinality()) {
+      // some const columns can be removed, call the method recursively
+      // with the trimmed columns
+      return getPopulationSize(rel, mq, nonConstCols);
+    }
+
     ImmutableBitSet.Builder baseCols = ImmutableBitSet.builder();
     ImmutableBitSet.Builder projCols = ImmutableBitSet.builder();
     List<RexNode> projExprs = rel.getProjects();
diff --git a/core/src/main/java/org/apache/calcite/rex/RexUtil.java b/core/src/main/java/org/apache/calcite/rex/RexUtil.java
index cfe0a8f..4870a82 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexUtil.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexUtil.java
@@ -40,6 +40,7 @@ import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.apache.calcite.sql.validate.SqlValidatorUtil;
 import org.apache.calcite.util.ControlFlowException;
+import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.Litmus;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.RangeSets;
@@ -2332,6 +2333,28 @@ public class RexUtil {
     return occurrences;
   }
 
+  /**
+   * Given some expressions, gets the indices of the non-constant ones.
+   */
+  public static ImmutableBitSet getNonConstColumns(List<RexNode> expressions) {
+    ImmutableBitSet cols = ImmutableBitSet.range(0, expressions.size());
+    return getNonConstColumns(cols, expressions);
+  }
+
+  /**
+   * Given some expressions and columns, gets the indices of the non-constant ones.
+   */
+  public static ImmutableBitSet getNonConstColumns(
+      ImmutableBitSet columns, List<RexNode> expressions) {
+    ImmutableBitSet.Builder nonConstCols = ImmutableBitSet.builder();
+    for (int col : columns) {
+      if (!isLiteral(expressions.get(col), true)) {
+        nonConstCols.set(col);
+      }
+    }
+    return nonConstCols.build();
+  }
+
   //~ Inner Classes ----------------------------------------------------------
 
   /**
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index 7d84dbb..55b2217 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -92,6 +92,7 @@ import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexProgram;
 import org.apache.calcite.rex.RexTableInputRef;
 import org.apache.calcite.rex.RexTableInputRef.RelTableRef;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.runtime.SqlFunctions;
 import org.apache.calcite.sql.SqlExplainLevel;
 import org.apache.calcite.sql.SqlKind;
@@ -3240,6 +3241,46 @@ public class RelMetadataTest extends SqlToRelTestBase {
     checkNodeTypeCount(sql, expected);
   }
 
+  @Test void testConstColumnsNdv() {
+    final String sql = "select ename, 100, 200 from emp";
+    final RelNode rel = convertSql(sql);
+    RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+
+    assertThat(rel, instanceOf(Project.class));
+
+    Project project = (Project) rel;
+    assertThat(project.getProjects().size(), is(3));
+
+    // a non-const column, followed by two constant columns.
+    assertThat(RexUtil.isLiteral(project.getProjects().get(0), true), is(false));
+    assertThat(RexUtil.isLiteral(project.getProjects().get(1), true), is(true));
+    assertThat(RexUtil.isLiteral(project.getProjects().get(2), true), is(true));
+
+    // the distinct row count of const columns should be 1
+    assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(), null), is(1.0));
+    assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(1), null), is(1.0));
+    assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(1, 2), null), is(1.0));
+
+    // the population size of const columns should be 1
+    assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of()), is(1.0));
+    assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(1)), is(1.0));
+    assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(1, 2)), is(1.0));
+
+    // the distinct row count of mixed columns depends on the distinct row
+    // count of non-const columns
+    assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0, 1), null),
+        is(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0), null)));
+    assertThat(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0, 1, 2), null),
+        is(mq.getDistinctRowCount(rel, ImmutableBitSet.of(0), null)));
+
+    // the population size of mixed columns depends on the population size of
+    // non-const columns
+    assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(0, 1)),
+        is(mq.getPopulationSize(rel, ImmutableBitSet.of(0))));
+    assertThat(mq.getPopulationSize(rel, ImmutableBitSet.of(0, 1, 2)),
+        is(mq.getPopulationSize(rel, ImmutableBitSet.of(0))));
+  }
+
   private static final SqlOperator NONDETERMINISTIC_OP = new SqlSpecialOperator(
           "NDC",
           SqlKind.OTHER_FUNCTION,

Reply via email to