This is an automated email from the ASF dual-hosted git repository.

chunwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new e7ac26d  [CALCITE-4241] Some improvements to metadata query
e7ac26d is described below

commit e7ac26d6ec083848531b456d2acf3f7f58a499b5
Author: liyafan82 <[email protected]>
AuthorDate: Thu Sep 10 16:36:57 2020 +0800

    [CALCITE-4241] Some improvements to metadata query
---
 .../rel/metadata/RelMdColumnUniqueness.java        |  7 +++--
 .../rel/metadata/RelMdDistinctRowCount.java        | 28 +++++++++++++++++---
 .../org/apache/calcite/rel/metadata/RelMdUtil.java | 10 ++------
 .../org/apache/calcite/test/RelMetadataTest.java   | 30 ++++++++++++++++++++++
 4 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
index 755964c..c75925b 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
@@ -364,7 +364,7 @@ public class RelMdColumnUniqueness
       return true;
     }
     final Set<List<Comparable>> set = new HashSet<>();
-    final List<Comparable> values = new ArrayList<>();
+    final List<Comparable> values = new ArrayList<>(columns.cardinality());
     for (ImmutableList<RexLiteral> tuple : rel.tuples) {
       for (int column : columns) {
         final RexLiteral literal = tuple.get(column);
@@ -395,7 +395,6 @@ public class RelMdColumnUniqueness
   public Boolean areColumnsUnique(RelSubset rel, RelMetadataQuery mq,
       ImmutableBitSet columns, boolean ignoreNulls) {
     columns = decorateWithConstantColumnsFromPredicates(columns, rel, mq);
-    int nullCount = 0;
     for (RelNode rel2 : rel.getRels()) {
       if (rel2 instanceof Aggregate
           || rel2 instanceof Filter
@@ -410,7 +409,7 @@ public class RelMdColumnUniqueness
               return true;
             }
           } else {
-            ++nullCount;
+            return null;
           }
         } catch (CyclicMetadataException e) {
           // Ignore this relational expression; there will be non-cyclic ones
@@ -418,7 +417,7 @@ public class RelMdColumnUniqueness
         }
       }
     }
-    return nullCount == 0 ? false : null;
+    return false;
   }
 
   private boolean simplyProjects(RelNode rel, ImmutableBitSet columns) {
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
index 6a6b8e5..971f4f3 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
@@ -29,6 +29,7 @@ import org.apache.calcite.rel.core.TableModify;
 import org.apache.calcite.rel.core.Union;
 import org.apache.calcite.rel.core.Values;
 import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.util.Bug;
@@ -36,8 +37,12 @@ import org.apache.calcite.util.BuiltInMethod;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.NumberUtil;
 
+import com.google.common.collect.ImmutableList;
+
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 /**
  * RelMdDistinctRowCount supplies a default implementation of
@@ -192,11 +197,26 @@ public class RelMdDistinctRowCount
         return 1D;
       }
     }
-    double selectivity = RelMdUtil.guessSelectivity(predicate);
 
-    // assume half the rows are duplicates
-    double nRows = rel.estimateRowCount(mq) / 2;
-    return RelMdUtil.numDistinctVals(nRows, nRows * selectivity);
+    final Set<List<Comparable>> set = new HashSet<>();
+    final List<Comparable> values = new ArrayList<>(groupKey.cardinality());
+    for (ImmutableList<RexLiteral> tuple : rel.tuples) {
+      for (int column : groupKey) {
+        final RexLiteral literal = tuple.get(column);
+        values.add(literal.isNull()
+            ? NullSentinel.INSTANCE
+            : literal.getValueAs(Comparable.class));
+      }
+      set.add(ImmutableList.copyOf(values));
+      values.clear();
+    }
+    double nRows = set.size();
+    if ((predicate == null) || predicate.isAlwaysTrue()) {
+      return nRows;
+    } else {
+      double selectivity = RelMdUtil.guessSelectivity(predicate);
+      return RelMdUtil.numDistinctVals(nRows, nRows * selectivity);
+    }
   }
 
   public Double getDistinctRowCount(Project rel, RelMetadataQuery mq,
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
index ae94a46..4f642a2 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
@@ -232,10 +232,7 @@ public class RelMdUtil {
   public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered(
       RelMetadataQuery mq, RelNode rel, ImmutableBitSet colMask) {
     Boolean b = mq.areColumnsUnique(rel, colMask, true);
-    if (b == null) {
-      return false;
-    }
-    return b;
+    return b != null && b;
   }
 
   public static Boolean areColumnsUniqueWhenNullsFiltered(RelMetadataQuery mq,
@@ -252,10 +249,7 @@ public class RelMdUtil {
   public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered(
       RelMetadataQuery mq, RelNode rel, List<RexInputRef> columnRefs) {
     Boolean b = areColumnsUniqueWhenNullsFiltered(mq, rel, columnRefs);
-    if (b == null) {
-      return false;
-    }
-    return b;
+    return b != null && b;
   }
 
   /**
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index 76d33c5..62485dc 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -918,6 +918,36 @@ public class RelMetadataTest extends SqlToRelTestBase {
 
   @Test void testDistinctRowCountTable() {
     // no unique key information is available so return null
+    RelNode rel = convertSql("select * from (values "
+        + "(1, 2, 3, null), "
+        + "(3, 4, 5, 6), "
+        + "(3, 4, null, 6), "
+        + "(8, 4, 5, null) "
+        + ") t(c1, c2, c3, c4)");
+    final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+
+    ImmutableBitSet groupKey = ImmutableBitSet.of(0, 1, 2, 3);
+    Double result = mq.getDistinctRowCount(rel, groupKey, null);
+    // all rows are different
+    assertThat(result, is(4D));
+
+    groupKey = ImmutableBitSet.of(1, 2);
+    result = mq.getDistinctRowCount(rel, groupKey, null);
+    // rows 2 and 4 are the same in the specified columns
+    assertThat(result, is(3D));
+
+    groupKey = ImmutableBitSet.of(0);
+    result = mq.getDistinctRowCount(rel, groupKey, null);
+    // rows 2 and 3 are the same in the specified columns
+    assertThat(result, is(3D));
+
+    groupKey = ImmutableBitSet.of(3);
+    result = mq.getDistinctRowCount(rel, groupKey, null);
+    // the last column has 2 distinct values: 6 and null
+    assertThat(result, is(2D));
+  }
+
+  @Test void testDistinctRowCountValues() {
     RelNode rel = convertSql("select * from emp where deptno = 10");
     final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
     ImmutableBitSet groupKey =

Reply via email to