This is an automated email from the ASF dual-hosted git repository.
chunwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new e7ac26d [CALCITE-4241] Some improvements to metadata query
e7ac26d is described below
commit e7ac26d6ec083848531b456d2acf3f7f58a499b5
Author: liyafan82 <[email protected]>
AuthorDate: Thu Sep 10 16:36:57 2020 +0800
[CALCITE-4241] Some improvements to metadata query
---
.../rel/metadata/RelMdColumnUniqueness.java | 7 +++--
.../rel/metadata/RelMdDistinctRowCount.java | 28 +++++++++++++++++---
.../org/apache/calcite/rel/metadata/RelMdUtil.java | 10 ++------
.../org/apache/calcite/test/RelMetadataTest.java | 30 ++++++++++++++++++++++
4 files changed, 59 insertions(+), 16 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
index 755964c..c75925b 100644
---
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
+++
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdColumnUniqueness.java
@@ -364,7 +364,7 @@ public class RelMdColumnUniqueness
return true;
}
final Set<List<Comparable>> set = new HashSet<>();
- final List<Comparable> values = new ArrayList<>();
+ final List<Comparable> values = new ArrayList<>(columns.cardinality());
for (ImmutableList<RexLiteral> tuple : rel.tuples) {
for (int column : columns) {
final RexLiteral literal = tuple.get(column);
@@ -395,7 +395,6 @@ public class RelMdColumnUniqueness
public Boolean areColumnsUnique(RelSubset rel, RelMetadataQuery mq,
ImmutableBitSet columns, boolean ignoreNulls) {
columns = decorateWithConstantColumnsFromPredicates(columns, rel, mq);
- int nullCount = 0;
for (RelNode rel2 : rel.getRels()) {
if (rel2 instanceof Aggregate
|| rel2 instanceof Filter
@@ -410,7 +409,7 @@ public class RelMdColumnUniqueness
return true;
}
} else {
- ++nullCount;
+ return null;
}
} catch (CyclicMetadataException e) {
// Ignore this relational expression; there will be non-cyclic ones
@@ -418,7 +417,7 @@ public class RelMdColumnUniqueness
}
}
}
- return nullCount == 0 ? false : null;
+ return false;
}
private boolean simplyProjects(RelNode rel, ImmutableBitSet columns) {
diff --git
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
index 6a6b8e5..971f4f3 100644
---
a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
+++
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
@@ -29,6 +29,7 @@ import org.apache.calcite.rel.core.TableModify;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.util.Bug;
@@ -36,8 +37,12 @@ import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.NumberUtil;
+import com.google.common.collect.ImmutableList;
+
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
/**
* RelMdDistinctRowCount supplies a default implementation of
@@ -192,11 +197,26 @@ public class RelMdDistinctRowCount
return 1D;
}
}
- double selectivity = RelMdUtil.guessSelectivity(predicate);
- // assume half the rows are duplicates
- double nRows = rel.estimateRowCount(mq) / 2;
- return RelMdUtil.numDistinctVals(nRows, nRows * selectivity);
+ final Set<List<Comparable>> set = new HashSet<>();
+ final List<Comparable> values = new ArrayList<>(groupKey.cardinality());
+ for (ImmutableList<RexLiteral> tuple : rel.tuples) {
+ for (int column : groupKey) {
+ final RexLiteral literal = tuple.get(column);
+ values.add(literal.isNull()
+ ? NullSentinel.INSTANCE
+ : literal.getValueAs(Comparable.class));
+ }
+ set.add(ImmutableList.copyOf(values));
+ values.clear();
+ }
+ double nRows = set.size();
+ if ((predicate == null) || predicate.isAlwaysTrue()) {
+ return nRows;
+ } else {
+ double selectivity = RelMdUtil.guessSelectivity(predicate);
+ return RelMdUtil.numDistinctVals(nRows, nRows * selectivity);
+ }
}
public Double getDistinctRowCount(Project rel, RelMetadataQuery mq,
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
index ae94a46..4f642a2 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdUtil.java
@@ -232,10 +232,7 @@ public class RelMdUtil {
public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered(
RelMetadataQuery mq, RelNode rel, ImmutableBitSet colMask) {
Boolean b = mq.areColumnsUnique(rel, colMask, true);
- if (b == null) {
- return false;
- }
- return b;
+ return b != null && b;
}
public static Boolean areColumnsUniqueWhenNullsFiltered(RelMetadataQuery mq,
@@ -252,10 +249,7 @@ public class RelMdUtil {
public static boolean areColumnsDefinitelyUniqueWhenNullsFiltered(
RelMetadataQuery mq, RelNode rel, List<RexInputRef> columnRefs) {
Boolean b = areColumnsUniqueWhenNullsFiltered(mq, rel, columnRefs);
- if (b == null) {
- return false;
- }
- return b;
+ return b != null && b;
}
/**
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index 76d33c5..62485dc 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -918,6 +918,36 @@ public class RelMetadataTest extends SqlToRelTestBase {
@Test void testDistinctRowCountTable() {
// no unique key information is available so return null
+ RelNode rel = convertSql("select * from (values "
+ + "(1, 2, 3, null), "
+ + "(3, 4, 5, 6), "
+ + "(3, 4, null, 6), "
+ + "(8, 4, 5, null) "
+ + ") t(c1, c2, c3, c4)");
+ final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+
+ ImmutableBitSet groupKey = ImmutableBitSet.of(0, 1, 2, 3);
+ Double result = mq.getDistinctRowCount(rel, groupKey, null);
+ // all rows are different
+ assertThat(result, is(4D));
+
+ groupKey = ImmutableBitSet.of(1, 2);
+ result = mq.getDistinctRowCount(rel, groupKey, null);
+ // rows 2 and 4 are the same in the specified columns
+ assertThat(result, is(3D));
+
+ groupKey = ImmutableBitSet.of(0);
+ result = mq.getDistinctRowCount(rel, groupKey, null);
+ // rows 2 and 3 are the same in the specified columns
+ assertThat(result, is(3D));
+
+ groupKey = ImmutableBitSet.of(3);
+ result = mq.getDistinctRowCount(rel, groupKey, null);
+ // the last column has 2 distinct values: 6 and null
+ assertThat(result, is(2D));
+ }
+
+ @Test void testDistinctRowCountValues() {
RelNode rel = convertSql("select * from emp where deptno = 10");
final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
ImmutableBitSet groupKey =