Github user gparai commented on a diff in the pull request:
https://github.com/apache/drill/pull/729#discussion_r100414561
--- Diff:
exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdRowCount.java
---
@@ -17,32 +17,81 @@
******************************************************************************/
package org.apache.drill.exec.planner.cost;
+import java.io.IOException;
+
+import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
-import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdRowCount;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.drill.exec.planner.common.DrillFilterRelBase;
+import org.apache.drill.exec.planner.common.DrillRelOptUtil;
+import org.apache.drill.exec.planner.logical.DrillTable;
+import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
+import org.apache.drill.exec.store.parquet.ParquetGroupScan;
public class DrillRelMdRowCount extends RelMdRowCount{
private static final DrillRelMdRowCount INSTANCE = new
DrillRelMdRowCount();
public static final RelMetadataProvider SOURCE =
ReflectiveRelMetadataProvider.reflectiveSource(BuiltInMethod.ROW_COUNT.method,
INSTANCE);
@Override
- public Double getRowCount(Aggregate rel) {
- ImmutableBitSet groupKey = ImmutableBitSet.range(rel.getGroupCount());
-
- if (groupKey.isEmpty()) {
- return 1.0;
+ public Double getRowCount(RelNode rel) {
+ if (rel instanceof TableScan) {
+ return getRowCount((TableScan) rel);
+ } else if (rel instanceof DrillFilterRelBase) {
+ return getRowCount((DrillFilterRelBase) rel);
} else {
return super.getRowCount(rel);
}
}
- @Override
- public Double getRowCount(Filter rel) {
+ private Double getRowCount(DrillFilterRelBase rel) {
+ if (DrillRelOptUtil.guessRows(rel)) {
+ return super.getRowCount(rel);
+ }
+ // Need capped selectivity estimates. See the Filter getRows() method
return rel.getRows();
}
+
+ private Double getRowCount(TableScan rel) {
+ DrillTable table;
+ if (DrillRelOptUtil.guessRows(rel)) {
+ return super.getRowCount(rel);
+ }
+ table = rel.getTable().unwrap(DrillTable.class);
+ if (table == null) {
+ table =
rel.getTable().unwrap(DrillTranslatableTable.class).getDrillTable();
+ }
+ // Return rowcount from statistics, if available. Otherwise, delegate to parent.
+ try {
+ if (table != null
+ && table.getStatsTable() != null
+ /* For ParquetGroupScan rely on accurate count from the scan instead of
+ * statistics since partition pruning/filter pushdown might have occurred.
+ * The other way would be to iterate over the rowgroups present in the
+ * ParquetGroupScan to obtain the rowcount.
+ */
+ && !(table.getGroupScan() instanceof ParquetGroupScan)) {
--- End diff --
Done
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---