This is an automated email from the ASF dual-hosted git repository. boaz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 3603f240922389939ca0498face0eb255448ea53 Author: Venkata Jyothsna Donapati <[email protected]> AuthorDate: Fri May 3 15:21:59 2019 -0700 DRILL-7238: Fixed ConvertCountToDirectScan to handle non-existent columns closes #1781 --- .../logical/ConvertCountToDirectScanRule.java | 6 +++- .../logical/TestConvertCountToDirectScan.java | 37 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java index 7375499..3a4e6ab 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java @@ -228,6 +228,7 @@ public class ConvertCountToDirectScanRule extends RelOptRule { * 2. For COUNT(*) and COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count * 3. For COUNT(nullable column), count = (total row count - column's null count) * 4. Also count can not be calculated for parition columns. + * 5. For the columns that are not present in the Summary(Non-existent columns), the count = 0 * * @param settings planner options * @param metadataSummary metadata summary containing row counts and column counts @@ -288,7 +289,10 @@ public class ConvertCountToDirectScanRule extends RelOptRule { Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath)); - if (columnMetadata == null || columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) { + if (columnMetadata == null) { + // If the column doesn't exist in the table, row count is set to 0 + cnt = 0; + } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) { // if column stats is not available don't apply this rule, return empty counts return ImmutableMap.of(); } else { diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java index eaf9257..c35ab2d 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java @@ -340,4 +340,41 @@ public class TestConvertCountToDirectScan extends PlanTestBase { test("drop table if exists %s", tableName); } } + + @Test + public void testCountsWithNonExColumn() throws Exception { + test("use dfs.tmp"); + String tableName = "parquet_table_counts_nonex"; + + try { + test(String.format("create table `%s/1` as select * from cp.`parquet/alltypes_optional.parquet`", tableName)); + test(String.format("create table `%s/2` as select * from cp.`parquet/alltypes_optional.parquet`", tableName)); + test(String.format("create table `%s/3` as select * from cp.`parquet/alltypes_optional.parquet`", tableName)); + test(String.format("create table `%s/4` as select * from cp.`parquet/alltypes_optional.parquet`", tableName)); + + test("refresh table metadata %s", tableName); + + String sql = String.format("select\n" + + "count(*) as star_count,\n" + + "count(col_int) as int_column_count,\n" + + "count(col_vrchr) as vrchr_column_count,\n" + + "count(non_existent) as non_existent\n" + + "from %s", tableName); + + String usedMetaSummaryPattern = "usedMetadataSummaryFile = true"; + String recordReaderPattern = "DynamicPojoRecordReader"; + + testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern}); + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("star_count", "int_column_count", "vrchr_column_count", "non_existent" ) + .baselineValues(24L, 8L, 12L, 0L) + .go(); + + } finally { + test("drop table if exists %s", tableName); + } + } }
