This is an automated email from the ASF dual-hosted git repository.

boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 3603f240922389939ca0498face0eb255448ea53
Author: Venkata Jyothsna Donapati <[email protected]>
AuthorDate: Fri May 3 15:21:59 2019 -0700

    DRILL-7238: Fixed ConvertCountToDirectScan to handle non-existent columns
    
    closes #1781
---
 .../logical/ConvertCountToDirectScanRule.java      |  6 +++-
 .../logical/TestConvertCountToDirectScan.java      | 37 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
index 7375499..3a4e6ab 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
@@ -228,6 +228,7 @@ public class ConvertCountToDirectScanRule extends RelOptRule {
    *   2. For COUNT(*) and COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count
    *   3. For COUNT(nullable column), count = (total row count - column's null count)
    *   4. Also count can not be calculated for parition columns.
+   *   5. For the columns that are not present in the Summary(Non-existent columns), the count = 0
    *
    * @param settings planner options
    * @param metadataSummary metadata summary containing row counts and column counts
@@ -288,7 +289,10 @@ public class ConvertCountToDirectScanRule extends RelOptRule {
 
           Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
 
-         if (columnMetadata == null || columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
+          if (columnMetadata == null) {
+            // If the column doesn't exist in the table, row count is set to 0
+            cnt = 0;
+          } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
             // if column stats is not available don't apply this rule, return empty counts
             return ImmutableMap.of();
           } else {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
index eaf9257..c35ab2d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
@@ -340,4 +340,41 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
       test("drop table if exists %s", tableName);
     }
   }
+
+  @Test
+  public void testCountsWithNonExColumn() throws Exception {
+    test("use dfs.tmp");
+    String tableName = "parquet_table_counts_nonex";
+
+    try {
+      test(String.format("create table `%s/1` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+      test(String.format("create table `%s/2` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+      test(String.format("create table `%s/3` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+      test(String.format("create table `%s/4` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+
+      test("refresh table metadata %s", tableName);
+
+      String sql = String.format("select\n" +
+              "count(*) as star_count,\n" +
+              "count(col_int) as int_column_count,\n" +
+              "count(col_vrchr) as vrchr_column_count,\n" +
+              "count(non_existent) as non_existent\n" +
+              "from %s", tableName);
+
+      String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+      String recordReaderPattern = "DynamicPojoRecordReader";
+
+      testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern});
+
+      testBuilder()
+              .sqlQuery(sql)
+              .unOrdered()
+              .baselineColumns("star_count", "int_column_count", "vrchr_column_count", "non_existent" )
+              .baselineValues(24L, 8L, 12L, 0L)
+              .go();
+
+    } finally {
+      test("drop table if exists %s", tableName);
+    }
+  }
 }

Reply via email to