Chun Chang created DRILL-1434: --------------------------------- Summary: count() on null value gives zero Key: DRILL-1434 URL: https://issues.apache.org/jira/browse/DRILL-1434 Project: Apache Drill Issue Type: Bug Components: Functions - Drill Affects Versions: 0.6.0 Reporter: Chun Chang
code base #Fri Sep 12 14:08:02 PDT 2014 git.commit.id.abbrev=9e16466 I have a parquet file (tpcds data) which contains null values in a column. The total count of the column: 0: jdbc:drill:schema=dfs> select count(ss_quantity) from `tpcds/p1/store_sales.parquet`; +------------+ | EXPR$0 | +------------+ | 2880404 | +------------+ The count excluding nulls is: 0: jdbc:drill:schema=dfs> select count(ss_quantity) from `tpcds/p1/store_sales.parquet` where ss_quantity is not null; +------------+ | EXPR$0 | +------------+ | 2750408 | +------------+ But the count for null values is zero: 0: jdbc:drill:schema=dfs> select count(ss_quantity) from `tpcds/p1/store_sales.parquet` where ss_quantity is null; +------------+ | EXPR$0 | +------------+ | 0 | +------------+ Here is what the physical plan looks like for this query: 0: jdbc:drill:schema=dfs> explain plan for select count(ss_quantity) from `tpcds/p1/store_sales.parquet` where ss_quantity is null; +------------+------------+ | text | json | +------------+------------+ | 00-00 Screen 00-01 StreamAgg(group=[{}], EXPR$0=[COUNT($0)]) 00-02 Filter(condition=[IS NULL($0)]) 00-03 ProducerConsumer 00-04 Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/user/root/mondrian/tpcds/p1/store_sales.parquet]], selectionRoot=/user/root/mondrian/tpcds/p1/store_sales.parquet, columns=[SchemaPath [`ss_quantity`]]]]) | { "head" : { "version" : 1, "generator" : { "type" : "ExplainHandler", "info" : "" }, "type" : "APACHE_DRILL_PHYSICAL", "options" : [ ], "queue" : 0, "resultMode" : "EXEC" }, "graph" : [ { "pop" : "parquet-scan", "@id" : 4, "entries" : [ { "path" : "maprfs:/user/root/mondrian/tpcds/p1/store_sales.parquet" } ], "storage" : { "type" : "file", "enabled" : true, "connection" : "maprfs:///", "workspaces" : { "default" : { "location" : "/user/root/mondrian/", "writable" : true, "storageformat" : null }, "home" : { "location" : "/", "writable" : false, "storageformat" : null }, "root" : { "location" : "/", 
"writable" : false, "storageformat" : null }, "abhi" : { "location" : "/tables", "writable" : true, "storageformat" : "csv" }, "chun" : { "location" : "/drill/testdata/chun/", "writable" : false, "storageformat" : null }, "tmp" : { "location" : "/tmp", "writable" : true, "storageformat" : "csv" } }, "formats" : { "psv" : { "type" : "text", "extensions" : [ "tbl" ], "delimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], "delimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], "delimiter" : "\t" }, "parquet" : { "type" : "parquet" }, "json" : { "type" : "json" } } }, "format" : { "type" : "parquet" }, "columns" : [ "`ss_quantity`" ], "selectionRoot" : "/user/root/mondrian/tpcds/p1/store_sales.parquet", "cost" : 2880404.0 }, { "pop" : "producer-consumer", "@id" : 3, "child" : 4, "size" : 10, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 2880404.0 }, { "pop" : "filter", "@id" : 2, "child" : 3, "expr" : "isnull(`ss_quantity`) ", "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 720101.0 }, { "pop" : "streaming-aggregate", "@id" : 1, "child" : 2, "keys" : [ ], "exprs" : [ { "ref" : "`EXPR$0`", "expr" : "count(`ss_quantity`) " } ], "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1.0 }, { "pop" : "screen", "@id" : 0, "child" : 1, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 72010.1 } ] } | +------------+------------+ -- This message was sent by Atlassian JIRA (v6.3.4#6332)