IMPALA-4289: Mark agg slots of NDV() functions as non-nullable.

This change might give a minor speedup to COMPUTE STATS and COMPUTE INCREMENTAL STATS. In any case, marking the slots non-nullable seems strictly better than leaving them nullable.
Testing: I ran our local testdata/compute-table-stats.sh and it succeeded.

Change-Id: I1c05b8dfb797b2a42ee1a7bf14ad56bb83d2b1c5
Reviewed-on: http://gerrit.cloudera.org:8080/4707
Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com>
Reviewed-by: Alex Behm <alex.b...@cloudera.com>
Tested-by: Internal Jenkins

Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0b3efb19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0b3efb19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0b3efb19

Branch: refs/heads/hadoop-next
Commit: 0b3efb19cc0d971a47d0b7d7928a2d311c13e65a
Parents: f2780b5
Author: Alex Behm <alex.b...@cloudera.com>
Authored: Wed Oct 12 16:57:49 2016 -0700
Committer: Internal Jenkins <cloudera-hud...@gerrit.cloudera.org>
Committed: Thu Oct 13 04:31:42 2016 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/analysis/AggregateInfoBase.java | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0b3efb19/fe/src/main/java/org/apache/impala/analysis/AggregateInfoBase.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/AggregateInfoBase.java b/fe/src/main/java/org/apache/impala/analysis/AggregateInfoBase.java
index bced878..51c56ae 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AggregateInfoBase.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AggregateInfoBase.java
@@ -147,10 +147,13 @@ public abstract class AggregateInfoBase {
         slotDesc.setSourceExpr(aggExpr);
       }
 
-      // count(*) is non-nullable.
-      if (aggExpr.getFnName().getFunction().equals("count")) {
+      // COUNT(), NDV() and NDV_NO_FINALIZE() are non-nullable. The latter two are used
+      // by compute stats and compute incremental stats, respectively.
+      if (aggExpr.getFnName().getFunction().equals("count")
+          || aggExpr.getFnName().getFunction().equals("ndv")
+          || aggExpr.getFnName().getFunction().equals("ndv_no_finalize")) {
         // TODO: Consider making nullability a property of types or of builtin agg fns.
-        // row_number, rank, and dense_rank are non-nullable as well.
+        // row_number(), rank(), and dense_rank() are non-nullable as well.
         slotDesc.setIsNullable(false);
       }
       if (!isOutputTuple) {