IMPALA-3897 Codegen null-aware constant in PHJ::ProcessBuildBatch() This change codegens out a branch in ProcessBuildBatch(). The branch is never executed for any join type except NULL_AWARE_LEFT_ANTI_JOIN. The branch itself is not expensive to execute, but removing it reduces codegen time by eliminating the dead code inside the branch for almost all join modes.
Change-Id: I06acbebc9d2d23bef4734b480a5d3ce41680ea70 Reviewed-on: http://gerrit.cloudera.org:8080/7849 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4e842620 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4e842620 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4e842620 Branch: refs/heads/master Commit: 4e84262074cab9cc8ad83a5a9d5bc42b4be42d76 Parents: e1ae988 Author: aphadke <[email protected]> Authored: Fri Aug 18 17:02:02 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Sep 12 03:46:21 2017 +0000 ---------------------------------------------------------------------- be/src/codegen/gen_ir_descriptions.py | 2 +- be/src/exec/partitioned-hash-join-builder-ir.cc | 5 ++--- be/src/exec/partitioned-hash-join-builder.cc | 15 ++++++++++++--- be/src/exec/partitioned-hash-join-builder.h | 8 +++++--- 4 files changed, 20 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/codegen/gen_ir_descriptions.py ---------------------------------------------------------------------- diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py index 668ae24..75d233c 100755 --- a/be/src/codegen/gen_ir_descriptions.py +++ b/be/src/codegen/gen_ir_descriptions.py @@ -99,7 +99,7 @@ ir_functions = [ ["HASH_FNV", "IrFnvHash"], ["HASH_MURMUR", "IrMurmurHash"], ["PHJ_PROCESS_BUILD_BATCH", - "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEb"], + "_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEbb"], ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN", 
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi0EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"], ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN", http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/partitioned-hash-join-builder-ir.cc b/be/src/exec/partitioned-hash-join-builder-ir.cc index e15e116..b9c2cc3 100644 --- a/be/src/exec/partitioned-hash-join-builder-ir.cc +++ b/be/src/exec/partitioned-hash-join-builder-ir.cc @@ -37,15 +37,14 @@ inline bool PhjBuilder::AppendRow( } Status PhjBuilder::ProcessBuildBatch( - RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) { + RowBatch* build_batch, HashTableCtx* ctx, bool build_filters, bool is_null_aware) { Status status; HashTableCtx::ExprValuesCache* expr_vals_cache = ctx->expr_values_cache(); expr_vals_cache->Reset(); FOREACH_ROW(build_batch, 0, build_batch_iter) { TupleRow* build_row = build_batch_iter.Get(); if (!ctx->EvalAndHashBuild(build_row)) { - if (null_aware_partition_ != NULL) { - // TODO: remove with codegen/template + if (is_null_aware) { // If we are NULL aware and this build row has NULL in the eq join slot, // append it to the null_aware partition. We will need it later. 
if (UNLIKELY( http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/partitioned-hash-join-builder.cc b/be/src/exec/partitioned-hash-join-builder.cc index 0c04541..d292c59 100644 --- a/be/src/exec/partitioned-hash-join-builder.cc +++ b/be/src/exec/partitioned-hash-join-builder.cc @@ -191,14 +191,18 @@ Status PhjBuilder::Send(RuntimeState* state, RowBatch* batch) { SCOPED_TIMER(partition_build_rows_timer_); bool build_filters = ht_ctx_->level() == 0 && filter_ctxs_.size() > 0; if (process_build_batch_fn_ == NULL) { - RETURN_IF_ERROR(ProcessBuildBatch(batch, ht_ctx_.get(), build_filters)); + RETURN_IF_ERROR(ProcessBuildBatch(batch, ht_ctx_.get(), build_filters, + join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)); + } else { DCHECK(process_build_batch_fn_level0_ != NULL); if (ht_ctx_->level() == 0) { RETURN_IF_ERROR( - process_build_batch_fn_level0_(this, batch, ht_ctx_.get(), build_filters)); + process_build_batch_fn_level0_(this, batch, ht_ctx_.get(), build_filters, + join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)); } else { - RETURN_IF_ERROR(process_build_batch_fn_(this, batch, ht_ctx_.get(), build_filters)); + RETURN_IF_ERROR(process_build_batch_fn_(this, batch, ht_ctx_.get(), build_filters, + join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)); } } @@ -807,6 +811,11 @@ Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen, DCHECK_EQ(replaced_constants.stores_tuples, 0); DCHECK_EQ(replaced_constants.quadratic_probing, 0); + Value* is_null_aware_arg = codegen->GetArgument(process_build_batch_fn, 5); + is_null_aware_arg->replaceAllUsesWith( + ConstantInt::get(Type::getInt1Ty(codegen->context()), + join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)); + Function* process_build_batch_fn_level0 = codegen->CloneFunction(process_build_batch_fn); 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4e842620/be/src/exec/partitioned-hash-join-builder.h ---------------------------------------------------------------------- diff --git a/be/src/exec/partitioned-hash-join-builder.h b/be/src/exec/partitioned-hash-join-builder.h index eba9e9f..2c52988 100644 --- a/be/src/exec/partitioned-hash-join-builder.h +++ b/be/src/exec/partitioned-hash-join-builder.h @@ -290,9 +290,11 @@ class PhjBuilder : public DataSink { Status CreateAndPreparePartition(int level, Partition** partition) WARN_UNUSED_RESULT; /// Reads the rows in build_batch and partitions them into hash_partitions_. If - /// 'build_filters' is true, runtime filters are populated. + /// 'build_filters' is true, runtime filters are populated. 'is_null_aware' is + /// set to true if the join type is a null aware join. Status ProcessBuildBatch( - RowBatch* build_batch, HashTableCtx* ctx, bool build_filters) WARN_UNUSED_RESULT; + RowBatch* build_batch, HashTableCtx* ctx, bool build_filters, + bool is_null_aware) WARN_UNUSED_RESULT; /// Append 'row' to 'stream'. In the common case, appending the row to the stream /// immediately succeeds. Otherwise this function falls back to the slower path of @@ -491,7 +493,7 @@ class PhjBuilder : public DataSink { /// and is used when the partition level is 0, otherwise xxx_fn_ uses murmur hash and is /// used for subsequent levels. typedef Status (*ProcessBuildBatchFn)( - PhjBuilder*, RowBatch*, HashTableCtx*, bool build_filters); + PhjBuilder*, RowBatch*, HashTableCtx*, bool build_filters, bool is_null_aware); /// Jitted ProcessBuildBatch function pointers. NULL if codegen is disabled. ProcessBuildBatchFn process_build_batch_fn_; ProcessBuildBatchFn process_build_batch_fn_level0_;
