Repository: incubator-impala
Updated Branches:
  refs/heads/master 646920810 -> dd340b881


IMPALA-3360: Codegen inserting into runtime filters

This patch codegens PhjBuilder::InsertRuntimeFilters() and
FilterContext::Insert().

This allows us to unroll the loop over all the filters in
PhjBuilder::ProcessBuildBatch(), eliminate the branch on type that
happens in RawValue::GetHashValue(), and eliminate the AVX2 check
that happens in BloomFilter::Insert().

Testing:
- Ran existing runtime filter tests.
- Ran perf tests locally (all avg. over three runs):
  - Four-way self join on tpch_parquet.lineitem. Should be a good case
    for this, as there are several large hash join build sides that will
    benefit from the codegen. Total query running time improved ~7%
    (from 16.07s to 14.91s).
  - Single join of tpch_parquet.lineitem against a selectively
    filtered tpch_parquet.lineitem. Should be a bad case for this
    patch, as the build side of the join is very small. Total query
    running time regressed by ~2% (from 0.73s to 0.75s) due to
    an increase in codegen time (from 295ms to 309ms for the fragment
    containing the hash join).

Change-Id: I79cf23ad92dadaab996a50a2ca07ef9ebe8639bb
Reviewed-on: http://gerrit.cloudera.org:8080/8029
Reviewed-by: Thomas Tauber-Marshall <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/79dc220b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/79dc220b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/79dc220b

Branch: refs/heads/master
Commit: 79dc220bd75eb5dc333aeeff3f65fc5dbfe3a6e8
Parents: 6469208
Author: Thomas Tauber-Marshall <[email protected]>
Authored: Wed Sep 6 12:29:38 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Mon Sep 25 19:37:20 2017 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py           |   6 +-
 be/src/codegen/impala-ir.cc                     |   1 +
 be/src/exec/filter-context.cc                   | 162 +++++++++++++++++++
 be/src/exec/filter-context.h                    |   9 +-
 be/src/exec/partitioned-hash-join-builder-ir.cc |   3 +-
 be/src/exec/partitioned-hash-join-builder.cc    |  85 +++++++++-
 be/src/exec/partitioned-hash-join-builder.h     |  15 +-
 be/src/util/CMakeLists.txt                      |   1 +
 be/src/util/bloom-filter-ir.cc                  |  32 ++++
 be/src/util/bloom-filter.h                      |   6 +
 10 files changed, 311 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py 
b/be/src/codegen/gen_ir_descriptions.py
index 75d233c..5e1ce43 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -193,6 +193,8 @@ ir_functions = [
   ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
   ["RAW_VALUE_COMPARE",
    "_ZN6impala8RawValue7CompareEPKvS2_RKNS_10ColumnTypeE"],
+  ["RAW_VALUE_GET_HASH_VALUE",
+   "_ZN6impala8RawValue12GetHashValueEPKvRKNS_10ColumnTypeEj"],
   ["TOPN_NODE_INSERT_BATCH",
    "_ZN6impala8TopNNode11InsertBatchEPNS_8RowBatchE"],
   ["MEMPOOL_ALLOCATE",
@@ -202,7 +204,9 @@ ir_functions = [
   ["RUNTIME_FILTER_EVAL",
    "_ZNK6impala13RuntimeFilter4EvalEPvRKNS_10ColumnTypeE"],
   ["UNION_MATERIALIZE_BATCH",
-  "_ZN6impala9UnionNode16MaterializeBatchEPNS_8RowBatchEPPh"]
+  "_ZN6impala9UnionNode16MaterializeBatchEPNS_8RowBatchEPPh"],
+  ["BLOOM_FILTER_INSERT_NO_AVX2", "_ZN6impala11BloomFilter12InsertNoAvx2Ej"],
+  ["BLOOM_FILTER_INSERT_AVX2", "_ZN6impala11BloomFilter10InsertAvx2Ej"]
 ]
 
 enums_preamble = '\

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/codegen/impala-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/impala-ir.cc b/be/src/codegen/impala-ir.cc
index 2992849..24e0ce7 100644
--- a/be/src/codegen/impala-ir.cc
+++ b/be/src/codegen/impala-ir.cc
@@ -57,6 +57,7 @@
 #include "runtime/raw-value-ir.cc"
 #include "runtime/runtime-filter-ir.cc"
 #include "udf/udf-ir.cc"
+#include "util/bloom-filter-ir.cc"
 #include "util/hash-util-ir.cc"
 
 #pragma clang diagnostic pop

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/exec/filter-context.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/filter-context.cc b/be/src/exec/filter-context.cc
index 0eee704..ecf744a 100644
--- a/be/src/exec/filter-context.cc
+++ b/be/src/exec/filter-context.cc
@@ -214,3 +214,165 @@ Status FilterContext::CodegenEval(LlvmCodeGen* codegen, 
ScalarExpr* filter_expr,
   return Status::OK();
 }
 
+// An example of the generated code for TPCH-Q2: RF002 -> n_regionkey
+//
+// @expr_type_arg = constant %"struct.impala::ColumnType" { i32 4, i32 -1, i32 
-1,
+//     i32 -1, %"class.std::vector.422" zeroinitializer,
+//     %"class.std::vector.101" zeroinitializer }
+//
+// define void @FilterContextInsert(%"struct.impala::FilterContext"* %this,
+//     %"class.impala::TupleRow"* %row) #43 {
+// entry:
+//   %0 = alloca i16
+//   %local_bloom_filter_ptr = getelementptr inbounds 
%"struct.impala::FilterContext",
+//       %"struct.impala::FilterContext"* %this, i32 0, i32 3
+//   %local_bloom_filter_arg = load %"class.impala::BloomFilter"*,
+//       %"class.impala::BloomFilter"** %local_bloom_filter_ptr
+//   %bloom_is_null = icmp eq %"class.impala::BloomFilter"* 
%local_bloom_filter_arg, null
+//   br i1 %bloom_is_null, label %bloom_is_null1, label %bloom_not_null
+//
+// bloom_not_null:                                   ; preds = %entry
+//   %expr_eval_ptr = getelementptr inbounds %"struct.impala::FilterContext",
+//       %"struct.impala::FilterContext"* %this, i32 0, i32 0
+//   %expr_eval_arg = load %"class.impala::ScalarExprEvaluator"*,
+//       %"class.impala::ScalarExprEvaluator"** %expr_eval_ptr
+//   %result = call i32 @GetSlotRef.46(
+//       %"class.impala::ScalarExprEvaluator"* %expr_eval_arg,
+//       %"class.impala::TupleRow"* %row)
+//   %is_null = trunc i32 %result to i1
+//   br i1 %is_null, label %val_is_null, label %val_not_null
+//
+// bloom_is_null1:                                   ; preds = %entry
+//   ret void
+//
+// val_not_null:                                     ; preds = %bloom_not_null
+//   %1 = ashr i32 %result, 16
+//   %2 = trunc i32 %1 to i16
+//   store i16 %2, i16* %0
+//   %native_ptr = bitcast i16* %0 to i8*
+//   br label %insert_filter
+//
+// val_is_null:                                      ; preds = %bloom_not_null
+//   br label %insert_filter
+//
+// insert_filter:                                    ; preds = %val_not_null, 
%val_is_null
+//   %val_ptr_phi = phi i8* [ %native_ptr, %val_not_null ], [ null, 
%val_is_null ]
+//   %hash_value = call i32 
@_ZN6impala8RawValue12GetHashValueEPKvRKNS_10ColumnTypeEj(
+//       i8* %val_ptr_phi, %"struct.impala::ColumnType"* @expr_type_arg, i32 
1234)
+//   call void @_ZN6impala11BloomFilter10InsertAvx2Ej(
+//       %"class.impala::BloomFilter"* %local_bloom_filter_arg, i32 
%hash_value)
+//   ret void
+// }
+Status FilterContext::CodegenInsert(
+    LlvmCodeGen* codegen, ScalarExpr* filter_expr, Function** fn) {
+  // Builds 'void FilterContextInsert(FilterContext* this, TupleRow* row)'. On success
+  // '*fn' is the finalized function; on every error path '*fn' stays nullptr.
+  *fn = nullptr;
+
+  // Codegen the filter expression before creating the prototype below. This is the
+  // only step that can return early, and doing it first means an error return cannot
+  // leave a partially built 'FilterContextInsert' function behind in the module.
+  Function* compute_fn;
+  RETURN_IF_ERROR(filter_expr->GetCodegendComputeFn(codegen, &compute_fn));
+  DCHECK(compute_fn != nullptr);
+
+  LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  PointerType* this_type = codegen->GetPtrType(FilterContext::LLVM_CLASS_NAME);
+  PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
+  LlvmCodeGen::FnPrototype prototype(
+      codegen, "FilterContextInsert", codegen->void_type());
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("this", this_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+
+  Value* args[2];
+  Function* insert_filter_fn = prototype.GeneratePrototype(&builder, args);
+  Value* this_arg = args[0];
+  Value* row_arg = args[1];
+
+  // Load 'local_bloom_filter' from 'this_arg' FilterContext object.
+  // NOTE(review): the literal struct indices used here (3) and for 'expr_eval' (0)
+  // presumably mirror the member order of FilterContext - confirm if that struct
+  // changes.
+  Value* local_bloom_filter_ptr =
+      builder.CreateStructGEP(nullptr, this_arg, 3, "local_bloom_filter_ptr");
+  Value* local_bloom_filter_arg =
+      builder.CreateLoad(local_bloom_filter_ptr, "local_bloom_filter_arg");
+
+  // Check if 'local_bloom_filter' is NULL and return if so.
+  Value* bloom_is_null = builder.CreateIsNull(local_bloom_filter_arg, "bloom_is_null");
+  BasicBlock* bloom_not_null_block =
+      BasicBlock::Create(context, "bloom_not_null", insert_filter_fn);
+  BasicBlock* bloom_is_null_block =
+      BasicBlock::Create(context, "bloom_is_null", insert_filter_fn);
+  builder.CreateCondBr(bloom_is_null, bloom_is_null_block, bloom_not_null_block);
+  builder.SetInsertPoint(bloom_is_null_block);
+  builder.CreateRetVoid();
+  builder.SetInsertPoint(bloom_not_null_block);
+
+  BasicBlock* val_not_null_block =
+      BasicBlock::Create(context, "val_not_null", insert_filter_fn);
+  BasicBlock* val_is_null_block =
+      BasicBlock::Create(context, "val_is_null", insert_filter_fn);
+  BasicBlock* insert_filter_block =
+      BasicBlock::Create(context, "insert_filter", insert_filter_fn);
+
+  // Load 'expr_eval' from 'this_arg' FilterContext object.
+  Value* expr_eval_ptr = builder.CreateStructGEP(nullptr, this_arg, 0, "expr_eval_ptr");
+  Value* expr_eval_arg = builder.CreateLoad(expr_eval_ptr, "expr_eval_arg");
+
+  // Evaluate the row against the filter's expression.
+  Value* compute_fn_args[] = {expr_eval_arg, row_arg};
+  CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(
+      codegen, &builder, filter_expr->type(), compute_fn, compute_fn_args, "result");
+
+  // Check if the result is NULL
+  Value* val_is_null = result.GetIsNull();
+  builder.CreateCondBr(val_is_null, val_is_null_block, val_not_null_block);
+
+  // Set the pointer to NULL in case it evaluates to NULL.
+  builder.SetInsertPoint(val_is_null_block);
+  Value* null_ptr = codegen->null_ptr_value();
+  builder.CreateBr(insert_filter_block);
+
+  // Saves 'result' on the stack and passes a pointer to it to 'get_hash_value_fn'.
+  builder.SetInsertPoint(val_not_null_block);
+  Value* native_ptr = result.ToNativePtr();
+  native_ptr = builder.CreatePointerCast(native_ptr, codegen->ptr_type(), "native_ptr");
+  builder.CreateBr(insert_filter_block);
+
+  // Get the arguments in place to call 'get_hash_value_fn'.
+  builder.SetInsertPoint(insert_filter_block);
+  PHINode* val_ptr_phi = builder.CreatePHI(codegen->ptr_type(), 2, "val_ptr_phi");
+  val_ptr_phi->addIncoming(native_ptr, val_not_null_block);
+  val_ptr_phi->addIncoming(null_ptr, val_is_null_block);
+
+  // Create a global constant of the filter expression's ColumnType. It needs to be a
+  // constant for constant propagation and dead code elimination in 'get_hash_value_fn'.
+  Type* col_type = codegen->GetType(ColumnType::LLVM_CLASS_NAME);
+  Constant* expr_type_arg = codegen->ConstantToGVPtr(
+      col_type, filter_expr->type().ToIR(codegen), "expr_type_arg");
+
+  // Call RawValue::GetHashValue() on the result of the filter's expression.
+  Value* seed_arg =
+      codegen->GetIntConstant(TYPE_INT, RuntimeFilterBank::DefaultHashSeed());
+  Value* get_hash_value_args[] = {val_ptr_phi, expr_type_arg, seed_arg};
+  Function* get_hash_value_fn =
+      codegen->GetFunction(IRFunction::RAW_VALUE_GET_HASH_VALUE, false);
+  DCHECK(get_hash_value_fn != nullptr);
+  Value* hash_value =
+      builder.CreateCall(get_hash_value_fn, get_hash_value_args, "hash_value");
+
+  // Call Insert() on the bloom filter. The AVX2 check is made once here, at codegen
+  // time, so the generated code calls the matching variant directly.
+  Value* insert_args[] = {local_bloom_filter_arg, hash_value};
+  Function* insert_bloom_filter_fn;
+  if (CpuInfo::IsSupported(CpuInfo::AVX2)) {
+    insert_bloom_filter_fn =
+        codegen->GetFunction(IRFunction::BLOOM_FILTER_INSERT_AVX2, false);
+  } else {
+    insert_bloom_filter_fn =
+        codegen->GetFunction(IRFunction::BLOOM_FILTER_INSERT_NO_AVX2, false);
+  }
+
+  DCHECK(insert_bloom_filter_fn != nullptr);
+  builder.CreateCall(insert_bloom_filter_fn, insert_args);
+  builder.CreateRetVoid();
+
+  *fn = codegen->FinalizeFunction(insert_filter_fn);
+  if (*fn == nullptr) {
+    return Status("Codegen'ed FilterContext::Insert() fails verification, see log");
+  }
+  return Status::OK();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/exec/filter-context.h
----------------------------------------------------------------------
diff --git a/be/src/exec/filter-context.h b/be/src/exec/filter-context.h
index fa95b91..81a3889 100644
--- a/be/src/exec/filter-context.h
+++ b/be/src/exec/filter-context.h
@@ -115,7 +115,14 @@ struct FilterContext {
   /// argument to RuntimeFilter::Eval() with a constant. On success, 'fn' is 
set to
   /// the generated function. On failure, an error status is returned.
   static Status CodegenEval(LlvmCodeGen* codegen, ScalarExpr* filter_expr,
-     llvm::Function** fn) WARN_UNUSED_RESULT;
+      llvm::Function** fn) WARN_UNUSED_RESULT;
+
+  /// Codegen Insert() by codegen'ing the expression 'filter_expr', replacing the type
+  /// argument to RawValue::GetHashValue() with a constant, and calling into the correct
+  /// version of BloomFilter::Insert(), depending on the presence of AVX2. On success,
+  /// 'fn' is set to the generated function. On failure, an error status is returned.
+  static Status CodegenInsert(LlvmCodeGen* codegen, ScalarExpr* filter_expr,
+      llvm::Function** fn) WARN_UNUSED_RESULT;
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/exec/partitioned-hash-join-builder-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder-ir.cc 
b/be/src/exec/partitioned-hash-join-builder-ir.cc
index b9c2cc3..8481212 100644
--- a/be/src/exec/partitioned-hash-join-builder-ir.cc
+++ b/be/src/exec/partitioned-hash-join-builder-ir.cc
@@ -57,8 +57,7 @@ Status PhjBuilder::ProcessBuildBatch(
     if (build_filters) {
       DCHECK_EQ(ctx->level(), 0)
           << "Runtime filters should not be built during repartitioning.";
-      // TODO: unroll loop and codegen expr evaluation and hashing 
(IMPALA-3360).
-      for (const FilterContext& ctx : filter_ctxs_) ctx.Insert(build_row);
+      InsertRuntimeFilters(build_row);
     }
     const uint32_t hash = expr_vals_cache->CurExprValuesHash();
     const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/exec/partitioned-hash-join-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.cc 
b/be/src/exec/partitioned-hash-join-builder.cc
index d292c59..6b7b791 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -48,10 +48,14 @@ static const string PREPARE_FOR_READ_FAILED_ERROR_MSG =
 using namespace impala;
 using llvm::ConstantInt;
 using llvm::Function;
+using llvm::LLVMContext;
+using llvm::PointerType;
 using llvm::Type;
 using llvm::Value;
 using strings::Substitute;
 
+const char* PhjBuilder::LLVM_CLASS_NAME = "class.impala::PhjBuilder";
+
 PhjBuilder::PhjBuilder(int join_node_id, TJoinOp::type join_op,
     const RowDescriptor* probe_row_desc, const RowDescriptor* build_row_desc,
     RuntimeState* state, BufferPool::ClientHandle* buffer_pool_client,
@@ -491,6 +495,10 @@ void PhjBuilder::AllocateRuntimeFilters() {
   }
 }
 
+void PhjBuilder::InsertRuntimeFilters(TupleRow* build_row) noexcept {
+  // Non-codegen'd version: hand 'build_row' to every filter context in turn. Call
+  // sites in ProcessBuildBatch() are swapped for the function produced by
+  // CodegenInsertRuntimeFilters() when codegen succeeds.
+  for (const FilterContext& filter_ctx : filter_ctxs_) {
+    filter_ctx.Insert(build_row);
+  }
+}
+
 void PhjBuilder::PublishRuntimeFilters(int64_t num_build_rows) {
   int32_t num_enabled_filters = 0;
   // Use 'num_build_rows' to estimate FP-rate of each Bloom filter, and publish
@@ -761,10 +769,14 @@ void PhjBuilder::Codegen(LlvmCodeGen* codegen) {
   Function* eval_build_row_fn;
   codegen_status.MergeStatus(ht_ctx_->CodegenEvalRow(codegen, true, 
&eval_build_row_fn));
 
+  Function* insert_filters_fn;
+  codegen_status.MergeStatus(
+      CodegenInsertRuntimeFilters(codegen, filter_exprs_, &insert_filters_fn));
+
   if (codegen_status.ok()) {
     TPrefetchMode::type prefetch_mode = 
runtime_state_->query_options().prefetch_mode;
-    build_codegen_status =
-        CodegenProcessBuildBatch(codegen, hash_fn, murmur_hash_fn, 
eval_build_row_fn);
+    build_codegen_status = CodegenProcessBuildBatch(
+        codegen, hash_fn, murmur_hash_fn, eval_build_row_fn, 
insert_filters_fn);
     insert_codegen_status = CodegenInsertBatch(codegen, hash_fn, 
murmur_hash_fn,
         eval_build_row_fn, prefetch_mode);
   } else {
@@ -788,8 +800,8 @@ string PhjBuilder::DebugString() const {
   return ss.str();
 }
 
-Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen,
-    Function* hash_fn, Function* murmur_hash_fn, Function* eval_row_fn) {
+Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen, Function* 
hash_fn,
+    Function* murmur_hash_fn, Function* eval_row_fn, Function* 
insert_filters_fn) {
   Function* process_build_batch_fn =
       codegen->GetFunction(IRFunction::PHJ_PROCESS_BUILD_BATCH, true);
   DCHECK(process_build_batch_fn != NULL);
@@ -799,6 +811,10 @@ Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* 
codegen,
       codegen->ReplaceCallSites(process_build_batch_fn, eval_row_fn, 
"EvalBuildRow");
   DCHECK_EQ(replaced, 1);
 
+  replaced = codegen->ReplaceCallSites(
+      process_build_batch_fn, insert_filters_fn, "InsertRuntimeFilters");
+  DCHECK_EQ(replaced, 1);
+
   // Replace some hash table parameters with constants.
   HashTableCtx::HashTableReplacedConstants replaced_constants;
   const bool stores_duplicates = true;
@@ -924,3 +940,64 @@ Status PhjBuilder::CodegenInsertBatch(LlvmCodeGen* 
codegen, Function* hash_fn,
       insert_batch_fn_level0, 
reinterpret_cast<void**>(&insert_batch_fn_level0_));
   return Status::OK();
 }
+
+// An example of the generated code for a query with two filters built by this 
node.
+//
+// ; Function Attrs: noinline
+// define void @InsertRuntimeFilters(%"class.impala::PhjBuilder"* %this,
+//     %"class.impala::TupleRow"* %row) #46 {
+// entry:
+//   call void @FilterContextInsert(%"struct.impala::FilterContext"* inttoptr (
+//       i64 197870464 to %"struct.impala::FilterContext"*),
+//       %"class.impala::TupleRow"* %row)
+//   call void @FilterContextInsert.14(%"struct.impala::FilterContext"* 
inttoptr (
+//       i64 197870496 to %"struct.impala::FilterContext"*),
+//       %"class.impala::TupleRow"* %row)
+//   ret void
+// }
+Status PhjBuilder::CodegenInsertRuntimeFilters(
+    LlvmCodeGen* codegen, const vector<ScalarExpr*>& filter_exprs, Function** fn) {
+  // Builds 'void InsertRuntimeFilters(PhjBuilder* this, TupleRow* row)', which calls
+  // one codegen'd FilterContext insert function per entry of 'filter_exprs'. On
+  // success '*fn' is the finalized function; on every error path it stays nullptr.
+  *fn = nullptr;
+
+  // The i-th expression is paired with the i-th FilterContext below, so there must be
+  // at least as many contexts as expressions.
+  DCHECK_LE(filter_exprs.size(), filter_ctxs_.size());
+  const int num_filters = filter_exprs.size();
+
+  // Codegen the per-filter insert functions before creating this function's
+  // prototype, so that an error return cannot leave a half-built
+  // 'InsertRuntimeFilters' function behind in the module.
+  vector<Function*> insert_fns(num_filters, nullptr);
+  for (int i = 0; i < num_filters; ++i) {
+    RETURN_IF_ERROR(
+        FilterContext::CodegenInsert(codegen, filter_exprs[i], &insert_fns[i]));
+  }
+
+  LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  Type* this_type = codegen->GetPtrType(PhjBuilder::LLVM_CLASS_NAME);
+  PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
+  LlvmCodeGen::FnPrototype prototype(
+      codegen, "InsertRuntimeFilters", codegen->void_type());
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("this", this_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+
+  Value* args[2];
+  Function* insert_runtime_filters_fn = prototype.GeneratePrototype(&builder, args);
+  Value* row_arg = args[1];
+
+  // Emit one call per filter. Each FilterContext is passed as a constant pointer
+  // baked into the generated code, so 'this' (args[0]) is not needed.
+  PointerType* filter_context_type =
+      codegen->GetPtrType(FilterContext::LLVM_CLASS_NAME);
+  for (int i = 0; i < num_filters; ++i) {
+    Value* filter_context_ptr =
+        codegen->CastPtrToLlvmPtr(filter_context_type, &filter_ctxs_[i]);
+
+    Value* insert_args[] = {filter_context_ptr, row_arg};
+    builder.CreateCall(insert_fns[i], insert_args);
+  }
+
+  builder.CreateRetVoid();
+
+  if (num_filters > 0) {
+    // Don't inline this function to avoid code bloat in ProcessBuildBatch().
+    // If there is any filter, InsertRuntimeFilters() is large enough to not benefit
+    // much from inlining.
+    insert_runtime_filters_fn->addFnAttr(llvm::Attribute::NoInline);
+  }
+
+  *fn = codegen->FinalizeFunction(insert_runtime_filters_fn);
+  if (*fn == nullptr) {
+    return Status("Codegen'd PhjBuilder::InsertRuntimeFilters() failed "
+                  "verification, see log");
+  }
+  return Status::OK();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/exec/partitioned-hash-join-builder.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.h 
b/be/src/exec/partitioned-hash-join-builder.h
index 2c52988..c123a9b 100644
--- a/be/src/exec/partitioned-hash-join-builder.h
+++ b/be/src/exec/partitioned-hash-join-builder.h
@@ -348,6 +348,10 @@ class PhjBuilder : public DataSink {
   /// phase.
   void AllocateRuntimeFilters();
 
+  /// Iterates over the runtime filters in filter_ctxs_ and inserts 'build_row' into
+  /// each filter. This is replaced at runtime with code generated by
+  /// CodegenInsertRuntimeFilters().
+  void InsertRuntimeFilters(TupleRow* build_row) noexcept;
+
   /// Publish the runtime filters to the fragment-local RuntimeFilterBank.
   /// 'num_build_rows' is used to determine whether the computed filters have 
an
   /// unacceptably high false-positive rate.
@@ -356,7 +360,8 @@ class PhjBuilder : public DataSink {
   /// Codegen processing build batches. Identical signature to 
ProcessBuildBatch().
   /// Returns non-OK status if codegen was not possible.
   Status CodegenProcessBuildBatch(LlvmCodeGen* codegen, llvm::Function* 
hash_fn,
-      llvm::Function* murmur_hash_fn, llvm::Function* eval_row_fn) 
WARN_UNUSED_RESULT;
+      llvm::Function* murmur_hash_fn, llvm::Function* eval_row_fn,
+      llvm::Function* insert_filters_fn) WARN_UNUSED_RESULT;
 
   /// Codegen inserting batches into a partition's hash table. Identical 
signature to
   /// Partition::InsertBatch(). Returns non-OK if codegen was not possible.
@@ -364,6 +369,11 @@ class PhjBuilder : public DataSink {
       llvm::Function* murmur_hash_fn, llvm::Function* eval_row_fn,
       TPrefetchMode::type prefetch_mode) WARN_UNUSED_RESULT;
 
+  /// Codegen inserting rows into runtime filters. The generated function has an
+  /// identical signature to InsertRuntimeFilters(). On success, 'fn' is set to the
+  /// generated function. Returns non-OK if codegen was not possible.
+  Status CodegenInsertRuntimeFilters(LlvmCodeGen* codegen,
+      const vector<ScalarExpr*>& filter_exprs, llvm::Function** fn) WARN_UNUSED_RESULT;
+
   RuntimeState* const runtime_state_;
 
   // The ID of the plan join node this is associated with.
@@ -503,6 +513,9 @@ class PhjBuilder : public DataSink {
   /// Jitted Partition::InsertBatch() function pointers. NULL if codegen is 
disabled.
   InsertBatchFn insert_batch_fn_;
   InsertBatchFn insert_batch_fn_level0_;
+
+  /// Class name in LLVM IR.
+  static const char* LLVM_CLASS_NAME;
 };
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 947ab78..4ff03d6 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -32,6 +32,7 @@ add_library(Util
   bitmap.cc
   bit-util.cc
   bloom-filter.cc
+  bloom-filter-ir.cc
   coding-util.cc
   codec.cc
   common-metrics.cc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/util/bloom-filter-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter-ir.cc b/be/src/util/bloom-filter-ir.cc
new file mode 100644
index 0000000..4c56149
--- /dev/null
+++ b/be/src/util/bloom-filter-ir.cc
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/bloom-filter.h"
+
+#include "codegen/impala-ir.h"
+
+using namespace impala;
+
+void BloomFilter::InsertNoAvx2(const uint32_t hash) noexcept {
+  // The rehashed value only selects the bucket; the original 'hash' is what gets
+  // passed on to the bucket insert.
+  const uint32_t rehashed = HashUtil::Rehash32to32(hash);
+  const uint32_t idx = rehashed & directory_mask_;
+  BucketInsert(idx, hash);
+}
+
+void BloomFilter::InsertAvx2(const uint32_t hash) noexcept {
+  // The rehashed value only selects the bucket; the original 'hash' is what gets
+  // passed on to the AVX2 bucket insert.
+  const uint32_t rehashed = HashUtil::Rehash32to32(hash);
+  const uint32_t idx = rehashed & directory_mask_;
+  BucketInsertAVX2(idx, hash);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/79dc220b/be/src/util/bloom-filter.h
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter.h b/be/src/util/bloom-filter.h
index 913b331..4a50cfc 100644
--- a/be/src/util/bloom-filter.h
+++ b/be/src/util/bloom-filter.h
@@ -131,6 +131,12 @@ class BloomFilter {
 
   Bucket* directory_;
 
+  /// Same as Insert(), but skips the CPU check and assumes that AVX2 is not available.
+  void InsertNoAvx2(const uint32_t hash) noexcept;
+
+  /// Same as Insert(), but skips the CPU check and assumes that AVX2 is available.
+  void InsertAvx2(const uint32_t hash) noexcept;
+
   /// Does the actual work of Insert(). bucket_idx is the index of the bucket 
to insert
   /// into and 'hash' is the value passed to Insert().
   void BucketInsert(const uint32_t bucket_idx, const uint32_t hash) noexcept;

Reply via email to