[1/3] incubator-impala git commit: IMPALA-3838, IMPALA-4495: Codegen EvalRuntimeFilters() and fixes filter stats updates

tarmstrong Wed, 23 Nov 2016 13:54:06 -0800

Repository: incubator-impala
Updated Branches:
  refs/heads/master 4db330e69 -> bbf5255d0



IMPALA-3838, IMPALA-4495: Codegen EvalRuntimeFilters() and fixes filter stats 
updates

This change codegens HdfsParquetScanner::EvalRuntimeFilters()
by unrolling its loop, codegen'ing the expression evaluation
of the runtime filter and replacing some type information with
constants in the hashing function of runtime filter to avoid
branching at runtime.

This change also fixes IMPALA-4495 by not counting a row as
'considered' in the filter stats before the filter arrives.
This avoids unnecessarily marking a runtime filter as
ineffective before it's even used.

With this change, TPCDS-Q88 improves by 13-14%.
primitive_broadcast_join_1 improves by 24%.

Change-Id: I27114869840e268d17e91d6e587ef811628e3837
Reviewed-on: http://gerrit.cloudera.org:8080/4833
Reviewed-by: Michael Ho <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1e306211
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1e306211
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1e306211

Branch: refs/heads/master
Commit: 1e306211d0eb511ff3e4f3f6f91d7e6fcd01af15
Parents: 4db330e
Author: Michael Ho <[email protected]>
Authored: Fri Oct 14 13:51:43 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Wed Nov 23 12:48:47 2016 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py           |   6 +-
 be/src/codegen/impala-ir.cc                     |   1 +
 be/src/codegen/llvm-codegen.cc                  |  12 +-
 be/src/codegen/llvm-codegen.h                   |  22 ++-
 be/src/exec/filter-context.cc                   | 147 ++++++++++++++++-
 be/src/exec/filter-context.h                    |  27 +++-
 be/src/exec/hdfs-parquet-scanner-ir.cc          |  22 ++-
 be/src/exec/hdfs-parquet-scanner.cc             | 156 +++++++++++++++----
 be/src/exec/hdfs-parquet-scanner.h              |  26 +++-
 be/src/exec/hdfs-scan-node-base.cc              |  22 ++-
 be/src/exec/hdfs-scan-node.cc                   |   8 +-
 be/src/exec/partitioned-hash-join-builder-ir.cc |  10 +-
 be/src/exec/partitioned-hash-join-builder.cc    |  12 +-
 be/src/runtime/CMakeLists.txt                   |   1 +
 be/src/runtime/raw-value-ir.cc                  |  57 +++++++
 be/src/runtime/raw-value.cc                     |  54 -------
 be/src/runtime/raw-value.h                      |   7 +-
 be/src/runtime/raw-value.inline.h               |   3 +-
 be/src/runtime/runtime-filter-bank.h            |   3 +-
 be/src/runtime/runtime-filter-ir.cc             |  31 ++++
 be/src/runtime/runtime-filter.cc                |   2 +
 be/src/runtime/runtime-filter.h                 |  15 +-
 be/src/runtime/runtime-filter.inline.h          |  11 --
 be/src/runtime/types.h                          |   4 +-
 be/src/util/bloom-filter.cc                     |  10 +-
 be/src/util/bloom-filter.h                      |  21 +--
 tests/query_test/test_tpch_queries.py           |   2 +-
 27 files changed, 518 insertions(+), 174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py 
b/be/src/codegen/gen_ir_descriptions.py
index 09f3a20..44fa3cf 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -186,6 +186,8 @@ ir_functions = [
    "_ZNK6impala11HdfsScanner14GetConjunctCtxEi"],
   ["PROCESS_SCRATCH_BATCH",
    "_ZN6impala18HdfsParquetScanner19ProcessScratchBatchEPNS_8RowBatchE"],
+  ["PARQUET_SCANNER_EVAL_RUNTIME_FILTER",
+   "_ZN6impala18HdfsParquetScanner17EvalRuntimeFilterEiPNS_8TupleRowE"],
   ["STRING_TO_BOOL", "IrStringToBool"],
   ["STRING_TO_INT8", 
"_Z14IrStringToInt8PKciPN6impala12StringParser11ParseResultE"],
   ["STRING_TO_INT16", "IrStringToInt16"],
@@ -202,7 +204,9 @@ ir_functions = [
   ["MEMPOOL_ALLOCATE",
    "_ZN6impala7MemPool8AllocateILb0EEEPhli"],
   ["MEMPOOL_CHECKED_ALLOCATE",
-   "_ZN6impala7MemPool8AllocateILb1EEEPhli"]
+   "_ZN6impala7MemPool8AllocateILb1EEEPhli"],
+  ["RUNTIME_FILTER_EVAL",
+   "_ZNK6impala13RuntimeFilter4EvalEPvRKNS_10ColumnTypeE"]
 ]
 
 enums_preamble = '\

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/codegen/impala-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/impala-ir.cc b/be/src/codegen/impala-ir.cc
index 1711bac..2ef8790 100644
--- a/be/src/codegen/impala-ir.cc
+++ b/be/src/codegen/impala-ir.cc
@@ -55,6 +55,7 @@
 #include "exprs/utility-functions-ir.cc"
 #include "runtime/mem-pool.h"
 #include "runtime/raw-value-ir.cc"
+#include "runtime/runtime-filter-ir.cc"
 #include "udf/udf-ir.cc"
 #include "util/hash-util-ir.cc"
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 596f48a..b8b5db6 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -572,7 +572,7 @@ Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, const 
void* ptr) {
   return ConstantExpr::getIntToPtr(const_int, type);
 }
 
-Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, uint64_t val) {
+Constant* LlvmCodeGen::GetIntConstant(PrimitiveType type, uint64_t val) {
   switch (type) {
     case TYPE_TINYINT:
       return ConstantInt::get(context(), APInt(8, val));
@@ -588,7 +588,7 @@ Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, 
uint64_t val) {
   }
 }
 
-Value* LlvmCodeGen::GetIntConstant(int num_bytes, uint64_t low_bits, uint64_t 
high_bits) {
+Constant* LlvmCodeGen::GetIntConstant(int num_bytes, uint64_t low_bits, 
uint64_t high_bits) {
   DCHECK_GE(num_bytes, 1);
   DCHECK_LE(num_bytes, 16);
   DCHECK(BitUtil::IsPowerOf2(num_bytes));
@@ -1485,6 +1485,14 @@ Value* LlvmCodeGen::GetPtrTo(LlvmBuilder* builder, 
Value* v, const char* name) {
   return ptr;
 }
 
+Constant* LlvmCodeGen::ConstantToGVPtr(Type* type, Constant* ir_constant,
+    const string& name) {
+  GlobalVariable* gv = new GlobalVariable(*module_, type, true,
+      GlobalValue::PrivateLinkage, ir_constant, name);
+  return ConstantExpr::getGetElementPtr(NULL, gv,
+      ArrayRef<Constant*>({GetIntConstant(TYPE_INT, 0)}));
+}
+
 }
 
 namespace boost {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 8853d7e..03656c8 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -253,6 +253,12 @@ class LlvmCodeGen {
   /// Alloca's an instance of the appropriate pointer type and sets it to 
point at 'v'
   llvm::Value* GetPtrTo(LlvmBuilder* builder, llvm::Value* v, const char* name 
= "");
 
+  /// Creates a global value 'name' using constant 'ir_constant' and returns
+  /// a pointer to the global value. Useful for creating constant function 
arguments
+  /// which cannot be represented with primitive types (e.g. struct).
+  llvm::Constant* ConstantToGVPtr(llvm::Type* type, llvm::Constant* 
ir_constant,
+      const std::string& name);
+
   /// Returns reference to llvm context object.  Each LlvmCodeGen has its own
   /// context to allow multiple threads to be calling into llvm at the same 
time.
   llvm::LLVMContext& context() { return *context_.get(); }
@@ -298,6 +304,12 @@ class LlvmCodeGen {
   int ReplaceCallSitesWithBoolConst(llvm::Function* caller, bool constant,
       const std::string& target_name);
 
+  /// Replace calls to functions in 'caller' where the callee's name has 
'target_name'
+  /// as a substring. Calls to functions are replaced with the value 
'replacement'. The
+  /// return value is the number of calls replaced.
+  int ReplaceCallSitesWithValue(llvm::Function* caller, llvm::Value* 
replacement,
+      const std::string& target_name);
+
   /// Returns a copy of fn. The copy is added to the module.
   llvm::Function* CloneFunction(llvm::Function* fn);
 
@@ -407,13 +419,13 @@ class LlvmCodeGen {
   llvm::Value* CastPtrToLlvmPtr(llvm::Type* type, const void* ptr);
 
   /// Returns the constant 'val' of 'type'.
-  llvm::Value* GetIntConstant(PrimitiveType type, uint64_t val);
+  llvm::Constant* GetIntConstant(PrimitiveType type, uint64_t val);
 
   /// Returns a constant int of 'byte_size' bytes based on 'low_bits' and 
'high_bits'
   /// which stand for the lower and upper 64-bits of the constant 
respectively. For
   /// values less than or equal to 64-bits, 'high_bits' is not used. This 
function
   /// can generate constant up to 128-bit wide. 'byte_size' must be power of 2.
-  llvm::Value* GetIntConstant(int byte_size, uint64_t low_bits, uint64_t 
high_bits);
+  llvm::Constant* GetIntConstant(int byte_size, uint64_t low_bits, uint64_t 
high_bits);
 
   /// Initialise a constant global string and returns an i8* pointer to it.
   llvm::Value* GetStringConstant(LlvmBuilder* builder, char* data, int len);
@@ -556,12 +568,6 @@ class LlvmCodeGen {
   /// Clears generated hash fns.  This is only used for testing.
   void ClearHashFns();
 
-  /// Replace calls to functions in 'caller' where the callee's name has 
'target_name'
-  /// as a substring. Calls to functions are replaced with the value 
'replacement'. The
-  /// return value is the number of calls replaced.
-  int ReplaceCallSitesWithValue(llvm::Function* caller, llvm::Value* 
replacement,
-      const std::string& target_name);
-
   /// Finds call instructions in 'caller' where 'target_name' is a substring 
of the
   /// callee's name. Found instructions are appended to the 'results' vector.
   static void FindCallSites(llvm::Function* caller, const std::string& 
target_name,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/filter-context.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/filter-context.cc b/be/src/exec/filter-context.cc
index a3c0ee1..5c33ed4 100644
--- a/be/src/exec/filter-context.cc
+++ b/be/src/exec/filter-context.cc
@@ -17,16 +17,22 @@
 
 #include "exec/filter-context.h"
 
+#include "codegen/codegen-anyval.h"
+#include "runtime/runtime-filter.h"
+#include "runtime/tuple-row.h"
 #include "util/runtime-profile-counters.h"
 
 using namespace impala;
 using namespace strings;
+using namespace llvm;
 
 const std::string FilterStats::ROW_GROUPS_KEY = "RowGroups";
 const std::string FilterStats::FILES_KEY = "Files";
 const std::string FilterStats::SPLITS_KEY = "Splits";
 const std::string FilterStats::ROWS_KEY = "Rows";
 
+const char* FilterContext::LLVM_CLASS_NAME = "struct.impala::FilterContext";
+
 FilterStats::FilterStats(RuntimeProfile* runtime_profile, bool 
is_partition_filter) {
   DCHECK(runtime_profile != NULL);
   profile = runtime_profile;
@@ -64,5 +70,144 @@ void FilterStats::RegisterCounterGroup(const string& key) {
 Status FilterContext::CloneFrom(const FilterContext& from, RuntimeState* 
state) {
   filter = from.filter;
   stats = from.stats;
-  return from.expr->Clone(state, &expr);
+  return from.expr_ctx->Clone(state, &expr_ctx);
+}
+
+bool FilterContext::Eval(TupleRow* row) const noexcept {
+  void* e = expr_ctx->GetValue(row);
+  return filter->Eval(e, expr_ctx->root()->type());
+}
+
+void FilterContext::Insert(TupleRow* row) const noexcept {
+  if (local_bloom_filter == NULL) return;
+  void* e = expr_ctx->GetValue(row);
+  uint32_t filter_hash = RawValue::GetHashValue(
+      e, expr_ctx->root()->type(), RuntimeFilterBank::DefaultHashSeed());
+  local_bloom_filter->Insert(filter_hash);
 }
+
+// An example of the generated code for TPCH-Q2: RF002 -> n_regionkey
+//
+// @expr_type_arg = constant %"struct.impala::ColumnType" { i32 4, i32 -1, i32 
-1,
+//     i32 -1, %"class.std::vector.422" zeroinitializer,
+//     %"class.std::vector.101" zeroinitializer }
+//
+// ; Function Attrs: alwaysinline
+// define i1 @FilterContextEval(%"struct.impala::FilterContext"* %this,
+//                              %"class.impala::TupleRow"* %row) #34 {
+// entry:
+//   %0 = alloca i16
+//   %expr_ctx_ptr = getelementptr inbounds %"struct.impala::FilterContext",
+//       %"struct.impala::FilterContext"* %this, i32 0, i32 0
+//   %expr_ctx_arg = load %"class.impala::ExprContext"*,
+//       %"class.impala::ExprContext"** %expr_ctx_ptr
+//   %result = call i32 @GetSlotRef(%"class.impala::ExprContext"* 
%expr_ctx_arg,
+//       %"class.impala::TupleRow"* %row)
+//   %is_null1 = trunc i32 %result to i1
+//   br i1 %is_null1, label %is_null, label %not_null
+//
+// not_null:                                         ; preds = %entry
+//   %1 = ashr i32 %result, 16
+//   %2 = trunc i32 %1 to i16
+//   store i16 %2, i16* %0
+//   %native_ptr = bitcast i16* %0 to i8*
+//   br label %eval_filter
+//
+// is_null:                                          ; preds = %entry
+//   br label %eval_filter
+//
+// eval_filter:                                      ; preds = %not_null, 
%is_null
+//   %val_ptr_phi = phi i8* [ %native_ptr, %not_null ], [ null, %is_null ]
+//   %filter_ptr = getelementptr inbounds %"struct.impala::FilterContext",
+//       %"struct.impala::FilterContext"* %this, i32 0, i32 1
+//   %filter_arg = load %"class.impala::RuntimeFilter"*,
+//       %"class.impala::RuntimeFilter"** %filter_ptr
+//   %passed_filter = call i1 
@_ZNK6impala13RuntimeFilter4EvalEPvRKNS_10ColumnTypeE.3(
+//       %"class.impala::RuntimeFilter"* %filter_arg, i8* %val_ptr_phi,
+//       %"struct.impala::ColumnType"* @expr_type_arg)
+//   ret i1 %passed_filter
+// }
+Status FilterContext::CodegenEval(LlvmCodeGen* codegen, Function** fn) const {
+  LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  *fn = NULL;
+  PointerType* this_type = codegen->GetPtrType(FilterContext::LLVM_CLASS_NAME);
+  PointerType* tuple_row_ptr_type = 
codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
+  LlvmCodeGen::FnPrototype prototype(codegen, "FilterContextEval",
+      codegen->boolean_type());
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("this", this_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+
+  Value* args[2];
+  Function* eval_filter_fn = prototype.GeneratePrototype(&builder, args);
+  Value* this_arg = args[0];
+  Value* row_arg = args[1];
+
+  BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", 
eval_filter_fn);
+  BasicBlock* is_null_block = BasicBlock::Create(context, "is_null", 
eval_filter_fn);
+  BasicBlock* eval_filter_block =
+      BasicBlock::Create(context, "eval_filter", eval_filter_fn);
+
+  Expr* expr = expr_ctx->root();
+  Function* compute_fn;
+  RETURN_IF_ERROR(expr->GetCodegendComputeFn(codegen, &compute_fn));
+  DCHECK(compute_fn != NULL);
+
+  // The function for checking against the bloom filter for match.
+  Function* runtime_filter_fn =
+      codegen->GetFunction(IRFunction::RUNTIME_FILTER_EVAL, false);
+  DCHECK(runtime_filter_fn != NULL);
+
+  // Load 'expr_ctx' from 'this_arg' FilterContext object.
+  Value* expr_ctx_ptr = builder.CreateStructGEP(NULL, this_arg, 0, 
"expr_ctx_ptr");
+  Value* expr_ctx_arg = builder.CreateLoad(expr_ctx_ptr, "expr_ctx_arg");
+
+  // Evaluate the row against the filter's expression.
+  Value* compute_fn_args[] = {expr_ctx_arg, row_arg};
+  CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder,
+      expr->type(), compute_fn, compute_fn_args, "result");
+
+  // Check if the result is NULL
+  Value* is_null = result.GetIsNull();
+  builder.CreateCondBr(is_null, is_null_block, not_null_block);
+
+  // Set the pointer to NULL in case it evaluates to NULL.
+  builder.SetInsertPoint(is_null_block);
+  Value* null_ptr = codegen->null_ptr_value();
+  builder.CreateBr(eval_filter_block);
+
+  // Saves 'result' on the stack and passes a pointer to it to 
'runtime_filter_fn'.
+  builder.SetInsertPoint(not_null_block);
+  Value* native_ptr = result.ToNativePtr();
+  native_ptr = builder.CreatePointerCast(native_ptr, codegen->ptr_type(), 
"native_ptr");
+  builder.CreateBr(eval_filter_block);
+
+  // Get the arguments in place to call 'runtime_filter_fn' to see if the row 
passes.
+  builder.SetInsertPoint(eval_filter_block);
+  PHINode* val_ptr_phi = builder.CreatePHI(codegen->ptr_type(), 2, 
"val_ptr_phi");
+  val_ptr_phi->addIncoming(native_ptr, not_null_block);
+  val_ptr_phi->addIncoming(null_ptr, is_null_block);
+
+  // Create a global constant of the filter expression's ColumnType. It needs 
to be a
+  // constant for constant propagation and dead code elimination in 
'runtime_filter_fn'.
+  Type* col_type = codegen->GetType(ColumnType::LLVM_CLASS_NAME);
+  Constant* expr_type_arg = codegen->ConstantToGVPtr(col_type, 
expr->type().ToIR(codegen),
+      "expr_type_arg");
+
+  // Load 'filter' from 'this_arg' FilterContext object.
+  Value* filter_ptr = builder.CreateStructGEP(NULL, this_arg, 1, "filter_ptr");
+  Value* filter_arg = builder.CreateLoad(filter_ptr, "filter_arg");
+
+  Value* run_filter_args[] = {filter_arg, val_ptr_phi, expr_type_arg};
+  Value* passed_filter =
+       builder.CreateCall(runtime_filter_fn, run_filter_args, "passed_filter");
+  builder.CreateRet(passed_filter);
+
+  *fn = codegen->FinalizeFunction(eval_filter_fn);
+  if (*fn == NULL) {
+    return Status("Codegen'ed FilterContext::Eval() fails verification, see 
log");
+  }
+  return Status::OK();
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/filter-context.h
----------------------------------------------------------------------
diff --git a/be/src/exec/filter-context.h b/be/src/exec/filter-context.h
index d684dc1..37d139d 100644
--- a/be/src/exec/filter-context.h
+++ b/be/src/exec/filter-context.h
@@ -28,7 +28,9 @@
 namespace impala {
 
 class BloomFilter;
+class LlvmCodeGen;
 class RuntimeFilter;
+class TupleRow;
 
 /// Container struct for per-filter statistics, with statistics for each 
granularity of
 /// set of rows to which a Runtimefilter might be applied. Common groupings 
are "Rows",
@@ -79,9 +81,13 @@ class FilterStats {
 /// to be applied in the context of a single thread.
 struct FilterContext {
   /// Expression which produces a value to test against the runtime filter.
-  ExprContext* expr;
+  /// This field is referenced in generated code so if the order of it changes
+  /// inside this struct, please update CodegenEval().
+  ExprContext* expr_ctx;
 
   /// Cache of filter from runtime filter bank.
+  /// The field is referenced in generated code so if the order of it changes
+  /// inside this struct, please update CodegenEval().
   const RuntimeFilter* filter;
 
   /// Statistics for this filter, owned by object pool.
@@ -90,12 +96,29 @@ struct FilterContext {
   /// Working copy of local bloom filter
   BloomFilter* local_bloom_filter;
 
+  /// Struct name in LLVM IR.
+  static const char* LLVM_CLASS_NAME;
+
   /// Clones this FilterContext for use in a multi-threaded context (i.e. by 
scanner
   /// threads).
   Status CloneFrom(const FilterContext& from, RuntimeState* state);
 
+  /// Evaluates 'row' on the expression in 'expr_ctx' with the resulting value 
being
+  /// checked against runtime filter 'filter' for matches. Returns true if 
'row' finds
+  /// a match in 'filter'. Returns false otherwise.
+  bool Eval(TupleRow* row) const noexcept;
+
+  /// Evaluates 'row' on the expression in 'expr_ctx' and hashes the resulting 
value.
+  /// The hash value is then used for setting some bits in 
'local_bloom_filter'.
+  void Insert(TupleRow* row) const noexcept;
+
+  /// Codegen Eval() by codegen'ing the expression evaluations and replacing 
the type
+  /// argument to RuntimeFilter::Eval() with a constant. On success, 'fn' is 
set to
+  /// the generated function. On failure, an error status is returned.
+  Status CodegenEval(LlvmCodeGen* codegen, llvm::Function** fn) const;
+
   FilterContext()
-      : expr(NULL), filter(NULL), local_bloom_filter(NULL) { }
+      : expr_ctx(NULL), filter(NULL), local_bloom_filter(NULL) { }
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/hdfs-parquet-scanner-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner-ir.cc 
b/be/src/exec/hdfs-parquet-scanner-ir.cc
index 1cd5252..c9cf1eb 100644
--- a/be/src/exec/hdfs-parquet-scanner-ir.cc
+++ b/be/src/exec/hdfs-parquet-scanner-ir.cc
@@ -17,13 +17,16 @@
 
 #include "exec/hdfs-parquet-scanner.h"
 
+#include "exec/filter-context.h"
 #include "exec/parquet-scratch-tuple-batch.h"
+#include "exprs/expr.h"
+#include "runtime/runtime-filter.h"
+#include "runtime/runtime-filter.inline.h"
 #include "runtime/tuple-row.h"
 
 using namespace impala;
 
 int HdfsParquetScanner::ProcessScratchBatch(RowBatch* dst_batch) {
-  const bool has_filters = !filter_ctxs_.empty();
   ExprContext* const* conjunct_ctxs = &(*scanner_conjunct_ctxs_)[0];
   const int num_conjuncts = scanner_conjunct_ctxs_->size();
 
@@ -48,7 +51,7 @@ int HdfsParquetScanner::ProcessScratchBatch(RowBatch* 
dst_batch) {
     scratch_tuple += tuple_size;
     // Evaluate runtime filters and conjuncts. Short-circuit the evaluation if
     // the filters/conjuncts are empty to avoid function calls.
-    if (has_filters && 
!EvalRuntimeFilters(reinterpret_cast<TupleRow*>(output_row))) {
+    if (!EvalRuntimeFilters(reinterpret_cast<TupleRow*>(output_row))) {
       continue;
     }
     if (!ExecNode::EvalConjuncts(conjunct_ctxs, num_conjuncts,
@@ -60,6 +63,19 @@ int HdfsParquetScanner::ProcessScratchBatch(RowBatch* 
dst_batch) {
     if (output_row == output_row_end) break;
   }
   scratch_batch_->tuple_idx += (scratch_tuple - scratch_tuple_start) / 
tuple_size;
-
   return output_row - output_row_start;
 }
+
+bool HdfsParquetScanner::EvalRuntimeFilter(int i, TupleRow* row) {
+  LocalFilterStats* stats = &filter_stats_[i];
+  const FilterContext* ctx = filter_ctxs_[i];
+  ++stats->total_possible;
+  if (stats->enabled && ctx->filter->HasBloomFilter()) {
+    ++stats->considered;
+    if (!ctx->Eval(row)) {
+      ++stats->rejected;
+      return false;
+    }
+  }
+  return true;
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc 
b/be/src/exec/hdfs-parquet-scanner.cc
index c8f3ad6..1f8ff1a 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -23,6 +23,7 @@
 #include <gflags/gflags.h>
 #include <gutil/strings/substitute.h>
 
+#include "codegen/codegen-anyval.h"
 #include "codegen/llvm-codegen.h"
 #include "common/logging.h"
 #include "exec/hdfs-scanner.h"
@@ -48,18 +49,18 @@
 
 using llvm::Function;
 using namespace impala;
+using namespace llvm;
 
 DEFINE_double(parquet_min_filter_reject_ratio, 0.1, "(Advanced) If the 
percentage of "
     "rows rejected by a runtime filter drops below this value, the filter is 
disabled.");
 DECLARE_bool(enable_partitioned_aggregation);
 DECLARE_bool(enable_partitioned_hash_join);
 
-// The number of rows between checks to see if a filter is not effective, and 
should be
-// disabled. Must be a power of two.
-const int ROWS_PER_FILTER_SELECTIVITY_CHECK = 16 * 1024;
-static_assert(
-    !(ROWS_PER_FILTER_SELECTIVITY_CHECK & (ROWS_PER_FILTER_SELECTIVITY_CHECK - 
1)),
-    "ROWS_PER_FILTER_SELECTIVITY_CHECK must be a power of two");
+// The number of row batches between checks to see if a filter is ineffective and
+// should be disabled. Must be a power of two.
+constexpr int BATCHES_PER_FILTER_SELECTIVITY_CHECK = 16;
+static_assert(BitUtil::IsPowerOf2(BATCHES_PER_FILTER_SELECTIVITY_CHECK),
+    "BATCHES_PER_FILTER_SELECTIVITY_CHECK must be a power of two");
 
 // Max dictionary page header size in bytes. This is an estimate and only 
needs to be an
 // upper bound.
@@ -70,6 +71,8 @@ const int16_t HdfsParquetScanner::ROW_GROUP_END;
 const int16_t HdfsParquetScanner::INVALID_LEVEL;
 const int16_t HdfsParquetScanner::INVALID_POS;
 
+const char* HdfsParquetScanner::LLVM_CLASS_NAME = 
"class.impala::HdfsParquetScanner";
+
 Status HdfsParquetScanner::IssueInitialRanges(HdfsScanNodeBase* scan_node,
     const std::vector<HdfsFileDesc*>& files) {
   vector<DiskIoMgr::ScanRange*> footer_ranges;
@@ -146,6 +149,7 @@ HdfsParquetScanner::HdfsParquetScanner(HdfsScanNodeBase* 
scan_node, RuntimeState
       row_group_idx_(-1),
       row_group_rows_read_(0),
       advance_row_group_(true),
+      row_batches_produced_(0),
       scratch_batch_(new ScratchTupleBatch(
           scan_node->row_desc(), state_->batch_size(), 
scan_node->mem_tracker())),
       metadata_range_(NULL),
@@ -183,7 +187,7 @@ Status HdfsParquetScanner::Open(ScannerContext* context) {
   for (int i = 0; i < context->filter_ctxs().size(); ++i) {
     const FilterContext* ctx = &context->filter_ctxs()[i];
     DCHECK(ctx->filter != NULL);
-    if (!ctx->filter->AlwaysTrue()) filter_ctxs_.push_back(ctx);
+    filter_ctxs_.push_back(ctx);
   }
   filter_stats_.resize(filter_ctxs_.size());
 
@@ -326,6 +330,18 @@ int HdfsParquetScanner::CountScalarColumns(const 
vector<ParquetColumnReader*>& c
   return num_columns;
 }
 
+void HdfsParquetScanner::CheckFiltersEffectiveness() {
+  for (int i = 0; i < filter_stats_.size(); ++i) {
+    LocalFilterStats* stats = &filter_stats_[i];
+    const RuntimeFilter* filter = filter_ctxs_[i]->filter;
+    double reject_ratio = stats->rejected / 
static_cast<double>(stats->considered);
+    if (filter->AlwaysTrue() ||
+        reject_ratio < FLAGS_parquet_min_filter_reject_ratio) {
+      stats->enabled = 0;
+    }
+  }
+}
+
 Status HdfsParquetScanner::ProcessSplit() {
   DCHECK(scan_node_->HasRowBatchQueue());
   HdfsScanNode* scan_node = static_cast<HdfsScanNode*>(scan_node_);
@@ -333,6 +349,10 @@ Status HdfsParquetScanner::ProcessSplit() {
     StartNewParquetRowBatch();
     RETURN_IF_ERROR(GetNextInternal(batch_));
     scan_node->AddMaterializedRowBatch(batch_);
+    ++row_batches_produced_;
+    if ((row_batches_produced_ & (BATCHES_PER_FILTER_SELECTIVITY_CHECK - 1)) 
== 0) {
+      CheckFiltersEffectiveness();
+    }
   } while (!eos_ && !scan_node_->ReachedLimit());
 
   // Transfer the remaining resources to this new batch in Close().
@@ -634,7 +654,8 @@ int HdfsParquetScanner::TransferScratchTuples(RowBatch* 
dst_batch) {
 }
 
 Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
-    const vector<ExprContext*>& conjunct_ctxs, Function** 
process_scratch_batch_fn) {
+    const vector<ExprContext*>& conjunct_ctxs, const vector<FilterContext>& 
filter_ctxs,
+    Function** process_scratch_batch_fn) {
   DCHECK(node->runtime_state()->ShouldCodegen());
   *process_scratch_batch_fn = NULL;
   LlvmCodeGen* codegen = node->runtime_state()->codegen();
@@ -652,6 +673,14 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
   int replaced = codegen->ReplaceCallSites(fn, eval_conjuncts_fn, 
"EvalConjuncts");
   DCHECK_EQ(replaced, 1);
 
+  Function* eval_runtime_filters_fn;
+  RETURN_IF_ERROR(CodegenEvalRuntimeFilters(
+      codegen, filter_ctxs, &eval_runtime_filters_fn));
+  DCHECK(eval_runtime_filters_fn != NULL);
+
+  replaced = codegen->ReplaceCallSites(fn, eval_runtime_filters_fn, 
"EvalRuntimeFilters");
+  DCHECK_EQ(replaced, 1);
+
   fn->setName("ProcessScratchBatch");
   *process_scratch_batch_fn = codegen->FinalizeFunction(fn);
   if (*process_scratch_batch_fn == NULL) {
@@ -663,33 +692,98 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
 bool HdfsParquetScanner::EvalRuntimeFilters(TupleRow* row) {
   int num_filters = filter_ctxs_.size();
   for (int i = 0; i < num_filters; ++i) {
-    LocalFilterStats* stats = &filter_stats_[i];
-    if (!stats->enabled) continue;
-    const RuntimeFilter* filter = filter_ctxs_[i]->filter;
-    // Check filter effectiveness every ROWS_PER_FILTER_SELECTIVITY_CHECK rows.
-    // TODO: The stats updates and the filter effectiveness check are executed 
very
-    // frequently. Consider hoisting it out of of this loop, and doing an 
equivalent
-    // check less frequently, e.g., after producing an output batch.
-    ++stats->total_possible;
-    if (UNLIKELY(
-        !(stats->total_possible & (ROWS_PER_FILTER_SELECTIVITY_CHECK - 1)))) {
-      double reject_ratio = stats->rejected / 
static_cast<double>(stats->considered);
-      if (filter->AlwaysTrue() ||
-          reject_ratio < FLAGS_parquet_min_filter_reject_ratio) {
-        stats->enabled = 0;
-        continue;
-      }
-    }
-    ++stats->considered;
-    void* e = filter_ctxs_[i]->expr->GetValue(row);
-    if (!filter->Eval<void>(e, filter_ctxs_[i]->expr->root()->type())) {
-      ++stats->rejected;
-      return false;
-    }
+    if (!EvalRuntimeFilter(i, row)) return false;
   }
   return true;
 }
 
+// ; Function Attrs: noinline
+// define i1 @EvalRuntimeFilters(%"class.impala::HdfsParquetScanner"* %this,
+//                               %"class.impala::TupleRow"* %row) #34 {
+// entry:
+//   %0 = call i1 
@_ZN6impala18HdfsParquetScanner17EvalRuntimeFilterEiPNS_8TupleRowE.2(
+//       %"class.impala::HdfsParquetScanner"* %this, i32 0, 
%"class.impala::TupleRow"* %row)
+//   br i1 %0, label %continue, label %row_rejected
+//
+// row_rejected:                                     ; preds = %entry
+//   ret i1 false
+//
+// continue:                                         ; preds = %entry
+//   ret i1 true
+// }
+//
+// EvalRuntimeFilter() is the same as the cross-compiled version except that its call
+// to FilterContext::Eval() is replaced with the one generated by CodegenEval().
+Status HdfsParquetScanner::CodegenEvalRuntimeFilters(LlvmCodeGen* codegen,
+    const vector<FilterContext>& filter_ctxs, Function** fn) {
+  LLVMContext& context = codegen->context();
+  LlvmBuilder builder(context);
+
+  *fn = NULL;
+  Type* this_type = codegen->GetPtrType(HdfsParquetScanner::LLVM_CLASS_NAME);
+  PointerType* tuple_row_ptr_type = 
codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
+  LlvmCodeGen::FnPrototype prototype(codegen, "EvalRuntimeFilters",
+      codegen->GetType(TYPE_BOOLEAN));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("this", this_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+
+  Value* args[2];
+  Function* eval_runtime_filters_fn = prototype.GeneratePrototype(&builder, 
args);
+  Value* this_arg = args[0];
+  Value* row_arg = args[1];
+
+  int num_filters = filter_ctxs.size();
+  if (num_filters == 0) {
+    builder.CreateRet(codegen->true_value());
+  } else {
+    // row_rejected_block: jump target for when a filter is evaluated to false.
+    BasicBlock* row_rejected_block =
+        BasicBlock::Create(context, "row_rejected", eval_runtime_filters_fn);
+
+    DCHECK_GT(num_filters, 0);
+    for (int i = 0; i < num_filters; ++i) {
+      Function* eval_runtime_filter_fn =
+          
codegen->GetFunction(IRFunction::PARQUET_SCANNER_EVAL_RUNTIME_FILTER, true);
+      DCHECK(eval_runtime_filter_fn != NULL);
+
+      // Codegen function for inlining filter's expression evaluation and 
constant fold
+      // the type of the expression into the hashing function to avoid 
branches.
+      Function* eval_one_filter_fn;
+      RETURN_IF_ERROR(filter_ctxs[i].CodegenEval(codegen, 
&eval_one_filter_fn));
+      DCHECK(eval_one_filter_fn != NULL);
+
+      int replaced = codegen->ReplaceCallSites(eval_runtime_filter_fn, 
eval_one_filter_fn,
+          "FilterContext4Eval");
+      DCHECK_EQ(replaced, 1);
+
+      Value* idx = codegen->GetIntConstant(TYPE_INT, i);
+      Value* passed_filter = builder.CreateCall(
+          eval_runtime_filter_fn, ArrayRef<Value*>({this_arg, idx, row_arg}));
+
+      BasicBlock* continue_block =
+          BasicBlock::Create(context, "continue", eval_runtime_filters_fn);
+      builder.CreateCondBr(passed_filter, continue_block, row_rejected_block);
+      builder.SetInsertPoint(continue_block);
+    }
+    builder.CreateRet(codegen->true_value());
+
+    builder.SetInsertPoint(row_rejected_block);
+    builder.CreateRet(codegen->false_value());
+
+    // Don't inline this function to avoid code bloat in ProcessScratchBatch().
+    // If there is any filter, EvalRuntimeFilters() is large enough to not 
benefit
+    // much from inlining.
+    eval_runtime_filters_fn->addFnAttr(llvm::Attribute::NoInline);
+  }
+
+  *fn = codegen->FinalizeFunction(eval_runtime_filters_fn);
+  if (*fn == NULL) {
+    return Status("Codegen'd HdfsParquetScanner::EvalRuntimeFilters() failed "
+        "verification, see log");
+  }
+  return Status::OK();
+}
+
 bool HdfsParquetScanner::AssembleCollection(
     const vector<ParquetColumnReader*>& column_readers, int 
new_collection_rep_level,
     CollectionValueBuilder* coll_value_builder) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/hdfs-parquet-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.h 
b/be/src/exec/hdfs-parquet-scanner.h
index 10bc767..15d64ec 100644
--- a/be/src/exec/hdfs-parquet-scanner.h
+++ b/be/src/exec/hdfs-parquet-scanner.h
@@ -19,6 +19,7 @@
 #ifndef IMPALA_EXEC_HDFS_PARQUET_SCANNER_H
 #define IMPALA_EXEC_HDFS_PARQUET_SCANNER_H
 
+#include "codegen/impala-ir.h"
 #include "exec/hdfs-scanner.h"
 #include "exec/parquet-common.h"
 #include "exec/parquet-scratch-tuple-batch.h"
@@ -336,6 +337,7 @@ class HdfsParquetScanner : public HdfsScanner {
   /// 'process_scratch_batch_fn' if codegen was successful or NULL otherwise.
   static Status Codegen(HdfsScanNodeBase* node,
       const std::vector<ExprContext*>& conjunct_ctxs,
+      const std::vector<FilterContext>& filter_ctxs,
       llvm::Function** process_scratch_batch_fn);
 
   /// The repetition level is set to this value to indicate the end of a row 
group.
@@ -356,6 +358,9 @@ class HdfsParquetScanner : public HdfsScanner {
   /// need to issue another read.
   static const int64_t FOOTER_SIZE = 1024 * 100;
 
+  /// Class name in LLVM IR.
+  static const char* LLVM_CLASS_NAME;
+
   /// Index of the current row group being processed. Initialized to -1 which 
indicates
   /// that we have not started processing the first row group yet (GetNext() 
has not yet
   /// been called).
@@ -403,6 +408,9 @@ class HdfsParquetScanner : public HdfsScanner {
   /// Close().
   vector<LocalFilterStats> filter_stats_;
 
+  /// Number of scratch batches processed so far.
+  int64_t row_batches_produced_;
+
   /// Column reader for each materialized columns for this file.
   std::vector<ParquetColumnReader*> column_readers_;
 
@@ -443,6 +451,10 @@ class HdfsParquetScanner : public HdfsScanner {
 
   virtual Status GetNextInternal(RowBatch* row_batch);
 
+  /// Check runtime filters' effectiveness every 
BATCHES_PER_FILTER_SELECTIVITY_CHECK
+  /// row batches. Will update 'filter_stats_'.
+  void CheckFiltersEffectiveness();
+
   /// Advances 'row_group_idx_' to the next non-empty row group and initializes
   /// the column readers to scan it. Recoverable errors are logged to the 
runtime
   /// state. Only returns a non-OK status if a non-recoverable error is 
encountered
@@ -481,11 +493,23 @@ class HdfsParquetScanner : public HdfsScanner {
   /// materialized tuples. This is a separate function so it can be codegened.
   int ProcessScratchBatch(RowBatch* dst_batch);
 
+  /// Evaluates 'row' against the i-th runtime filter for this scan node and 
returns
+  /// true if 'row' finds a match in the filter. Returns false otherwise.
+  bool EvalRuntimeFilter(int i, TupleRow* row);
+
   /// Evaluates runtime filters (if any) against the given row. Returns true if
   /// they passed, false otherwise. Maintains the runtime filter stats, 
determines
-  /// whether the filters are effective, and disables them if they are not.
+  /// whether the filters are effective, and disables them if they are not. 
This is
+  /// replaced by generated code at runtime.
   bool EvalRuntimeFilters(TupleRow* row);
 
+  /// Codegen EvalRuntimeFilters() by unrolling the loop in the interpreted 
version
+  /// and emitting a customized version of EvalRuntimeFilter() for each filter 
in
+  /// 'filter_ctxs'. Return error status on failure. The generated function is 
returned
+  /// via 'fn'.
+  static Status CodegenEvalRuntimeFilters(LlvmCodeGen* codegen,
+      const std::vector<FilterContext>& filter_ctxs, llvm::Function** fn);
+
   /// Reads data using 'column_readers' to materialize the tuples of a 
CollectionValue
   /// allocated from 'coll_value_builder'.
   ///

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/hdfs-scan-node-base.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.cc 
b/be/src/exec/hdfs-scan-node-base.cc
index 5b97e76..f7677eb 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -129,7 +129,8 @@ Status HdfsScanNodeBase::Init(const TPlanNode& tnode, 
RuntimeState* state) {
     }
 
     FilterContext filter_ctx;
-    RETURN_IF_ERROR(Expr::CreateExprTree(pool_, target.target_expr, 
&filter_ctx.expr));
+    RETURN_IF_ERROR(
+        Expr::CreateExprTree(pool_, target.target_expr, &filter_ctx.expr_ctx));
     filter_ctx.filter = state->filter_bank()->RegisterFilter(filter, false);
 
     string filter_profile_title = Substitute("Filter $0 ($1)", 
filter.filter_id,
@@ -175,8 +176,8 @@ Status HdfsScanNodeBase::Prepare(RuntimeState* state) {
   scan_node_pool_.reset(new MemPool(mem_tracker()));
 
   for (FilterContext& filter: filter_ctxs_) {
-    RETURN_IF_ERROR(filter.expr->Prepare(state, row_desc(), 
expr_mem_tracker()));
-    AddExprCtxToFree(filter.expr);
+    RETURN_IF_ERROR(filter.expr_ctx->Prepare(state, row_desc(), 
expr_mem_tracker()));
+    AddExprCtxToFree(filter.expr_ctx);
   }
 
   // Parse Avro table schema if applicable
@@ -330,7 +331,7 @@ void HdfsScanNodeBase::Codegen(RuntimeState* state) {
         status = HdfsAvroScanner::Codegen(this, conjunct_ctxs_, &fn);
         break;
       case THdfsFileFormat::PARQUET:
-        status = HdfsParquetScanner::Codegen(this, conjunct_ctxs_, &fn);
+        status = HdfsParquetScanner::Codegen(this, conjunct_ctxs_, 
filter_ctxs_, &fn);
         break;
       default:
         // No codegen for this format
@@ -359,7 +360,7 @@ Status HdfsScanNodeBase::Open(RuntimeState* state) {
     RETURN_IF_ERROR(Expr::Open(entry.second, state));
   }
 
-  for (FilterContext& filter: filter_ctxs_) 
RETURN_IF_ERROR(filter.expr->Open(state));
+  for (FilterContext& filter: filter_ctxs_) 
RETURN_IF_ERROR(filter.expr_ctx->Open(state));
 
   // Create template tuples for all partitions.
   for (int64_t partition_id: partition_ids_) {
@@ -466,7 +467,7 @@ void HdfsScanNodeBase::Close(RuntimeState* state) {
     Expr::Close(tid_conjunct.second, state);
   }
 
-  for (auto& filter_ctx: filter_ctxs_) filter_ctx.expr->Close(state);
+  for (auto& filter_ctx: filter_ctxs_) filter_ctx.expr_ctx->Close(state);
   ScanNode::Close(state);
 }
 
@@ -710,13 +711,10 @@ bool HdfsScanNodeBase::PartitionPassesFilters(int32_t 
partition_id,
     if 
(!ctx.filter->filter_desc().targets[target_ndx].is_bound_by_partition_columns) {
       continue;
     }
-    void* e = ctx.expr->GetValue(tuple_row_mem);
 
-    // Not quite right because bitmap could arrive after Eval(), but we're ok 
with
-    // off-by-one errors.
-    bool processed = ctx.filter->HasBloomFilter();
-    bool passed_filter = ctx.filter->Eval<void>(e, ctx.expr->root()->type());
-    ctx.stats->IncrCounters(stats_name, 1, processed, !passed_filter);
+    bool has_filter = ctx.filter->HasBloomFilter();
+    bool passed_filter = !has_filter || ctx.Eval(tuple_row_mem);
+    ctx.stats->IncrCounters(stats_name, 1, has_filter, !passed_filter);
     if (!passed_filter) return false;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/hdfs-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node.cc b/be/src/exec/hdfs-scan-node.cc
index eebf075..d5e0fc8 100644
--- a/be/src/exec/hdfs-scan-node.cc
+++ b/be/src/exec/hdfs-scan-node.cc
@@ -386,8 +386,8 @@ void HdfsScanNode::ScannerThread() {
           runtime_state_->resource_pool()->ReleaseThreadToken(false);
           if (filter_status.ok()) {
             for (auto& ctx: filter_ctxs) {
-              ctx.expr->FreeLocalAllocations();
-              ctx.expr->Close(runtime_state_);
+              ctx.expr_ctx->FreeLocalAllocations();
+              ctx.expr_ctx->Close(runtime_state_);
             }
           }
           return;
@@ -458,8 +458,8 @@ void HdfsScanNode::ScannerThread() {
 
   if (filter_status.ok()) {
     for (auto& ctx: filter_ctxs) {
-      ctx.expr->FreeLocalAllocations();
-      ctx.expr->Close(runtime_state_);
+      ctx.expr_ctx->FreeLocalAllocations();
+      ctx.expr_ctx->Close(runtime_state_);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/partitioned-hash-join-builder-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder-ir.cc 
b/be/src/exec/partitioned-hash-join-builder-ir.cc
index e4785fe..be980c7 100644
--- a/be/src/exec/partitioned-hash-join-builder-ir.cc
+++ b/be/src/exec/partitioned-hash-join-builder-ir.cc
@@ -58,14 +58,8 @@ Status PhjBuilder::ProcessBuildBatch(
     if (build_filters) {
       DCHECK_EQ(ctx->level(), 0)
           << "Runtime filters should not be built during repartitioning.";
-      for (const FilterContext& ctx : filters_) {
-        // TODO: codegen expr evaluation and hashing
-        if (ctx.local_bloom_filter == NULL) continue;
-        void* e = ctx.expr->GetValue(build_row);
-        uint32_t filter_hash = RawValue::GetHashValue(
-            e, ctx.expr->root()->type(), RuntimeFilterBank::DefaultHashSeed());
-        ctx.local_bloom_filter->Insert(filter_hash);
-      }
+      // TODO: unroll loop and codegen expr evaluation and hashing 
(IMPALA-3360).
+      for (const FilterContext& ctx : filters_) ctx.Insert(build_row);
     }
     const uint32_t hash = expr_vals_cache->CurExprValuesHash();
     const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/exec/partitioned-hash-join-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.cc 
b/be/src/exec/partitioned-hash-join-builder.cc
index cfb6e42..dcff33d 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -98,7 +98,7 @@ Status PhjBuilder::Init(RuntimeState* state,
     }
     FilterContext filter_ctx;
     filter_ctx.filter = state->filter_bank()->RegisterFilter(filter, true);
-    RETURN_IF_ERROR(Expr::CreateExprTree(&pool_, filter.src_expr, 
&filter_ctx.expr));
+    RETURN_IF_ERROR(Expr::CreateExprTree(&pool_, filter.src_expr, 
&filter_ctx.expr_ctx));
     filters_.push_back(filter_ctx);
   }
   return Status::OK();
@@ -116,8 +116,8 @@ Status PhjBuilder::Prepare(RuntimeState* state, MemTracker* 
parent_mem_tracker)
       expr_ctxs_to_free_.end(), build_expr_ctxs_.begin(), 
build_expr_ctxs_.end());
 
   for (const FilterContext& ctx : filters_) {
-    RETURN_IF_ERROR(ctx.expr->Prepare(state, row_desc_, 
expr_mem_tracker_.get()));
-    expr_ctxs_to_free_.push_back(ctx.expr);
+    RETURN_IF_ERROR(ctx.expr_ctx->Prepare(state, row_desc_, 
expr_mem_tracker_.get()));
+    expr_ctxs_to_free_.push_back(ctx.expr_ctx);
   }
   RETURN_IF_ERROR(HashTableCtx::Create(state, build_expr_ctxs_, 
build_expr_ctxs_,
       HashTableStoresNulls(), is_not_distinct_from_, 
state->fragment_hash_seed(),
@@ -151,7 +151,9 @@ Status PhjBuilder::Prepare(RuntimeState* state, MemTracker* 
parent_mem_tracker)
 
 Status PhjBuilder::Open(RuntimeState* state) {
   RETURN_IF_ERROR(Expr::Open(build_expr_ctxs_, state));
-  for (const FilterContext& filter : filters_) 
RETURN_IF_ERROR(filter.expr->Open(state));
+  for (const FilterContext& filter : filters_) {
+    RETURN_IF_ERROR(filter.expr_ctx->Open(state));
+  }
   RETURN_IF_ERROR(CreateHashPartitions(0));
   AllocateRuntimeFilters();
 
@@ -227,7 +229,7 @@ void PhjBuilder::Close(RuntimeState* state) {
   CloseAndDeletePartitions();
   if (ht_ctx_ != NULL) ht_ctx_->Close();
   Expr::Close(build_expr_ctxs_, state);
-  for (const FilterContext& ctx : filters_) ctx.expr->Close(state);
+  for (const FilterContext& ctx : filters_) ctx.expr_ctx->Close(state);
   if (block_mgr_client_ != NULL) 
state->block_mgr()->ClearReservations(block_mgr_client_);
   pool_.Clear();
   DataSink::Close(state);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index 54a1347..5114ff9 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -53,6 +53,7 @@ add_library(Runtime
   row-batch.cc
   runtime-filter.cc
   runtime-filter-bank.cc
+  runtime-filter-ir.cc
   runtime-state.cc
   sorted-run-merger.cc
   sorter.cc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/raw-value-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value-ir.cc b/be/src/runtime/raw-value-ir.cc
index c0df4c9..781efc8 100644
--- a/be/src/runtime/raw-value-ir.cc
+++ b/be/src/runtime/raw-value-ir.cc
@@ -20,6 +20,7 @@
 #include <cmath>
 
 #include "runtime/decimal-value.inline.h"
+#include "runtime/raw-value.inline.h"
 #include "runtime/string-value.inline.h"
 #include "runtime/timestamp-value.h"
 
@@ -104,3 +105,59 @@ int RawValue::Compare(const void* v1, const void* v2, 
const ColumnType& type) {
       return 0;
   };
 }
+
+// Returns the hash of 'v' interpreted as a value of column type 'type', combined with
+// 'seed'. Dispatches on 'type.type' to the matching GetHashValueNonNull<T>()
+// specialization. A NULL 'v' hashes to HASH_VAL_NULL combined with 'seed'.
+uint32_t IR_ALWAYS_INLINE RawValue::GetHashValue(const void* v, const ColumnType& type,
+    uint32_t seed) noexcept {
+  // The choice of hash function needs to be consistent across all hosts of the cluster.
+
+  // Use HashCombine with arbitrary constant to ensure we don't return seed.
+  if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
+
+  switch (type.type) {
+    case TYPE_CHAR:
+    case TYPE_STRING:
+    case TYPE_VARCHAR:
+      return RawValue::GetHashValueNonNull<impala::StringValue>(
+          reinterpret_cast<const StringValue*>(v), type, seed);
+    case TYPE_BOOLEAN:
+      return RawValue::GetHashValueNonNull<bool>(
+          reinterpret_cast<const bool*>(v), type, seed);
+    case TYPE_TINYINT:
+      return RawValue::GetHashValueNonNull<int8_t>(
+          reinterpret_cast<const int8_t*>(v), type, seed);
+    case TYPE_SMALLINT:
+      return RawValue::GetHashValueNonNull<int16_t>(
+          reinterpret_cast<const int16_t*>(v), type, seed);
+    case TYPE_INT:
+      return RawValue::GetHashValueNonNull<int32_t>(
+          reinterpret_cast<const int32_t*>(v), type, seed);
+    case TYPE_BIGINT:
+      return RawValue::GetHashValueNonNull<int64_t>(
+          reinterpret_cast<const int64_t*>(v), type, seed);
+    case TYPE_FLOAT:
+      return RawValue::GetHashValueNonNull<float>(
+          reinterpret_cast<const float*>(v), type, seed);
+    case TYPE_DOUBLE:
+      return RawValue::GetHashValueNonNull<double>(
+          reinterpret_cast<const double*>(v), type, seed);
+    case TYPE_TIMESTAMP:
+      return RawValue::GetHashValueNonNull<TimestampValue>(
+          reinterpret_cast<const TimestampValue*>(v), type, seed);
+    case TYPE_DECIMAL:
+      // Pick the decimal representation from the storage width reported by the type.
+      switch (type.GetByteSize()) {
+        case 4:
+          return RawValue::GetHashValueNonNull<Decimal4Value>(
+              reinterpret_cast<const impala::Decimal4Value*>(v), type, seed);
+        case 8:
+          return RawValue::GetHashValueNonNull<Decimal8Value>(
+              reinterpret_cast<const Decimal8Value*>(v), type, seed);
+        case 16:
+          return RawValue::GetHashValueNonNull<Decimal16Value>(
+              reinterpret_cast<const Decimal16Value*>(v), type, seed);
+        default:
+          // Any other width is a bug. Assert and return here instead of silently
+          // falling through into the outer 'default' label.
+          DCHECK(false);
+          return 0;
+      }
+    default:
+      DCHECK(false);
+      return 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/raw-value.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc
index aef58f2..872b356 100644
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -191,60 +191,6 @@ void RawValue::Write(const void* value, Tuple* tuple, 
const SlotDescriptor* slot
   }
 }
 
-uint32_t RawValue::GetHashValue(
-    const void* v, const ColumnType& type, uint32_t seed) noexcept {
-  // The choice of hash function needs to be consistent across all hosts of 
the cluster.
-
-  // Use HashCombine with arbitrary constant to ensure we don't return seed.
-  if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
-
-  switch (type.type) {
-    case TYPE_CHAR:
-    case TYPE_STRING:
-    case TYPE_VARCHAR:
-      return RawValue::GetHashValueNonNull<impala::StringValue>(
-          reinterpret_cast<const StringValue*>(v), type, seed);
-    case TYPE_BOOLEAN:
-      return RawValue::GetHashValueNonNull<bool>(
-          reinterpret_cast<const bool*>(v), type, seed);
-    case TYPE_TINYINT:
-      return RawValue::GetHashValueNonNull<int8_t>(
-          reinterpret_cast<const int8_t*>(v), type, seed);
-    case TYPE_SMALLINT:
-      return RawValue::GetHashValueNonNull<int16_t>(
-          reinterpret_cast<const int16_t*>(v), type, seed);
-    case TYPE_INT:
-      return RawValue::GetHashValueNonNull<int32_t>(
-          reinterpret_cast<const int32_t*>(v), type, seed);
-    case TYPE_BIGINT:
-      return RawValue::GetHashValueNonNull<int64_t>(
-          reinterpret_cast<const int64_t*>(v), type, seed);
-    case TYPE_FLOAT:
-      return RawValue::GetHashValueNonNull<float>(
-          reinterpret_cast<const float*>(v), type, seed);
-    case TYPE_DOUBLE:
-      return RawValue::GetHashValueNonNull<double>(
-          reinterpret_cast<const double*>(v), type, seed);
-    case TYPE_TIMESTAMP:
-      return RawValue::GetHashValueNonNull<TimestampValue>(
-          reinterpret_cast<const TimestampValue*>(v), type, seed);
-    case TYPE_DECIMAL:
-      switch (type.GetByteSize()) {
-        case 4:
-          return RawValue::GetHashValueNonNull<Decimal4Value>(
-              reinterpret_cast<const impala::Decimal4Value*>(v), type, seed);
-        case 8:
-          return RawValue::GetHashValueNonNull<Decimal8Value>(
-              reinterpret_cast<const Decimal8Value*>(v), type, seed);
-        case 16:
-          return RawValue::GetHashValueNonNull<Decimal16Value>(
-              reinterpret_cast<const Decimal16Value*>(v), type, seed);
-          DCHECK(false);
-      }
-    default: DCHECK(false); return 0;
-  }
-}
-
 uint32_t RawValue::GetHashValueFnv(const void* v, const ColumnType& type, 
uint32_t seed) {
   // Use HashCombine with arbitrary constant to ensure we don't return seed.
   if (v == NULL) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/raw-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.h b/be/src/runtime/raw-value.h
index 5ec8ed1..a69c8b0 100644
--- a/be/src/runtime/raw-value.h
+++ b/be/src/runtime/raw-value.h
@@ -22,6 +22,7 @@
 #include <sstream>
 #include <string>
 
+#include "codegen/impala-ir.h"
 #include "runtime/types.h"
 
 namespace impala {
@@ -54,14 +55,14 @@ class RawValue {
 
   /// Returns hash value for 'v' interpreted as 'type'.  The resulting hash 
value
   /// is combined with the seed value.
-  static uint32_t GetHashValue(
+  static uint32_t IR_ALWAYS_INLINE GetHashValue(
       const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;
 
   /// Templatized version of GetHashValue, use if type is known ahead. 
GetHashValue
   /// handles nulls.
   template<typename T>
-  static inline uint32_t GetHashValue(const T* v, const ColumnType& type,
-      uint32_t seed = 0);
+  static inline uint32_t IR_ALWAYS_INLINE GetHashValue(const T* v, const 
ColumnType& type,
+      uint32_t seed = 0) noexcept;
 
   /// Returns hash value for non-nullable 'v' for type T. GetHashValueNonNull 
doesn't
   /// handle nulls.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/raw-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.inline.h 
b/be/src/runtime/raw-value.inline.h
index 63c9a07..1214bad 100644
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -206,11 +206,12 @@ inline uint32_t 
RawValue::GetHashValueNonNull<Decimal16Value>(
 
 template<typename T>
 inline uint32_t RawValue::GetHashValue(const T* v, const ColumnType& type,
-    uint32_t seed) {
+    uint32_t seed) noexcept {
   // Use HashCombine with arbitrary constant to ensure we don't return seed.
   if (UNLIKELY(v == NULL)) return HashUtil::HashCombine32(HASH_VAL_NULL, seed);
   return RawValue::GetHashValueNonNull<T>(v, type, seed);
 }
+
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/runtime-filter-bank.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter-bank.h 
b/be/src/runtime/runtime-filter-bank.h
index 447a70b..d8be8ab 100644
--- a/be/src/runtime/runtime-filter-bank.h
+++ b/be/src/runtime/runtime-filter-bank.h
@@ -18,6 +18,7 @@
 #ifndef IMPALA_RUNTIME_RUNTIME_FILTER_BANK_H
 #define IMPALA_RUNTIME_RUNTIME_FILTER_BANK_H
 
+#include "codegen/impala-ir.h"
 #include "common/object-pool.h"
 #include "runtime/types.h"
 #include "util/runtime-profile.h"
@@ -100,7 +101,7 @@ class RuntimeFilterBank {
   BloomFilter* AllocateScratchBloomFilter(int32_t filter_id);
 
   /// Default hash seed to use when computing hashed values to insert into 
filters.
-  static int32_t DefaultHashSeed() { return 1234; }
+  static int32_t IR_ALWAYS_INLINE DefaultHashSeed() { return 1234; }
 
   /// Releases all memory allocated for BloomFilters.
   void Close();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/runtime-filter-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter-ir.cc 
b/be/src/runtime/runtime-filter-ir.cc
new file mode 100644
index 0000000..f9a7180
--- /dev/null
+++ b/be/src/runtime/runtime-filter-ir.cc
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/runtime-filter.h"
+
+#include "runtime/raw-value.h"
+
+using namespace impala;
+
+bool RuntimeFilter::Eval(void* val, const ColumnType& col_type) const noexcept {
+  // bloom_filter_ may be read while SetBloomFilter() is running: this is safe thanks to
+  // a) the atomicity of pointer assignments and b) the x86 TSO memory model.
+  if (bloom_filter_ == BloomFilter::ALWAYS_TRUE_FILTER) return true;
+  const uint32_t seed = RuntimeFilterBank::DefaultHashSeed();
+  const uint32_t hash = RawValue::GetHashValue(val, col_type, seed);
+  return bloom_filter_->Find(hash);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/runtime-filter.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter.cc b/be/src/runtime/runtime-filter.cc
index b34248b..228094e 100644
--- a/be/src/runtime/runtime-filter.cc
+++ b/be/src/runtime/runtime-filter.cc
@@ -25,6 +25,8 @@ using namespace impala;
 
 const int RuntimeFilter::SLEEP_PERIOD_MS = 20;
 
+const char* RuntimeFilter::LLVM_CLASS_NAME = "class.impala::RuntimeFilter";
+
 bool RuntimeFilter::WaitForArrival(int32_t timeout_ms) const {
   do {
     if (HasBloomFilter()) return true;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/runtime-filter.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter.h b/be/src/runtime/runtime-filter.h
index f64979e..828c36d 100644
--- a/be/src/runtime/runtime-filter.h
+++ b/be/src/runtime/runtime-filter.h
@@ -56,13 +56,11 @@ class RuntimeFilter {
   /// once per filter. Does not acquire the memory associated with 
'bloom_filter'.
   inline void SetBloomFilter(BloomFilter* bloom_filter);
 
-  /// Returns false iff the bloom_filter filter has been set via 
SetBloomFilter() and
-  /// hash[val] is not in that bloom_filter. Otherwise returns true. Is safe 
to call
-  /// concurrently with SetBloomFilter().
-  ///
-  /// Templatized in preparation for templatized hashes.
-  template<typename T>
-  inline bool Eval(T* val, const ColumnType& col_type) const;
+  /// Returns false iff 'bloom_filter_' has been set via SetBloomFilter() and 
hash[val] is
+  /// not in that 'bloom_filter_'. Otherwise returns true. Is safe to call 
concurrently
+  /// with SetBloomFilter(). 'val' is a value derived from evaluating a tuple 
row against
+  /// the expression of the owning filter context. 'col_type' is the value's 
type.
+  bool Eval(void* val, const ColumnType& col_type) const noexcept;
 
   /// Returns the amount of time waited since registration for the filter to
   /// arrive. Returns 0 if filter has not yet arrived.
@@ -83,6 +81,9 @@ class RuntimeFilter {
   /// Frequency with which to check for filter arrival in WaitForArrival()
   static const int SLEEP_PERIOD_MS;
 
+  /// Class name in LLVM IR.
+  static const char* LLVM_CLASS_NAME;
+
  private:
   /// Membership bloom_filter. May be NULL even after arrival_time_ is set. 
This is a
   /// compact way of representing a full Bloom filter that contains every 
element.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/runtime-filter.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter.inline.h 
b/be/src/runtime/runtime-filter.inline.h
index c647361..2ab77de 100644
--- a/be/src/runtime/runtime-filter.inline.h
+++ b/be/src/runtime/runtime-filter.inline.h
@@ -45,17 +45,6 @@ inline void RuntimeFilter::SetBloomFilter(BloomFilter* 
bloom_filter) {
   arrival_time_ = MonotonicMillis();
 }
 
-template<typename T>
-inline bool RuntimeFilter::Eval(T* val, const ColumnType& col_type) const {
-  // Safe to read bloom_filter_ concurrently with any ongoing SetBloomFilter() 
thanks
-  // to a) the atomicity of / pointer assignments and b) the x86 TSO memory 
model.
-  if (bloom_filter_ == NULL) return true;
-
-  uint32_t h = RawValue::GetHashValue(val, col_type,
-      RuntimeFilterBank::DefaultHashSeed());
-  return bloom_filter_->Find(h);
-}
-
 inline bool RuntimeFilter::AlwaysTrue() const  {
   return HasBloomFilter() && bloom_filter_ == BloomFilter::ALWAYS_TRUE_FILTER;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/runtime/types.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index b0835bf..f2edbee 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -96,6 +96,8 @@ struct ColumnType {
   /// Only set if type == TYPE_STRUCT. The field name of each child.
   std::vector<std::string> field_names;
 
+  static const char* LLVM_CLASS_NAME;
+
   ColumnType(PrimitiveType type = INVALID_TYPE)
     : type(type), len(-1), precision(-1), scale(-1) {
     DCHECK_NE(type, TYPE_CHAR);
@@ -279,8 +281,6 @@ struct ColumnType {
   /// Recursive implementation of ToThrift() that populates 'thrift_type' with 
the
   /// TTypeNodes for this type and its children.
   void ToThrift(TColumnType* thrift_type) const;
-
-  static const char* LLVM_CLASS_NAME;
 };
 
 std::ostream& operator<<(std::ostream& os, const ColumnType& type);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/util/bloom-filter.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter.cc b/be/src/util/bloom-filter.cc
index 45238b0..3e930fa 100644
--- a/be/src/util/bloom-filter.cc
+++ b/be/src/util/bloom-filter.cc
@@ -29,8 +29,6 @@ using namespace std;
 
 namespace impala {
 
-BloomFilter* const BloomFilter::ALWAYS_TRUE_FILTER = NULL;
-
 constexpr uint32_t BloomFilter::REHASH[8] __attribute__((aligned(32)));
 
 BloomFilter::BloomFilter(const int log_heap_space)
@@ -86,7 +84,7 @@ void BloomFilter::ToThrift(const BloomFilter* filter, 
TBloomFilter* thrift) {
 // The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. 
However, we
 // compile with -fno-strict-aliasing.
 
-void BloomFilter::BucketInsert(const uint32_t bucket_idx, const uint32_t hash) 
{
+void BloomFilter::BucketInsert(const uint32_t bucket_idx, const uint32_t hash) 
noexcept {
   // new_bucket will be all zeros except for eight 1-bits, one in each 32-bit 
word. It is
   // 16-byte aligned so it can be read as a __m128i using aligned SIMD loads 
in the second
   // part of this method.
@@ -119,7 +117,7 @@ __m256i BloomFilter::MakeMask(const uint32_t hash) {
 }
 
 void BloomFilter::BucketInsertAVX2(
-    const uint32_t bucket_idx, const uint32_t hash) {
+    const uint32_t bucket_idx, const uint32_t hash) noexcept {
   const __m256i mask = MakeMask(hash);
   __m256i* const bucket = &reinterpret_cast<__m256i*>(directory_)[bucket_idx];
   _mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));
@@ -129,7 +127,7 @@ void BloomFilter::BucketInsertAVX2(
 }
 
 bool BloomFilter::BucketFindAVX2(
-    const uint32_t bucket_idx, const uint32_t hash) const {
+    const uint32_t bucket_idx, const uint32_t hash) const noexcept {
   const __m256i mask = MakeMask(hash);
   const __m256i bucket = reinterpret_cast<__m256i*>(directory_)[bucket_idx];
   // We should return true if 'bucket' has a one wherever 'mask' does. 
_mm256_testc_si256
@@ -142,7 +140,7 @@ bool BloomFilter::BucketFindAVX2(
 }
 
 bool BloomFilter::BucketFind(
-    const uint32_t bucket_idx, const uint32_t hash) const {
+    const uint32_t bucket_idx, const uint32_t hash) const noexcept {
   for (int i = 0; i < BUCKET_WORDS; ++i) {
     BucketWord hval =
         (REHASH[i] * hash) >> ((1 << LOG_BUCKET_WORD_BITS) - 
LOG_BUCKET_WORD_BITS);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/be/src/util/bloom-filter.h
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter.h b/be/src/util/bloom-filter.h
index 4342814..153848c 100644
--- a/be/src/util/bloom-filter.h
+++ b/be/src/util/bloom-filter.h
@@ -52,7 +52,8 @@ namespace impala {
 /// probabilities between 0.1% (for 15 bits) and 10% (for 5 bits).
 ///
 /// Our tiny BloomFilters are 32 bytes to take advantage of 32-byte SIMD in 
newer Intel
-/// machines.
+/// machines. 'noexcept' is added to various functions called from the 
cross-compiled code
+/// so LLVM will not generate exception related code at their call sites.
 class BloomFilter {
  public:
   /// Consumes at most (1 << log_heap_space) bytes on the heap.
@@ -61,7 +62,7 @@ class BloomFilter {
   ~BloomFilter();
 
   /// Representation of a filter which allows all elements to pass.
-  static BloomFilter* const ALWAYS_TRUE_FILTER;
+  static constexpr BloomFilter* const ALWAYS_TRUE_FILTER = NULL;
 
   /// Converts 'filter' to its corresponding Thrift representation. If the 
first argument
   /// is NULL, it is interpreted as a complete filter which contains all 
elements.
@@ -72,11 +73,11 @@ class BloomFilter {
   /// the set of values is 32-bit ints, the identity function is a valid hash 
function for
   /// this Bloom filter, since the collision probability (the probability that 
two
   /// non-equal values will have the same hash value) is 0.
-  void Insert(const uint32_t hash);
+  void Insert(const uint32_t hash) noexcept;
 
   /// Finds an element in the BloomFilter, returning true if it is found and 
false (with
   /// high probability) if it is not.
-  bool Find(const uint32_t hash) const;
+  bool Find(const uint32_t hash) const noexcept;
 
   /// Computes the logical OR of 'in' with 'out' and stores the result in 
'out'.
   static void Or(const TBloomFilter& in, TBloomFilter* out);
@@ -131,16 +132,16 @@ class BloomFilter {
 
   /// Does the actual work of Insert(). bucket_idx is the index of the bucket 
to insert
   /// into and 'hash' is the value passed to Insert().
-  void BucketInsert(const uint32_t bucket_idx, const uint32_t hash);
+  void BucketInsert(const uint32_t bucket_idx, const uint32_t hash) noexcept;
 
   /// A faster SIMD version of BucketInsert().
-  void BucketInsertAVX2(const uint32_t bucket_idx, const uint32_t hash)
+  void BucketInsertAVX2(const uint32_t bucket_idx, const uint32_t hash) 
noexcept
       __attribute__((__target__("avx2")));
 
   /// BucketFind() and BucketFindAVX2() are just like BucketInsert() and
   /// BucketInsertAVX2(), but for Find().
-  bool BucketFind(const uint32_t bucket_idx, const uint32_t hash) const;
-  bool BucketFindAVX2(const uint32_t bucket_idx, const uint32_t hash) const
+  bool BucketFind(const uint32_t bucket_idx, const uint32_t hash) const 
noexcept;
+  bool BucketFindAVX2(const uint32_t bucket_idx, const uint32_t hash) const 
noexcept
       __attribute__((__target__("avx2")));
 
   /// A helper function for the AVX2 methods. Turns a 32-bit hash into a 
256-bit Bucket
@@ -173,7 +174,7 @@ class BloomFilter {
 // the advantage of requiring fewer random bits: log2(32) * 8 = 5 * 8 = 40 
random bits for
 // a split Bloom filter, but log2(256) * 8 = 64 random bits for a standard 
Bloom filter.
 
-inline void ALWAYS_INLINE BloomFilter::Insert(const uint32_t hash) {
+inline void ALWAYS_INLINE BloomFilter::Insert(const uint32_t hash) noexcept {
   const uint32_t bucket_idx = HashUtil::Rehash32to32(hash) & directory_mask_;
   if (CpuInfo::IsSupported(CpuInfo::AVX2)) {
     BucketInsertAVX2(bucket_idx, hash);
@@ -182,7 +183,7 @@ inline void ALWAYS_INLINE BloomFilter::Insert(const 
uint32_t hash) {
   }
 }
 
-inline bool ALWAYS_INLINE BloomFilter::Find(const uint32_t hash) const {
+inline bool ALWAYS_INLINE BloomFilter::Find(const uint32_t hash) const 
noexcept {
   const uint32_t bucket_idx = HashUtil::Rehash32to32(hash) & directory_mask_;
   if (CpuInfo::IsSupported(CpuInfo::AVX2)) {
     return BucketFindAVX2(bucket_idx, hash);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1e306211/tests/query_test/test_tpch_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_tpch_queries.py 
b/tests/query_test/test_tpch_queries.py
index 1969fe6..adcd575 100644
--- a/tests/query_test/test_tpch_queries.py
+++ b/tests/query_test/test_tpch_queries.py
@@ -36,7 +36,7 @@ class TestTpchQuery(ImpalaTestSuite):
     # TODO: the planner tests are based on text and need this.
     if cls.exploration_strategy() == 'core':
       cls.TestMatrix.add_constraint(lambda v:\
-          v.get_value('table_format').file_format in ['text', 'kudu'])
+          v.get_value('table_format').file_format in ['text', 'parquet', 
'kudu'])
 
   def get_test_file_prefix(self, vector):
     if vector.get_value('table_format').file_format in ['kudu']:

[1/3] incubator-impala git commit: IMPALA-3838, IMPALA-4495: Codegen EvalRuntimeFilters() and fixes filter stats updates

Reply via email to