IMPALA-5479: Propagate the argument type in RawValue::Compare()

CodegenAnyVal::Compare() generates code which calls the cross
compiled version of RawValue::Compare() without propagating
the type information into RawValue::Compare(). As a result,
the generated code of RawValue::Compare() is not any more
efficient than the interpreted version as we still have
the big switch statement in it.

This change creates a global constant for the argument 'type'
passed to RawValue::Compare(). By inlining the call to
RawValue::Compare(), LLVM was able to constant propagate
the type and eliminate the dead code for non-target types.

With this change, a query with top-n improves by 12% on average:

select l_orderkey, l_partkey, l_suppkey
from tpch50_parquet.lineitem
order by l_orderkey, l_partkey, l_suppkey
limit 10000;

4.49s -> 3.95s

This change also adds the ALWAYS_INLINE attribute to
RuntimeFilter::Eval() as it's needed to propagate the type
after a recent change to not put ALWAYS_INLINE attribute
on all cross-compiled functions.

Change-Id: I10b5b284e3da03024476a9620a12d6e7fbf08b3c
Reviewed-on: http://gerrit.cloudera.org:8080/7140
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e0f69ca1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e0f69ca1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e0f69ca1

Branch: refs/heads/master
Commit: e0f69ca1da448246a6260099c64f4cc5862188a2
Parents: adbb0b7
Author: Michael Ho <[email protected]>
Authored: Fri Jun 9 10:57:10 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Sat Jun 10 03:09:10 2017 +0000

----------------------------------------------------------------------
 be/src/codegen/codegen-anyval.cc    |  6 +++++-
 be/src/runtime/raw-value-ir.cc      |  7 ++++---
 be/src/runtime/raw-value.h          | 13 ++++++++-----
 be/src/runtime/runtime-filter-ir.cc |  3 ++-
 be/src/runtime/runtime-filter.h     |  3 ++-
 5 files changed, 21 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e0f69ca1/be/src/codegen/codegen-anyval.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index 8000dfd..a73faff 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -699,7 +699,11 @@ Value* CodegenAnyVal::Compare(CodegenAnyVal* other, const 
char* name) {
   Value* void_v1 = builder_->CreateBitCast(v1, codegen_->ptr_type());
   Value* v2 = other->ToNativePtr();
   Value* void_v2 = builder_->CreateBitCast(v2, codegen_->ptr_type());
-  Value* type_ptr = codegen_->GetPtrTo(builder_, type_.ToIR(codegen_), "type");
+  // Create a global constant of the values' ColumnType. It needs to be a 
constant
+  // for constant propagation and dead code elimination in 'compare_fn'.
+  Type* col_type = codegen_->GetType(ColumnType::LLVM_CLASS_NAME);
+  Constant* type_ptr =
+      codegen_->ConstantToGVPtr(col_type, type_.ToIR(codegen_), "type");
   Function* compare_fn = codegen_->GetFunction(IRFunction::RAW_VALUE_COMPARE, 
false);
   Value* args[] = { void_v1, void_v2, type_ptr };
   return builder_->CreateCall(compare_fn, args, name);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e0f69ca1/be/src/runtime/raw-value-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value-ir.cc b/be/src/runtime/raw-value-ir.cc
index 781efc8..1458b15 100644
--- a/be/src/runtime/raw-value-ir.cc
+++ b/be/src/runtime/raw-value-ir.cc
@@ -26,7 +26,8 @@
 
 using namespace impala;
 
-int RawValue::Compare(const void* v1, const void* v2, const ColumnType& type) {
+int IR_ALWAYS_INLINE RawValue::Compare(
+    const void* v1, const void* v2, const ColumnType& type) noexcept {
   const StringValue* string_value1;
   const StringValue* string_value2;
   const TimestampValue* ts_value1;
@@ -106,8 +107,8 @@ int RawValue::Compare(const void* v1, const void* v2, const 
ColumnType& type) {
   };
 }
 
-uint32_t IR_ALWAYS_INLINE RawValue::GetHashValue(const void* v, const 
ColumnType& type,
-    uint32_t seed) noexcept {
+uint32_t IR_ALWAYS_INLINE RawValue::GetHashValue(
+    const void* v, const ColumnType& type, uint32_t seed) noexcept {
   // The choice of hash function needs to be consistent across all hosts of 
the cluster.
 
   // Use HashCombine with arbitrary constant to ensure we don't return seed.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e0f69ca1/be/src/runtime/raw-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.h b/be/src/runtime/raw-value.h
index a69c8b0..89ae323 100644
--- a/be/src/runtime/raw-value.h
+++ b/be/src/runtime/raw-value.h
@@ -54,15 +54,16 @@ class RawValue {
                                 std::stringstream* stream);
 
   /// Returns hash value for 'v' interpreted as 'type'.  The resulting hash 
value
-  /// is combined with the seed value.
+  /// is combined with the seed value. Inlined in IR so that the constant 
'type' can be
+  /// propagated.
   static uint32_t IR_ALWAYS_INLINE GetHashValue(
       const void* v, const ColumnType& type, uint32_t seed = 0) noexcept;
 
   /// Templatized version of GetHashValue, use if type is known ahead. 
GetHashValue
-  /// handles nulls.
+  /// handles nulls. Inlined in IR so that the constant 'type' can be 
propagated.
   template<typename T>
-  static inline uint32_t IR_ALWAYS_INLINE GetHashValue(const T* v, const 
ColumnType& type,
-      uint32_t seed = 0) noexcept;
+  static inline uint32_t IR_ALWAYS_INLINE GetHashValue(
+      const T* v, const ColumnType& type, uint32_t seed = 0) noexcept;
 
   /// Returns hash value for non-nullable 'v' for type T. GetHashValueNonNull 
doesn't
   /// handle nulls.
@@ -79,7 +80,9 @@ class RawValue {
 
   /// Compares both values.
   /// Return value is < 0  if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
-  static int Compare(const void* v1, const void* v2, const ColumnType& type);
+  /// Inlined in IR so that the constant 'type' can be propagated.
+  static int IR_ALWAYS_INLINE Compare(
+      const void* v1, const void* v2, const ColumnType& type) noexcept;
 
   /// Writes the bytes of a given value into the slot of a tuple.
   /// For string values, the string data is copied into memory allocated from 
'pool'

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e0f69ca1/be/src/runtime/runtime-filter-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter-ir.cc 
b/be/src/runtime/runtime-filter-ir.cc
index f9a7180..1c1ecc0 100644
--- a/be/src/runtime/runtime-filter-ir.cc
+++ b/be/src/runtime/runtime-filter-ir.cc
@@ -21,7 +21,8 @@
 
 using namespace impala;
 
-bool RuntimeFilter::Eval(void* val, const ColumnType& col_type) const noexcept 
{
+bool IR_ALWAYS_INLINE RuntimeFilter::Eval(
+    void* val, const ColumnType& col_type) const noexcept {
   // Safe to read bloom_filter_ concurrently with any ongoing SetBloomFilter() 
thanks
   // to a) the atomicity of / pointer assignments and b) the x86 TSO memory 
model.
   if (bloom_filter_ == BloomFilter::ALWAYS_TRUE_FILTER) return true;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e0f69ca1/be/src/runtime/runtime-filter.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-filter.h b/be/src/runtime/runtime-filter.h
index 828c36d..b953982 100644
--- a/be/src/runtime/runtime-filter.h
+++ b/be/src/runtime/runtime-filter.h
@@ -60,7 +60,8 @@ class RuntimeFilter {
   /// not in that 'bloom_filter_'. Otherwise returns true. Is safe to call 
concurrently
   /// with SetBloomFilter(). 'val' is a value derived from evaluating a tuple 
row against
   /// the expression of the owning filter context. 'col_type' is the value's 
type.
-  bool Eval(void* val, const ColumnType& col_type) const noexcept;
+  /// Inlined in IR so that the constant 'col_type' can be propagated.
+  bool IR_ALWAYS_INLINE Eval(void* val, const ColumnType& col_type) const 
noexcept;
 
   /// Returns the amount of time waited since registration for the filter to
   /// arrive. Returns 0 if filter has not yet arrived.

Reply via email to