Repository: incubator-impala
Updated Branches:
  refs/heads/master 0c874189e -> aa28e37eb


IMPALA-4008: don't bake in hash table and hash join pointers

This fixes some of the cases where memory addresses are baked into
codegen'd code.

Testing:
Ran exhaustive build.

Perf:
Ran a local perf run. No significant changes. I was able to see some
small improvements on microbenchmarks.

    
    +-----------+-----------------------+---------+------------+------------+----------------+
    | Workload  | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
    +-----------+-----------------------+---------+------------+------------+----------------+
    | TPCH(_20) | parquet / none / none | 9.07    | +0.46%     | 5.88       | +0.34%         |
    +-----------+-----------------------+---------+------------+------------+----------------+

    
    +-----------+----------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
    | Workload  | Query    | File Format           | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%)  | Base StdDev(%) | Num Clients | Iters |
    +-----------+----------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
    | TPCH(_20) | TPCH-Q2  | parquet / none / none | 2.12   | 1.89        |   +12.29%  | * 10.85% * | * 20.30% *     | 1           | 10    |
    | TPCH(_20) | TPCH-Q13 | parquet / none / none | 9.84   | 9.34        |   +5.39%   |   9.01%    |   3.79%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q17 | parquet / none / none | 14.61  | 14.19       |   +2.97%   |   2.15%    |   1.52%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q18 | parquet / none / none | 14.76  | 14.35       |   +2.82%   |   3.20%    |   2.64%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q9  | parquet / none / none | 13.72  | 13.54       |   +1.30%   |   1.75%    |   0.70%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q8  | parquet / none / none | 5.71   | 5.64        |   +1.30%   |   1.21%    |   1.23%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q19 | parquet / none / none | 47.35  | 46.75       |   +1.28%   |   2.39%    |   1.88%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q5  | parquet / none / none | 4.57   | 4.52        |   +1.20%   |   1.30%    |   0.88%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q16 | parquet / none / none | 2.07   | 2.05        |   +1.12%   |   2.59%    |   1.79%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q11 | parquet / none / none | 1.45   | 1.45        |   +0.15%   |   2.69%    |   2.06%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q3  | parquet / none / none | 4.65   | 4.65        |   -0.09%   |   2.12%    |   2.17%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q4  | parquet / none / none | 3.22   | 3.23        |   -0.26%   |   1.03%    |   1.33%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q7  | parquet / none / none | 15.84  | 15.92       |   -0.50%   |   0.91%    |   1.15%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q14 | parquet / none / none | 3.29   | 3.31        |   -0.59%   |   3.31%    |   1.58%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q22 | parquet / none / none | 2.65   | 2.67        |   -0.78%   |   3.03%    |   1.46%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q15 | parquet / none / none | 4.50   | 4.55        |   -1.19%   |   2.87%    |   2.45%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q20 | parquet / none / none | 3.84   | 3.91        |   -1.76%   |   2.20%    |   1.94%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q10 | parquet / none / none | 5.58   | 5.70        |   -2.00%   |   1.01%    |   1.79%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q21 | parquet / none / none | 22.84  | 23.42       |   -2.47%   |   0.68%    |   0.56%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q1  | parquet / none / none | 11.25  | 11.60       |   -3.06%   |   0.48%    |   1.74%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q12 | parquet / none / none | 3.81   | 3.98        |   -4.38%   |   1.62%    |   1.14%        | 1           | 10    |
    | TPCH(_20) | TPCH-Q6  | parquet / none / none | 1.94   | 2.04        |   -4.85%   |   2.40%    |   1.58%        | 1           | 10    |
    +-----------+----------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+

    
    +--------------------+-----------------------+---------+------------+------------+----------------+
    | Workload           | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
    +--------------------+-----------------------+---------+------------+------------+----------------+
    | TARGETED-PERF(_20) | parquet / none / none | 8.17    | -1.66%     | 2.96       | -1.48%         |
    +--------------------+-----------------------+---------+------------+------------+----------------+

    
    +--------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
    | Workload           | Query                                                  | File Format           | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%)  | Base StdDev(%) | Num Clients | Iters |
    +--------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
    | TARGETED-PERF(_20) | primitive_topn_bigint                                  | parquet / none / none | 3.32   | 2.87        |   +15.31%  | * 17.22% * |   1.64%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q4                                            | parquet / none / none | 7.07   | 6.61        |   +6.93%   | * 15.59% * |   5.08%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q1                                            | parquet / none / none | 1.18   | 1.12        |   +5.57%   |   1.94%    |   2.96%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_empty_build_join_1                           | parquet / none / none | 10.75  | 10.47       |   +2.76%   |   1.15%    |   0.94%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_top-n_all                                    | parquet / none / none | 24.30  | 23.85       |   +1.87%   |   1.40%    |   0.82%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_broadcast_join_2                             | parquet / none / none | 2.45   | 2.42        |   +1.38%   |   1.93%    |   1.33%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_bigint_selective                      | parquet / none / none | 0.57   | 0.57        |   +1.16%   |   3.66%    |   4.19%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q3                                         | parquet / none / none | 1.70   | 1.68        |   +1.06%   |   1.88%    |   2.69%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_broadcast_join_3                             | parquet / none / none | 4.15   | 4.13        |   +0.47%   |   1.27%    |   1.28%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_broadcast_join_1                             | parquet / none / none | 1.46   | 1.46        |   +0.32%   |   1.68%    |   2.43%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_orderby_all                                  | parquet / none / none | 12.92  | 12.89       |   +0.22%   |   1.69%    |   1.12%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q4                                         | parquet / none / none | 1.68   | 1.67        |   +0.21%   |   2.31%    |   2.44%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q7                                         | parquet / none / none | 3.35   | 3.35        |   +0.14%   |   1.10%    |   1.84%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_conjunct_ordering_4                          | parquet / none / none | 0.46   | 0.46        |   +0.07%   |   0.29%    |   0.27%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_groupby_bigint_pk                            | parquet / none / none | 35.99  | 36.04       |   -0.13%   |   8.55%    |   6.95%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q6                                            | parquet / none / none | 0.97   | 0.97        |   -0.16%   |   2.68%    |   2.66%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_decimal_selective                     | parquet / none / none | 0.84   | 0.84        |   -0.31%   |   2.85%    |   3.58%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_shuffle_join_union_all_with_groupby          | parquet / none / none | 20.46  | 20.53       |   -0.34%   |   0.60%    |   0.60%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q7                                            | parquet / none / none | 0.98   | 0.98        |   -0.44%   |   2.78%    |   2.33%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_exchange_shuffle                             | parquet / none / none | 25.21  | 25.34       |   -0.49%   |   1.94%    |   1.30%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_string_like                           | parquet / none / none | 6.27   | 6.31        |   -0.56%   |   0.59%    |   0.43%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q5                                         | parquet / none / none | 1.96   | 1.97        |   -0.60%   |   1.98%    |   1.25%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_bigint_non_selective                  | parquet / none / none | 0.53   | 0.53        |   -0.71%   |   2.64%    |   0.28%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_exchange_broadcast                           | parquet / none / none | 14.77  | 14.97       |   -1.33%   |   2.59%    |   2.39%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_LIMIT-Q1                                          | parquet / none / none | 0.01   | 0.01        |   -1.40%   |   3.77%    |   3.51%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_orderby_bigint                               | parquet / none / none | 2.58   | 2.61        |   -1.44%   |   2.85%    |   1.04%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q1                                         | parquet / none / none | 1.47   | 1.49        |   -1.63%   |   2.79%    |   1.62%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_decimal_non_selective                 | parquet / none / none | 0.79   | 0.81        |   -1.65%   |   1.91%    |   2.67%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q2                                         | parquet / none / none | 1.56   | 1.58        |   -1.73%   |   2.86%    |   2.57%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_STRING-Q6                                         | parquet / none / none | 4.48   | 4.59        |   -2.22%   |   1.21%    |   0.85%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_conjunct_ordering_5                          | parquet / none / none | 11.80  | 12.10       |   -2.50%   |   3.06%    |   2.09%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_conjunct_ordering_1                          | parquet / none / none | 7.96   | 8.18        |   -2.58%   |   1.69%    |   2.17%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_shuffle_join_one_to_many_string_with_groupby | parquet / none / none | 93.11  | 95.78       |   -2.78%   |   1.07%    |   1.00%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_string_selective                      | parquet / none / none | 0.88   | 0.91        |   -3.03%   |   4.68%    |   5.34%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q5                                            | parquet / none / none | 2.44   | 2.52        |   -3.35%   |   2.39%    |   2.04%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_groupby_bigint_highndv                       | parquet / none / none | 9.16   | 9.50        |   -3.59%   |   0.98%    |   1.56%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_conjunct_ordering_3                          | parquet / none / none | 1.44   | 1.50        |   -4.14%   |   1.10%    |   1.08%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_filter_string_non_selective                  | parquet / none / none | 0.89   | 0.93        |   -4.31%   |   7.20%    |   5.12%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_groupby_decimal_highndv                      | parquet / none / none | 13.98  | 14.94       |   -6.40%   | * 11.24% * |   8.66%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q2                                            | parquet / none / none | 2.87   | 3.13        |   -8.36%   |   0.57%    |   1.59%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_groupby_bigint_lowndv                        | parquet / none / none | 1.48   | 1.62        |   -8.91%   |   1.62%    |   1.28%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_conjunct_ordering_2                          | parquet / none / none | 13.68  | 15.05       |   -9.14%   |   2.91%    |   1.83%        | 1           | 10    |
    | TARGETED-PERF(_20) | primitive_groupby_decimal_lowndv.test                  | parquet / none / none | 1.47   | 1.61        |   -9.26%   |   0.19%    |   1.50%        | 1           | 10    |
    | TARGETED-PERF(_20) | PERF_AGG-Q3                                            | parquet / none / none | 4.25   | 4.82        |   -11.82%  |   0.79%    |   0.84%        | 1           | 10    |
    +--------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+

Change-Id: Ie353666dbb5c958f0094d169306fe930ec3014c5
Reviewed-on: http://gerrit.cloudera.org:8080/4326
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/01b8ade6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/01b8ade6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/01b8ade6

Branch: refs/heads/master
Commit: 01b8ade668818bb49a53b3d434c5a3b436a4c2c9
Parents: 0c87418
Author: Tim Armstrong <[email protected]>
Authored: Tue Sep 6 15:31:29 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Wed Sep 14 03:04:18 2016 +0000

----------------------------------------------------------------------
 be/src/exec/hash-table-test.cc                 |   4 +-
 be/src/exec/hash-table.cc                      | 351 ++++++++++----------
 be/src/exec/hash-table.h                       | 117 ++++---
 be/src/exec/hash-table.inline.h                |  22 +-
 be/src/exec/partitioned-aggregation-node-ir.cc |   6 +-
 be/src/exec/partitioned-aggregation-node.cc    |  10 +-
 be/src/exec/partitioned-hash-join-node-ir.cc   |   8 +-
 be/src/exec/partitioned-hash-join-node.cc      |  31 +-
 8 files changed, 270 insertions(+), 279 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table-test.cc b/be/src/exec/hash-table-test.cc
index 07d9214..1066d2c 100644
--- a/be/src/exec/hash-table-test.cc
+++ b/be/src/exec/hash-table-test.cc
@@ -455,7 +455,7 @@ class HashTableTest : public testing::Test {
     for (int build_row_val = 0; build_row_val < table_size; ++build_row_val) {
       TupleRow* row = CreateTupleRow(build_row_val);
       bool passes = ht_ctx->EvalAndHashBuild(row);
-      hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+      hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
       EXPECT_TRUE(passes);
 
       // Insert using both Insert() and FindBucket() methods.
@@ -473,7 +473,7 @@ class HashTableTest : public testing::Test {
       EXPECT_EQ(hash_table->EmptyBuckets(), table_size - build_row_val - 1);
 
       passes = ht_ctx->EvalAndHashProbe(row);
-      hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+      hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
       EXPECT_TRUE(passes);
       iter = hash_table->FindProbeRow(ht_ctx.get());
       EXPECT_FALSE(iter.AtEnd());

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index f8df641..dfa700e 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -148,32 +148,33 @@ uint32_t HashTableCtx::Hash(const void* input, int len, uint32_t hash) const {
   return HashUtil::MurmurHash2_64(input, len, hash);
 }
 
-uint32_t HashTableCtx::HashCurrentRow() const {
+uint32_t HashTableCtx::HashRow(
+    const uint8_t* expr_values, const uint8_t* expr_values_null) const {
   DCHECK_LT(level_, seeds_.size());
   if (expr_values_cache_.var_result_offset() == -1) {
     /// This handles NULLs implicitly since a constant seed value was put
     /// into results buffer for nulls.
-    return Hash(expr_values_cache_.cur_expr_values_,
-        expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]);
+    return Hash(
+        expr_values, expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]);
   } else {
-    return HashTableCtx::HashVariableLenRow();
+    return HashTableCtx::HashVariableLenRow(expr_values, expr_values_null);
   }
 }
 
-bool HashTableCtx::EvalRow(TupleRow* row, const vector<ExprContext*>& ctxs) {
+bool HashTableCtx::EvalRow(const TupleRow* row, const vector<ExprContext*>& ctxs,
+    uint8_t* expr_values, uint8_t* expr_values_null) {
   bool has_null = false;
-  uint8_t* exprs_nullness = expr_values_cache_.ExprValueNullPtr(0);
   for (int i = 0; i < ctxs.size(); ++i) {
-    void* loc = expr_values_cache_.ExprValuePtr(i);
+    void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
     void* val = ctxs[i]->GetValue(row);
     if (val == NULL) {
       // If the table doesn't store nulls, no reason to keep evaluating
       if (!stores_nulls_) return true;
-      exprs_nullness[i] = true;
+      expr_values_null[i] = true;
       val = reinterpret_cast<void*>(&NULL_VALUE);
       has_null = true;
     } else {
-      exprs_nullness[i] = false;
+      expr_values_null[i] = false;
     }
     DCHECK_LE(build_expr_ctxs_[i]->root()->type().GetSlotSize(),
         sizeof(NULL_VALUE));
@@ -182,48 +183,48 @@ bool HashTableCtx::EvalRow(TupleRow* row, const vector<ExprContext*>& ctxs) {
   return has_null;
 }
 
-uint32_t HashTableCtx::HashVariableLenRow() const {
+uint32_t HashTableCtx::HashVariableLenRow(
+    const uint8_t* expr_values, const uint8_t* expr_values_null) const {
   uint32_t hash = seeds_[level_];
   int var_result_offset = expr_values_cache_.var_result_offset();
   // Hash the non-var length portions (if there are any)
   if (var_result_offset != 0) {
-    hash = Hash(expr_values_cache_.cur_expr_values_, var_result_offset, hash);
+    hash = Hash(expr_values, var_result_offset, hash);
   }
 
-  uint8_t* exprs_nullness = expr_values_cache_.ExprValueNullPtr(0);
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
-    // non-string and null slots are already part of cur_expr_values_
-    if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING &&
-        build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue;
+    // non-string and null slots are already part of 'expr_values'.
+    if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING
+        && build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue;
 
-    void* loc = expr_values_cache_.ExprValuePtr(i);
-    if (exprs_nullness[i]) {
+    const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
+    if (expr_values_null[i]) {
       // Hash the null random seed values at 'loc'
       hash = Hash(loc, sizeof(StringValue), hash);
     } else {
       // Hash the string
       // TODO: when using CRC hash on empty string, this only swaps bytes.
-      StringValue* str = reinterpret_cast<StringValue*>(loc);
+      const StringValue* str = reinterpret_cast<const StringValue*>(loc);
       hash = Hash(str->ptr, str->len, hash);
     }
   }
   return hash;
 }
 
-template<bool FORCE_NULL_EQUALITY>
-bool HashTableCtx::Equals(TupleRow* build_row) const {
-  uint8_t* exprs_nullness = expr_values_cache_.ExprValueNullPtr(0);
+template <bool FORCE_NULL_EQUALITY>
+bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values,
+    const uint8_t* expr_values_null) const {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     void* val = build_expr_ctxs_[i]->GetValue(build_row);
     if (val == NULL) {
       if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false;
-      if (!exprs_nullness[i]) return false;
+      if (!expr_values_null[i]) return false;
       continue;
     } else {
-      if (exprs_nullness[i]) return false;
+      if (expr_values_null[i]) return false;
     }
 
-    void* loc = expr_values_cache_.ExprValuePtr(i);
+    const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
     if (!RawValue::Eq(loc, val, build_expr_ctxs_[i]->root()->type())) {
       return false;
     }
@@ -231,19 +232,21 @@ bool HashTableCtx::Equals(TupleRow* build_row) const {
   return true;
 }
 
-template bool HashTableCtx::Equals<true>(TupleRow* build_row) const;
-template bool HashTableCtx::Equals<false>(TupleRow* build_row) const;
+template bool HashTableCtx::Equals<true>(const TupleRow* build_row,
+    const uint8_t* expr_values, const uint8_t* expr_values_null) const;
+template bool HashTableCtx::Equals<false>(const TupleRow* build_row,
+    const uint8_t* expr_values, const uint8_t* expr_values_null) const;
 
 HashTableCtx::ExprValuesCache::ExprValuesCache()
-    : capacity_(0),
-      cur_expr_values_(NULL),
-      cur_expr_values_null_(NULL),
-      cur_expr_values_hash_(NULL),
-      cur_expr_values_hash_end_(NULL),
-      expr_values_array_(NULL),
-      expr_values_null_array_(NULL),
-      expr_values_hash_array_(NULL),
-      null_bitmap_(0) { }
+  : capacity_(0),
+    cur_expr_values_(NULL),
+    cur_expr_values_null_(NULL),
+    cur_expr_values_hash_(NULL),
+    cur_expr_values_hash_end_(NULL),
+    expr_values_array_(NULL),
+    expr_values_null_array_(NULL),
+    expr_values_hash_array_(NULL),
+    null_bitmap_(0) {}
 
 Status HashTableCtx::ExprValuesCache::Init(RuntimeState* state,
     MemTracker* tracker, const std::vector<ExprContext*>& build_expr_ctxs) {
@@ -312,12 +315,14 @@ int HashTableCtx::ExprValuesCache::MemUsage(int capacity,
       Bitmap::MemUsage(capacity);               // null_bitmap_
 }
 
-uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(int expr_idx) const {
-  return cur_expr_values_ + expr_values_offsets_[expr_idx];
+uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(
+    uint8_t* expr_values, int expr_idx) const {
+  return expr_values + expr_values_offsets_[expr_idx];
 }
 
-uint8_t* HashTableCtx::ExprValuesCache::ExprValueNullPtr(int expr_idx) const {
-  return cur_expr_values_null_ + expr_idx;
+const uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(
+    const uint8_t* expr_values, int expr_idx) const {
+  return expr_values + expr_values_offsets_[expr_idx];
 }
 
 void HashTableCtx::ExprValuesCache::ResetIterators() {
@@ -609,65 +614,64 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
 }
 
 // Codegen for evaluating a tuple row over either build_expr_ctxs_ or 
probe_expr_ctxs_.
-// For a group by with (big int, string) the IR looks like
+// For a group by with (big int, string) the IR looks like:
+//
 // define i1 @EvalProbeRow(%"class.impala::HashTableCtx"* %this_ptr,
-//                         %"class.impala::TupleRow"* %row) #33 {
+//    %"class.impala::TupleRow"* %row, i8* %expr_values, i8* 
%expr_values_null) #34 {
 // entry:
-//   %0 = load i8*, i8** inttoptr (i64 230325056 to i8**)
-//   %1 = load i8*, i8** inttoptr (i64 230325064 to i8**)
-//   %loc_addr = getelementptr i8, i8* %0, i32 0
-//   %loc = bitcast i8* %loc_addr to i32*
-//   %result = call i64 @GetSlotRef.3(%"class.impala::ExprContext"*
-//             inttoptr (i64 158123712 to %"class.impala::ExprContext"*),
-//             %"class.impala::TupleRow"* %row)
-//   %is_null = trunc i64 %result to i1
-//   %2 = zext i1 %is_null to i8
-//   %null_byte_loc = getelementptr i8, i8* %1, i32 0
-//   store i8 %2, i8* %null_byte_loc
+//   %loc_addr = getelementptr i8, i8* %expr_values, i32 0
+//   %loc = bitcast i8* %loc_addr to i64*
+//   %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
+//        inttoptr (i64 197737664 to %"class.impala::ExprContext"*),
+//        %"class.impala::TupleRow"* %row)
+//   %0 = extractvalue { i8, i64 } %result, 0
+//   %is_null = trunc i8 %0 to i1
+//   %1 = zext i1 %is_null to i8
+//   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
+//   store i8 %1, i8* %null_byte_loc
 //   br i1 %is_null, label %null, label %not_null
 //
 // null:                                             ; preds = %entry
-//   store i32 -2128831035, i32* %loc
+//   store i64 2166136261, i64* %loc
 //   br label %continue
 //
 // not_null:                                         ; preds = %entry
-//   %3 = ashr i64 %result, 32
-//   %4 = trunc i64 %3 to i32
-//   store i32 %4, i32* %loc
+//   %val = extractvalue { i8, i64 } %result, 1
+//   store i64 %val, i64* %loc
 //   br label %continue
 //
 // continue:                                         ; preds = %not_null, %null
 //   %is_null_phi = phi i1 [ true, %null ], [ false, %not_null ]
 //   %has_null = or i1 false, %is_null_phi
-//   %loc_addr1 = getelementptr i8, i8* %0, i32 8
+//   %loc_addr1 = getelementptr i8, i8* %expr_values, i32 8
 //   %loc2 = bitcast i8* %loc_addr1 to %"struct.impala::StringValue"*
-//   %result6 = call { i64, i8* } @GetSlotRef.4(%"class.impala::ExprContext"*
-//              inttoptr (i64 158123904 to %"class.impala::ExprContext"*),
-//              %"class.impala::TupleRow"* %row)
-//   %5 = extractvalue { i64, i8* } %result6, 0
-//   %is_null7 = trunc i64 %5 to i1
-//   %6 = zext i1 %is_null7 to i8
-//   %null_byte_loc8 = getelementptr i8, i8* %1, i32 1
-//   store i8 %6, i8* %null_byte_loc8
+//   %result6 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
+//      inttoptr (i64 197738048 to %"class.impala::ExprContext"*),
+//      %"class.impala::TupleRow"* %row)
+//   %2 = extractvalue { i64, i8* } %result6, 0
+//   %is_null7 = trunc i64 %2 to i1
+//   %3 = zext i1 %is_null7 to i8
+//   %null_byte_loc8 = getelementptr i8, i8* %expr_values_null, i32 1
+//   store i8 %3, i8* %null_byte_loc8
 //   br i1 %is_null7, label %null3, label %not_null4
 //
 // null3:                                            ; preds = %continue
 //   %string_ptr = getelementptr inbounds %"struct.impala::StringValue",
-//                 %"struct.impala::StringValue"* %loc2, i32 0, i32 0
+//        %"struct.impala::StringValue"* %loc2, i32 0, i32 0
 //   %string_len = getelementptr inbounds %"struct.impala::StringValue",
-//                 %"struct.impala::StringValue"* %loc2, i32 0, i32 1
+//        %"struct.impala::StringValue"* %loc2, i32 0, i32 1
 //   store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr
 //   store i32 -2128831035, i32* %string_len
 //   br label %continue5
 //
 // not_null4:                                        ; preds = %continue
+//   %4 = extractvalue { i64, i8* } %result6, 0
+//   %5 = ashr i64 %4, 32
+//   %6 = trunc i64 %5 to i32
+//   %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %6, 1
 //   %result9 = extractvalue { i64, i8* } %result6, 1
-//   %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i8* 
%result9, 0
-//   %8 = extractvalue { i64, i8* } %result6, 0
-//   %9 = ashr i64 %8, 32
-//   %10 = trunc i64 %9 to i32
-//   %11 = insertvalue %"struct.impala::StringValue" %7, i32 %10, 1
-//   store %"struct.impala::StringValue" %11, %"struct.impala::StringValue"* 
%loc2
+//   %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result9, 0
+//   store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"* 
%loc2
 //   br label %continue5
 //
 // continue5:                                        ; preds = %not_null4, 
%null3
@@ -675,6 +679,7 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
 //   %has_null11 = or i1 %has_null, %is_null_phi10
 //   ret i1 %has_null11
 // }
+//
 // For each expr, we create 3 code blocks.  The null, not null and continue 
blocks.
 // Both the null and not null branch into the continue block.  The continue 
block
 // becomes the start of the next block for codegen (either the next expr or 
just the
@@ -701,37 +706,31 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, 
bool build, Function**
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
   PointerType* this_ptr_type = PointerType::get(this_type, 0);
-  PointerType* buffer_ptr_type = PointerType::get(codegen->ptr_type(), 0);
   LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : 
"EvalProbeRow",
       codegen->GetType(TYPE_BOOLEAN));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", 
codegen->ptr_type()));
+  prototype.AddArgument(
+      LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
 
   LLVMContext& context = codegen->context();
   LlvmCodeGen::LlvmBuilder builder(context);
-  Value* args[2];
+  Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, args);
   Value* row = args[1];
+  Value* expr_values = args[2];
+  Value* expr_values_null = args[3];
   Value* has_null = codegen->false_value();
 
-  // Load cur_expr_values_ into a LLVM pointer.
-  Value* cur_expr_values_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
-      &expr_values_cache_.cur_expr_values_);
-  Value* cur_expr_values = builder.CreateLoad(cur_expr_values_ptr);
-
-  // Load cur_expr_values_null_ into a LLVM pointer.
-  Value* cur_expr_values_null_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
-      &expr_values_cache_.cur_expr_values_null_);
-  Value* cur_expr_values_null = builder.CreateLoad(cur_expr_values_null_ptr);
-
   for (int i = 0; i < ctxs.size(); ++i) {
     // TODO: refactor this to somewhere else?  This is not hash table specific 
except for
     // the null handling bit and would be used for anyone that needs to 
materialize a
     // vector of exprs
     // Convert result buffer to llvm ptr type
     int offset = expr_values_cache_.expr_values_offsets(i);
-    Value* loc = builder.CreateGEP(NULL, cur_expr_values,
-        codegen->GetIntConstant(TYPE_INT, offset), "loc_addr");
+    Value* loc = builder.CreateGEP(
+        NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), 
"loc_addr");
     Value* llvm_loc = builder.CreatePointerCast(loc,
         codegen->GetPtrType(ctxs[i]->root()->type()), "loc");
 
@@ -758,8 +757,8 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, 
bool build, Function**
 
     // Set null-byte result
     Value* null_byte = builder.CreateZExt(is_null, 
codegen->GetType(TYPE_TINYINT));
-    Value* llvm_null_byte_loc = builder.CreateGEP(NULL, cur_expr_values_null,
-        codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc");
+    Value* llvm_null_byte_loc = builder.CreateGEP(
+        NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i), 
"null_byte_loc");
     builder.CreateStore(null_byte, llvm_null_byte_loc);
     builder.CreateCondBr(is_null, null_block, not_null_block);
 
@@ -800,15 +799,15 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, 
bool build, Function**
 
 // Codegen for hashing the current row.  In the case with both string and 
non-string data
 // (group by int_col, string_col), the IR looks like:
-// define i32 @HashCurrentRow(%"class.impala::HashTableCtx"* %this_ptr) #33 {
+//
+// define i32 @HashRow(%"class.impala::HashTableCtx"* %this_ptr, i8* 
%expr_values,
+//    i8* %expr_values_null) #34 {
 // entry:
-//   %0 = load i8*, i8** inttoptr (i64 230325056 to i8**)
-//   %1 = load i8*, i8** inttoptr (i64 230325064 to i8**)
 //   %seed = call i32 @_ZNK6impala12HashTableCtx11GetHashSeedEv(
-//           %"class.impala::HashTableCtx"* %this_ptr)
-//   %hash = call i32 @CrcHash8(i8* %0, i32 8, i32 %seed)
-//   %loc_addr = getelementptr i8, i8* %0, i32 8
-//   %null_byte_loc = getelementptr i8, i8* %1, i32 1
+//        %"class.impala::HashTableCtx"* %this_ptr)
+//   %hash = call i32 @CrcHash8(i8* %expr_values, i32 8, i32 %seed)
+//   %loc_addr = getelementptr i8, i8* %expr_values, i32 8
+//   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 1
 //   %null_byte = load i8, i8* %null_byte_loc
 //   %is_null = icmp ne i8 %null_byte, 0
 //   br i1 %is_null, label %null, label %not_null
@@ -819,12 +818,12 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, 
bool build, Function**
 //
 // not_null:                                         ; preds = %entry
 //   %str_val = bitcast i8* %loc_addr to %"struct.impala::StringValue"*
-//   %2 = getelementptr inbounds %"struct.impala::StringValue",
+//   %0 = getelementptr inbounds %"struct.impala::StringValue",
 //        %"struct.impala::StringValue"* %str_val, i32 0, i32 0
-//   %3 = getelementptr inbounds %"struct.impala::StringValue",
+//   %1 = getelementptr inbounds %"struct.impala::StringValue",
 //        %"struct.impala::StringValue"* %str_val, i32 0, i32 1
-//   %ptr = load i8*, i8** %2
-//   %len = load i32, i32* %3
+//   %ptr = load i8*, i8** %0
+//   %len = load i32, i32* %1
 //   %string_hash = call i32 @IrCrcHash(i8* %ptr, i32 %len, i32 %hash)
 //   br label %continue
 //
@@ -832,12 +831,11 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, 
bool build, Function**
 //   %hash_phi = phi i32 [ %string_hash, %not_null ], [ %str_null, %null ]
 //   ret i32 %hash_phi
 // }
-Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* state, bool 
use_murmur,
-    Function** fn) {
+Status HashTableCtx::CodegenHashRow(RuntimeState* state, bool use_murmur, 
Function** fn) {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     // Disable codegen for CHAR
     if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) {
-      return Status("HashTableCtx::CodegenHashCurrentRow(): CHAR NYI");
+      return Status("HashTableCtx::CodegenHashRow(): CHAR NYI");
     }
   }
 
@@ -848,30 +846,21 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
   PointerType* this_ptr_type = PointerType::get(this_type, 0);
-  PointerType* buffer_ptr_type = PointerType::get(codegen->ptr_type(), 0);
 
-  LlvmCodeGen::FnPrototype prototype(codegen,
-      (use_murmur ? "MurmurHashCurrentRow" : "HashCurrentRow"),
-      codegen->GetType(TYPE_INT));
+  LlvmCodeGen::FnPrototype prototype(
+      codegen, (use_murmur ? "MurmurHashRow" : "HashRow"), 
codegen->GetType(TYPE_INT));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", 
codegen->ptr_type()));
+  prototype.AddArgument(
+      LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
 
   LLVMContext& context = codegen->context();
   LlvmCodeGen::LlvmBuilder builder(context);
-  Value* this_arg;
-  *fn = prototype.GeneratePrototype(&builder, &this_arg);
-
-  // Load cur_expr_values_ into a LLVM pointer.
-  Value* cur_expr_values_ptr =
-      codegen->CastPtrToLlvmPtr(buffer_ptr_type, 
&expr_values_cache_.cur_expr_values_);
-  Value* cur_expr_values = builder.CreateLoad(cur_expr_values_ptr);
-
-  // Load cur_expr_values_null_ into a LLVM pointer.
-  Value* cur_expr_values_null = NULL;
-  if (stores_nulls_) {
-    Value* cur_expr_values_null_ptr = codegen->CastPtrToLlvmPtr(
-        buffer_ptr_type, &expr_values_cache_.cur_expr_values_null_);
-    cur_expr_values_null = builder.CreateLoad(cur_expr_values_null_ptr);
-  }
+  Value* args[3];
+  *fn = prototype.GeneratePrototype(&builder, args);
+  Value* this_arg = args[0];
+  Value* expr_values = args[1];
+  Value* expr_values_null = args[2];
 
   // Call GetHashSeed() to get seeds_[level_]
   Function* get_hash_seed_fn =
@@ -889,8 +878,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
                           
codegen->GetMurmurHashFunction(expr_values_bytes_per_row) :
                           codegen->GetHashFunction(expr_values_bytes_per_row);
       Value* len = codegen->GetIntConstant(TYPE_INT, 
expr_values_bytes_per_row);
-      hash_result = builder.CreateCall(hash_fn,
-          ArrayRef<Value*>({cur_expr_values, len, hash_result}), "hash");
+      hash_result = builder.CreateCall(
+          hash_fn, ArrayRef<Value*>({expr_values, len, hash_result}), "hash");
     }
   } else {
     if (var_result_offset > 0) {
@@ -898,8 +887,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
                           codegen->GetMurmurHashFunction(var_result_offset) :
                           codegen->GetHashFunction(var_result_offset);
       Value* len = codegen->GetIntConstant(TYPE_INT, var_result_offset);
-      hash_result = builder.CreateCall(hash_fn,
-          ArrayRef<Value*>({cur_expr_values, len, hash_result}), "hash");
+      hash_result = builder.CreateCall(
+          hash_fn, ArrayRef<Value*>({expr_values, len, hash_result}), "hash");
     }
 
     // Hash string slots
@@ -913,8 +902,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
       Value* str_null_result = NULL;
 
       int offset = expr_values_cache_.expr_values_offsets(i);
-      Value* llvm_loc = builder.CreateGEP(NULL, cur_expr_values,
-          codegen->GetIntConstant(TYPE_INT, offset), "loc_addr");
+      Value* llvm_loc = builder.CreateGEP(
+          NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), 
"loc_addr");
 
       // If the hash table stores nulls, we need to check if the stringval
       // evaluated to NULL
@@ -923,7 +912,7 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
         not_null_block = BasicBlock::Create(context, "not_null", *fn);
         continue_block = BasicBlock::Create(context, "continue", *fn);
 
-        Value* llvm_null_byte_loc = builder.CreateGEP(NULL, 
cur_expr_values_null,
+        Value* llvm_null_byte_loc = builder.CreateGEP(NULL, expr_values_null,
             codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc");
         Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte");
         Value* is_null = builder.CreateICmpNE(null_byte,
@@ -977,8 +966,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
   builder.CreateRet(hash_result);
   *fn = codegen->FinalizeFunction(*fn);
   if (*fn == NULL) {
-    return Status("Codegen'd HashTableCtx::HashCurrentRow() function failed "
-        "verification, see log");
+    return Status(
+        "Codegen'd HashTableCtx::HashRow() function failed verification, see 
log");
   }
   return Status::OK();
 }
@@ -986,67 +975,67 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* 
state, bool use_murmur,
 // Codegen for HashTableCtx::Equals.  For a group by with (bigint, string),
 // the IR looks like:
 //
-// define i1 @Equals(%"class.impala::HashTableCtx"* %this_ptr,
-//                   %"class.impala::TupleRow"* %row) #33 {
+// define i1 @Equals(%"class.impala::HashTableCtx"* %this_ptr,
+//      %"class.impala::TupleRow"* %row,
+//      i8* %expr_values, i8* %expr_values_null) #34 {
 // entry:
 //   %0 = alloca { i64, i8* }
-//   %1 = load i8*, i8** inttoptr (i64 230325056 to i8**)
-//   %2 = load i8*, i8** inttoptr (i64 230325064 to i8**)
-//   %result = call i64 @GetSlotRef(%"class.impala::ExprContext"* inttoptr
-//             (i64 165557504 to %"class.impala::ExprContext"*),
-//             %"class.impala::TupleRow"* %row)
-//   %is_null = trunc i64 %result to i1
-//   %null_byte_loc = getelementptr i8, i8* %2, i32 0
-//   %3 = load i8, i8* %null_byte_loc
-//   %4 = icmp ne i8 %3, 0
-//   %loc = getelementptr i8, i8* %1, i32 0
-//   %row_val = bitcast i8* %loc to i32*
+//   %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
+//        inttoptr (i64 139107136 to %"class.impala::ExprContext"*),
+//        %"class.impala::TupleRow"* %row)
+//   %1 = extractvalue { i8, i64 } %result, 0
+//   %is_null = trunc i8 %1 to i1
+//   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
+//   %2 = load i8, i8* %null_byte_loc
+//   %3 = icmp ne i8 %2, 0
+//   %loc = getelementptr i8, i8* %expr_values, i32 0
+//   %row_val = bitcast i8* %loc to i64*
 //   br i1 %is_null, label %null, label %not_null
 //
-// false_block:                ; preds = %cmp9, %not_null2, %null1, %cmp, 
%not_null, %null
+// false_block:                                      ; preds = %cmp9, 
%not_null2, %null1,
+//                                                             %cmp, 
%not_null, %null
 //   ret i1 false
 //
 // null:                                             ; preds = %entry
-//   br i1 %4, label %continue, label %false_block
+//   br i1 %3, label %continue, label %false_block
 //
 // not_null:                                         ; preds = %entry
-//   br i1 %4, label %false_block, label %cmp
+//   br i1 %3, label %false_block, label %cmp
 //
 // continue:                                         ; preds = %cmp, %null
-//   %result4 = call { i64, i8* } @GetSlotRef.2(%"class.impala::ExprContext"*
-//              inttoptr (i64 165557696 to %"class.impala::ExprContext"*),
-//              %"class.impala::TupleRow"* %row)
-//   %5 = extractvalue { i64, i8* } %result4, 0
-//   %is_null5 = trunc i64 %5 to i1
-//   %null_byte_loc6 = getelementptr i8, i8* %2, i32 1
-//   %6 = load i8, i8* %null_byte_loc6
-//   %7 = icmp ne i8 %6, 0
-//   %loc7 = getelementptr i8, i8* %1, i32 8
+//   %result4 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
+//        inttoptr (i64 139107328 to %"class.impala::ExprContext"*),
+//        %"class.impala::TupleRow"* %row)
+//   %4 = extractvalue { i64, i8* } %result4, 0
+//   %is_null5 = trunc i64 %4 to i1
+//   %null_byte_loc6 = getelementptr i8, i8* %expr_values_null, i32 1
+//   %5 = load i8, i8* %null_byte_loc6
+//   %6 = icmp ne i8 %5, 0
+//   %loc7 = getelementptr i8, i8* %expr_values, i32 8
 //   %row_val8 = bitcast i8* %loc7 to %"struct.impala::StringValue"*
 //   br i1 %is_null5, label %null1, label %not_null2
 //
 // cmp:                                              ; preds = %not_null
-//   %8 = load i32, i32* %row_val
-//   %9 = ashr i64 %result, 32
-//   %10 = trunc i64 %9 to i32
-//   %cmp_raw = icmp eq i32 %10, %8
+//   %7 = load i64, i64* %row_val
+//   %val = extractvalue { i8, i64 } %result, 1
+//   %cmp_raw = icmp eq i64 %val, %7
 //   br i1 %cmp_raw, label %continue, label %false_block
 //
 // null1:                                            ; preds = %continue
-//   br i1 %7, label %continue3, label %false_block
+//   br i1 %6, label %continue3, label %false_block
 //
 // not_null2:                                        ; preds = %continue
-//   br i1 %7, label %false_block, label %cmp9
+//   br i1 %6, label %false_block, label %cmp9
 //
 // continue3:                                        ; preds = %cmp9, %null1
 //   ret i1 true
 //
 // cmp9:                                             ; preds = %not_null2
 //   store { i64, i8* } %result4, { i64, i8* }* %0
-//   %11 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"*
-//   %cmp_raw10 = call i1 @_Z13StringValueEqRKN10impala_udf9StringValERKN6
-//                impala11StringValueE(%"struct.impala_udf::StringVal"* %11,
-//                %"struct.impala::StringValue"* %row_val8)
+//   %8 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"*
+//   %cmp_raw10 = call i1
+//        @_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE(
+//        %"struct.impala_udf::StringVal"* %8, %"struct.impala::StringValue"* 
%row_val8)
 //   br i1 %cmp_raw10, label %continue3, label %false_block
 // }
 Status HashTableCtx::CodegenEquals(RuntimeState* state, bool 
force_null_equality,
@@ -1068,26 +1057,20 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, 
bool force_null_equality
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
   PointerType* this_ptr_type = PointerType::get(this_type, 0);
-  PointerType* buffer_ptr_type = PointerType::get(codegen->ptr_type(), 0);
   LlvmCodeGen::FnPrototype prototype(codegen, "Equals", 
codegen->GetType(TYPE_BOOLEAN));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", 
codegen->ptr_type()));
+  prototype.AddArgument(
+      LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
 
   LLVMContext& context = codegen->context();
   LlvmCodeGen::LlvmBuilder builder(context);
-  Value* args[2];
+  Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, args);
   Value* row = args[1];
-
-  // Load cur_expr_values_ into a LLVM pointer.
-  Value* cur_expr_values_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
-      &expr_values_cache_.cur_expr_values_);
-  Value* cur_expr_values = builder.CreateLoad(cur_expr_values_ptr);
-
-  // Load cur_expr_values_null_ into a LLVM pointer.
-  Value* cur_expr_values_null_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
-      &expr_values_cache_.cur_expr_values_null_);
-  Value* cur_expr_values_null = builder.CreateLoad(cur_expr_values_null_ptr);
+  Value* expr_values = args[2];
+  Value* expr_values_null = args[3];
 
   BasicBlock* false_block = BasicBlock::Create(context, "false_block", *fn);
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
@@ -1112,24 +1095,24 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, 
bool force_null_equality
         build_expr_ctxs_[i]->root()->type(), expr_fn, expr_fn_args, "result");
     Value* is_null = result.GetIsNull();
 
-    // Determine if row is null (i.e. cur_expr_values_null_[i] == true). In
+    // Determine if row is null (i.e. expr_values_null[i] == true). In
     // the case where the hash table does not store nulls, this is always 
false.
     Value* row_is_null = codegen->false_value();
 
     // We consider null values equal if we are comparing build rows or if the 
join
     // predicate is <=>
     if (force_null_equality || finds_nulls_[i]) {
-      Value* llvm_null_byte_loc = builder.CreateGEP(NULL, cur_expr_values_null,
-          codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc");
+      Value* llvm_null_byte_loc = builder.CreateGEP(
+          NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i), 
"null_byte_loc");
       Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
       row_is_null = builder.CreateICmpNE(null_byte,
           codegen->GetIntConstant(TYPE_TINYINT, 0));
     }
 
-    // Get llvm value for row_val from 'cur_expr_values_'
+    // Get llvm value for row_val from 'expr_values'
     int offset = expr_values_cache_.expr_values_offsets(i);
-    Value* loc = builder.CreateGEP(NULL, cur_expr_values,
-        codegen->GetIntConstant(TYPE_INT, offset), "loc");
+    Value* loc = builder.CreateGEP(
+        NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc");
     Value* row_val = builder.CreatePointerCast(loc,
         codegen->GetPtrType(build_expr_ctxs_[i]->root()->type()), "row_val");
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index b549aef..58078ad 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -155,12 +155,13 @@ class HashTableCtx {
   /// TODO: this is an awkward abstraction but aggregation node can take 
advantage of
   /// it and save some expr evaluation calls.
   void* ALWAYS_INLINE ExprValue(int expr_idx) const {
-    return expr_values_cache_.ExprValuePtr(expr_idx);
+    return expr_values_cache_.ExprValuePtr(
+        expr_values_cache_.cur_expr_values(), expr_idx);
   }
 
   /// Returns if the expression at 'expr_idx' is evaluated to NULL for the 
current row.
   bool ALWAYS_INLINE ExprValueNull(int expr_idx) const {
-    return static_cast<bool>(*expr_values_cache_.ExprValueNullPtr(expr_idx));
+    return static_cast<bool>(*(expr_values_cache_.cur_expr_values_null() + 
expr_idx));
   }
 
   /// Evaluate and hash the build/probe row, saving the evaluation to the 
current row of
@@ -170,27 +171,26 @@ class HashTableCtx {
   /// row should be rejected  (doesn't need to be processed further) because 
it contains
   /// NULL. These need to be inlined in the IR module so we can find and 
replace the
   /// calls to EvalBuildRow()/EvalProbeRow().
-  bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row);
-  bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row);
+  bool IR_ALWAYS_INLINE EvalAndHashBuild(const TupleRow* row);
+  bool IR_ALWAYS_INLINE EvalAndHashProbe(const TupleRow* row);
 
-  /// Codegen for evaluating a tuple row.  Codegen'd function matches the 
signature
+  /// Codegen for evaluating a tuple row. Codegen'd function matches the 
signature
   /// for EvalBuildRow and EvalTupleRow.
   /// If build_row is true, the codegen uses the build_exprs, otherwise the 
probe_exprs.
   Status CodegenEvalRow(RuntimeState* state, bool build_row, llvm::Function** 
fn);
 
-  /// Codegen for evaluating a TupleRow and comparing equality against
-  /// 'cur_expr_values_'.  Function signature matches HashTable::Equals().
-  /// 'force_null_equality' is true if the generated equality function should 
treat
-  /// all NULLs as equal. See the template parameter to HashTable::Equals().
+  /// Codegen for evaluating a TupleRow and comparing equality. Function signature
+  /// matches HashTableCtx::Equals(). 'force_null_equality' is true if the generated
+  /// equality function should treat all NULLs as equal. See the template parameter
+  /// to HashTableCtx::Equals().
   Status CodegenEquals(RuntimeState* state, bool force_null_equality,
       llvm::Function** fn);
 
-  /// Codegen for hashing the expr values in 'cur_expr_values_'. Function 
prototype
-  /// matches HashCurrentRow identically. Unlike HashCurrentRow(), the 
returned function
-  /// only uses a single hash function, rather than switching based on level_.
-  /// If 'use_murmur' is true, murmur hash is used, otherwise CRC is used if 
the hardware
-  /// supports it (see hash-util.h).
-  Status CodegenHashCurrentRow(RuntimeState* state, bool use_murmur, 
llvm::Function** fn);
+  /// Codegen for hashing expr values. Function prototype matches HashRow 
identically.
+  /// Unlike HashRow(), the returned function only uses a single hash 
function, rather
+  /// than switching based on level_. If 'use_murmur' is true, murmur hash is 
used,
+  /// otherwise CRC is used if the hardware supports it (see hash-util.h).
+  Status CodegenHashRow(RuntimeState* state, bool use_murmur, llvm::Function** 
fn);
 
   /// Struct that returns the number of constants replaced by 
ReplaceConstants().
   struct HashTableReplacedConstants {
@@ -297,17 +297,24 @@ class HashTableCtx {
     void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set<false>(CurIdx(), true); 
}
 
     /// Returns the hash values of the current row.
-    uint32_t ALWAYS_INLINE ExprValuesHash() const { return 
*cur_expr_values_hash_; }
+    uint32_t ALWAYS_INLINE CurExprValuesHash() const { return 
*cur_expr_values_hash_; }
 
     /// Sets the hash values for the current row.
-    void ALWAYS_INLINE SetExprValuesHash(uint32_t hash) { 
*cur_expr_values_hash_ = hash; }
+    void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { 
*cur_expr_values_hash_ = hash; }
 
-    /// Returns a pointer to the expression value at 'expr_idx' for the 
current row.
-    uint8_t* ExprValuePtr(int expr_idx) const;
+    /// Returns a pointer to the expression value at 'expr_idx' in 
'expr_values'.
+    uint8_t* ExprValuePtr(uint8_t* expr_values, int expr_idx) const;
+    const uint8_t* ExprValuePtr(const uint8_t* expr_values, int expr_idx) 
const;
 
-    /// Returns a pointer to the boolean indicating the nullness of the 
expression value
-    /// at 'expr_idx'.
-    uint8_t* ExprValueNullPtr(int expr_idx) const;
+    /// Returns the current row's expression buffer. The expression values in 
the buffer
+    /// are accessed using ExprValuePtr().
+    uint8_t* ALWAYS_INLINE cur_expr_values() const { return cur_expr_values_; }
+
+    /// Returns null indicator bytes for the current row, one per expression. 
Non-zero
+    /// bytes mean NULL, zero bytes mean non-NULL. Indexed by the expression 
index.
+    /// These are uint8_t instead of bool to simplify codegen with IRBuilder.
+    /// TODO: is there actually a valid reason why this is necessary for 
codegen?
+    uint8_t* ALWAYS_INLINE cur_expr_values_null() const { return 
cur_expr_values_null_; }
 
     /// Returns the offset into the results buffer of the expression value at 
'expr_idx'.
     int ALWAYS_INLINE expr_values_offsets(int expr_idx) const {
@@ -393,46 +400,60 @@ class HashTableCtx {
   /// null bits etc. Returns error if allocation causes query memory limit to 
be exceeded.
   Status Init(RuntimeState* state, int num_build_tuples);
 
-  /// Compute the hash of the values in expr_values_buffer_.
+  /// Compute the hash of the values in 'expr_values' with nullness 
'expr_values_null'.
   /// This will be replaced by codegen.  We don't want this inlined for 
replacing
   /// with codegen'd functions so the function name does not change.
-  uint32_t IR_NO_INLINE HashCurrentRow() const;
+  uint32_t IR_NO_INLINE HashRow(
+      const uint8_t* expr_values, const uint8_t* expr_values_null) const;
 
   /// Wrapper function for calling correct HashUtil function in non-codegen'd 
case.
   uint32_t Hash(const void* input, int len, uint32_t hash) const;
 
-  /// Evaluate 'row' over build exprs caching the results in 
'cur_expr_values_' This
-  /// will be replaced by codegen.  We do not want this function inlined when 
cross
-  /// compiled because we need to be able to differentiate between 
EvalBuildRow and
-  /// EvalProbeRow by name and the build/probe exprs are baked into the 
codegen'd
-  /// function.
-  bool IR_NO_INLINE EvalBuildRow(TupleRow* row) {
-    return EvalRow(row, build_expr_ctxs_);
+  /// Evaluate 'row' over build exprs, storing values into 'expr_values' and 
nullness into
+  /// 'expr_values_null'. This will be replaced by codegen. We do not want this
+  /// function inlined when cross compiled because we need to be able to 
differentiate
+  /// between EvalBuildRow and EvalProbeRow by name and the build/probe exprs 
are baked
+  /// into the codegen'd function.
+  bool IR_NO_INLINE EvalBuildRow(
+      const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) {
+    return EvalRow(row, build_expr_ctxs_, expr_values, expr_values_null);
   }
 
-  /// Evaluate 'row' over probe exprs caching the results in 'cur_expr_values_'
-  /// This will be replaced by codegen.
-  bool IR_NO_INLINE EvalProbeRow(TupleRow* row) {
-    return EvalRow(row, probe_expr_ctxs_);
+  /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' 
and nullness
+  /// into 'expr_values_null'. This will be replaced by codegen.
+  bool IR_NO_INLINE EvalProbeRow(
+      const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) {
+    return EvalRow(row, probe_expr_ctxs_, expr_values, expr_values_null);
   }
 
-  /// Compute the hash of the values in expr_values_buffer_ for rows with 
variable length
-  /// fields (e.g. strings).
-  uint32_t HashVariableLenRow() const;
+  /// Compute the hash of the values in 'expr_values' with nullness 
'expr_values_null'
+  /// for a row with variable length fields (e.g. strings).
+  uint32_t HashVariableLenRow(
+      const uint8_t* expr_values, const uint8_t* expr_values_null) const;
 
-  /// Evaluate the exprs over row and cache the results in 'cur_expr_values_'.
-  /// Returns whether any expr evaluated to NULL.
-  /// This will be replaced by codegen.
-  bool EvalRow(TupleRow* row, const std::vector<ExprContext*>& ctxs);
+  /// Evaluate the exprs over row, storing the values into 'expr_values' and 
nullness into
+  /// 'expr_values_null'. Returns whether any expr evaluated to NULL. This 
will be
+  /// replaced by codegen.
+  bool EvalRow(const TupleRow* row, const std::vector<ExprContext*>& ctxs,
+      uint8_t* expr_values, uint8_t* expr_values_null);
 
   /// Returns true if the values of build_exprs evaluated over 'build_row' 
equal the
-  /// values cached in 'cur_expr_values_'.  This will be replaced by codegen.
-  /// FORCE_NULL_EQUALITY is true if all nulls should be treated as equal, 
regardless
-  /// of the values of 'finds_nulls_'.
-  template<bool FORCE_NULL_EQUALITY>
-  bool IR_NO_INLINE Equals(TupleRow* build_row) const;
+  /// values in 'expr_values' with nullness 'expr_values_null'. 
FORCE_NULL_EQUALITY is
+  /// true if all nulls should be treated as equal, regardless of the values of
+  /// 'finds_nulls_'. This will be replaced by codegen.
+  template <bool FORCE_NULL_EQUALITY>
+  bool IR_NO_INLINE Equals(const TupleRow* build_row, const uint8_t* 
expr_values,
+      const uint8_t* expr_values_null) const;
+
+  /// Helper function that calls Equals() with the current row. Always inlined 
so that
+  /// it does not appear in cross-compiled IR.
+  template <bool FORCE_NULL_EQUALITY>
+  bool ALWAYS_INLINE Equals(const TupleRow* build_row) const {
+    return Equals<FORCE_NULL_EQUALITY>(build_row, 
expr_values_cache_.cur_expr_values(),
+        expr_values_cache_.cur_expr_values_null());
+  }
 
-  /// Cross-compiled function to access member variables used in 
CodegenHashCurrentRow().
+  /// Cross-compiled function to access member variables used in 
CodegenHashRow().
   uint32_t GetHashSeed() const;
 
   /// Functions to be replaced by codegen to specialize the hash table.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.inline.h b/be/src/exec/hash-table.inline.h
index bed02b9..c266704 100644
--- a/be/src/exec/hash-table.inline.h
+++ b/be/src/exec/hash-table.inline.h
@@ -26,17 +26,21 @@
 
 namespace impala {
 
-inline bool HashTableCtx::EvalAndHashBuild(TupleRow* row) {
-  bool has_null = EvalBuildRow(row);
+inline bool HashTableCtx::EvalAndHashBuild(const TupleRow* row) {
+  uint8_t* expr_values = expr_values_cache_.cur_expr_values();
+  uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null();
+  bool has_null = EvalBuildRow(row, expr_values, expr_values_null);
   if (!stores_nulls() && has_null) return false;
-  expr_values_cache_.SetExprValuesHash(HashCurrentRow());
+  expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, 
expr_values_null));
   return true;
 }
 
-inline bool HashTableCtx::EvalAndHashProbe(TupleRow* row) {
-  bool has_null = EvalProbeRow(row);
+inline bool HashTableCtx::EvalAndHashProbe(const TupleRow* row) {
+  uint8_t* expr_values = expr_values_cache_.cur_expr_values();
+  uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null();
+  bool has_null = EvalProbeRow(row, expr_values, expr_values_null);
   if (has_null && !(stores_nulls() && finds_some_nulls())) return false;
-  expr_values_cache_.SetExprValuesHash(HashCurrentRow());
+  expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, 
expr_values_null));
   return true;
 }
 
@@ -92,7 +96,7 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t 
num_buckets,
 inline HashTable::HtData* HashTable::InsertInternal(HashTableCtx* ht_ctx) {
   ++num_probes_;
   bool found = false;
-  uint32_t hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+  uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
   int64_t bucket_idx = Probe<true>(buckets_, num_buckets_, ht_ctx, hash, 
&found);
   DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND);
   if (found) {
@@ -135,7 +139,7 @@ inline void HashTable::PrefetchBucket(uint32_t hash) {
 inline HashTable::Iterator HashTable::FindProbeRow(HashTableCtx* ht_ctx) {
   ++num_probes_;
   bool found = false;
-  uint32_t hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+  uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
   int64_t bucket_idx = Probe<false>(buckets_, num_buckets_, ht_ctx, hash, 
&found);
   if (found) {
     return Iterator(this, ht_ctx->scratch_row(), bucket_idx,
@@ -148,7 +152,7 @@ inline HashTable::Iterator 
HashTable::FindProbeRow(HashTableCtx* ht_ctx) {
 inline HashTable::Iterator HashTable::FindBuildRowBucket(
     HashTableCtx* ht_ctx, bool* found) {
   ++num_probes_;
-  uint32_t hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+  uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
   int64_t bucket_idx = Probe<true>(buckets_, num_buckets_, ht_ctx, hash, 
found);
   DuplicateNode* duplicates = NULL;
   if (stores_duplicates() && LIKELY(bucket_idx != Iterator::BUCKET_NOT_FOUND)) 
{

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-aggregation-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node-ir.cc 
b/be/src/exec/partitioned-aggregation-node-ir.cc
index 5982b07..194f6c4 100644
--- a/be/src/exec/partitioned-aggregation-node-ir.cc
+++ b/be/src/exec/partitioned-aggregation-node-ir.cc
@@ -79,7 +79,7 @@ void IR_ALWAYS_INLINE 
PartitionedAggregationNode::EvalAndHashPrefetchGroup(
       is_null = !ht_ctx->EvalAndHashProbe(row);
     }
     // Hoist lookups out of non-null branch to speed up non-null case.
-    const uint32_t hash = expr_vals_cache->ExprValuesHash();
+    const uint32_t hash = expr_vals_cache->CurExprValuesHash();
     const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
     HashTable* hash_tbl = GetHashTable(partition_idx);
     if (is_null) {
@@ -98,7 +98,7 @@ Status PartitionedAggregationNode::ProcessRow(TupleRow* 
__restrict__ row,
     HashTableCtx* __restrict__ ht_ctx) {
   HashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache();
   // Hoist lookups out of non-null branch to speed up non-null case.
-  const uint32_t hash = expr_vals_cache->ExprValuesHash();
+  const uint32_t hash = expr_vals_cache->CurExprValuesHash();
   const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
   if (expr_vals_cache->IsRowNull()) return Status::OK();
   // To process this row, we first see if it can be aggregated or inserted into this
@@ -188,7 +188,7 @@ Status PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize,
     FOREACH_ROW_LIMIT(in_batch, group_start, cache_size, in_batch_iter) {
       // Hoist lookups out of non-null branch to speed up non-null case.
       TupleRow* in_row = in_batch_iter.Get();
-      const uint32_t hash = expr_vals_cache->ExprValuesHash();
+      const uint32_t hash = expr_vals_cache->CurExprValuesHash();
       const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
       if (!expr_vals_cache->IsRowNull() &&
           !TryAddToHashTable(ht_ctx, hash_partitions_[partition_idx],

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc
index 9c91b07..eb5addc 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -1814,8 +1814,7 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
     // The codegen'd ProcessBatch function is only used in Open() with level_ = 0,
     // so don't use murmur hash
     Function* hash_fn;
-    RETURN_IF_ERROR(ht_ctx_->CodegenHashCurrentRow(state_, /* use murmur */ false,
-        &hash_fn));
+    RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state_, /* use murmur */ false, &hash_fn));
 
     // Codegen HashTable::Equals<true>
     Function* build_equals_fn;
@@ -1830,7 +1829,7 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
         "EvalProbeRow");
     DCHECK_EQ(replaced, 1);
 
-    replaced = codegen->ReplaceCallSites(process_batch_fn, hash_fn, "HashCurrentRow");
+    replaced = codegen->ReplaceCallSites(process_batch_fn, hash_fn, "HashRow");
     DCHECK_EQ(replaced, 1);
 
     replaced = codegen->ReplaceCallSites(process_batch_fn, build_equals_fn, "Equals");
@@ -1888,7 +1887,7 @@ Status PartitionedAggregationNode::CodegenProcessBatchStreaming() {
 
   // We only use the top-level hash function for streaming aggregations.
   Function* hash_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenHashCurrentRow(state_, false, &hash_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state_, false, &hash_fn));
 
   // Codegen HashTable::Equals
   Function* equals_fn;
@@ -1907,8 +1906,7 @@ Status PartitionedAggregationNode::CodegenProcessBatchStreaming() {
       "EvalProbeRow");
   DCHECK_EQ(replaced, 1);
 
-  replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, hash_fn,
-      "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
 
   replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, equals_fn, "Equals");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-hash-join-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node-ir.cc b/be/src/exec/partitioned-hash-join-node-ir.cc
index fde1f69..bab1bf8 100644
--- a/be/src/exec/partitioned-hash-join-node-ir.cc
+++ b/be/src/exec/partitioned-hash-join-node-ir.cc
@@ -264,7 +264,7 @@ bool IR_ALWAYS_INLINE PartitionedHashJoinNode::NextProbeRow(
     bool skip_row = false;
 
     // The hash of the expressions results for the current probe row.
-    uint32_t hash = expr_vals_cache->ExprValuesHash();
+    uint32_t hash = expr_vals_cache->CurExprValuesHash();
     // Hoist the followings out of the else statement below to speed up non-null case.
     const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
     HashTable* hash_tbl = hash_tbls_[partition_idx];
@@ -344,7 +344,7 @@ void IR_ALWAYS_INLINE PartitionedHashJoinNode::EvalAndHashProbePrefetchGroup(
     TupleRow* row = batch_iter.Get();
     if (ht_ctx->EvalAndHashProbe(row)) {
       if (prefetch_mode != TPrefetchMode::NONE) {
-        uint32_t hash = expr_vals_cache->ExprValuesHash();
+        uint32_t hash = expr_vals_cache->CurExprValuesHash();
         const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
         HashTable* hash_tbl = hash_tbls_[partition_idx];
         if (LIKELY(hash_tbl != NULL)) hash_tbl->PrefetchBucket<true>(hash);
@@ -461,7 +461,7 @@ Status PartitionedHashJoinNode::ProcessBuildBatch(RowBatch* build_batch,
         ctx.local_bloom_filter->Insert(filter_hash);
       }
     }
-    const uint32_t hash = expr_vals_cache->ExprValuesHash();
+    const uint32_t hash = expr_vals_cache->CurExprValuesHash();
     const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
     Partition* partition = hash_partitions_[partition_idx];
     const bool result = AppendRow(partition->build_rows(), build_row, &build_status_);
@@ -485,7 +485,7 @@ bool PartitionedHashJoinNode::Partition::InsertBatch(
     FOREACH_ROW_LIMIT(batch, cur_row, prefetch_size, batch_iter) {
       if (ht_ctx->EvalAndHashBuild(batch_iter.Get())) {
         if (prefetch_mode != TPrefetchMode::NONE) {
-          hash_tbl_->PrefetchBucket<false>(expr_vals_cache->ExprValuesHash());
+          hash_tbl_->PrefetchBucket<false>(expr_vals_cache->CurExprValuesHash());
         }
       } else {
         expr_vals_cache->SetRowNull();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-hash-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node.cc b/be/src/exec/partitioned-hash-join-node.cc
index d424fe6..f634193 100644
--- a/be/src/exec/partitioned-hash-join-node.cc
+++ b/be/src/exec/partitioned-hash-join-node.cc
@@ -207,10 +207,9 @@ Status PartitionedHashJoinNode::Prepare(RuntimeState* state) {
   if (state->codegen_enabled()) {
     // Codegen for hashing rows
     Function* hash_fn;
-    codegen_status = ht_ctx_->CodegenHashCurrentRow(state, false, &hash_fn);
+    codegen_status = ht_ctx_->CodegenHashRow(state, false, &hash_fn);
     Function* murmur_hash_fn;
-    codegen_status.MergeStatus(
-        ht_ctx_->CodegenHashCurrentRow(state, true, &murmur_hash_fn));
+    codegen_status.MergeStatus(ht_ctx_->CodegenHashRow(state, true, &murmur_hash_fn));
 
     // Codegen for evaluating build rows
     Function* eval_build_row_fn;
@@ -1685,13 +1684,11 @@ Status PartitionedHashJoinNode::CodegenProcessBuildBatch(RuntimeState* state,
       ConstantInt::get(Type::getInt1Ty(codegen->context()), filters_.size() > 0));
 
   // process_build_batch_fn_level0 uses CRC hash if available,
-  replaced = codegen->ReplaceCallSites(process_build_batch_fn_level0, hash_fn,
-      "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(process_build_batch_fn_level0, hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
 
   // process_build_batch_fn uses murmur
-  replaced = codegen->ReplaceCallSites(process_build_batch_fn, murmur_hash_fn,
-      "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(process_build_batch_fn, murmur_hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
 
   // Never build filters after repartitioning, as all rows have already been added to the
@@ -1769,11 +1766,6 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
   DCHECK(process_probe_batch_fn->getLinkage() == GlobalValue::WeakODRLinkage)
       << LlvmCodeGen::Print(process_probe_batch_fn);
 
-  // Bake in %this pointer argument to process_probe_batch_fn.
-  Value* this_arg = codegen->GetArgument(process_probe_batch_fn, 0);
-  Value* this_loc = codegen->CastPtrToLlvmPtr(this_arg->getType(), this);
-  this_arg->replaceAllUsesWith(this_loc);
-
   // Replace the parameter 'prefetch_mode' with constant.
   Value* prefetch_mode_arg = codegen->GetArgument(process_probe_batch_fn, 1);
   TPrefetchMode::type prefetch_mode = state->query_options().prefetch_mode;
@@ -1782,11 +1774,6 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
   prefetch_mode_arg->replaceAllUsesWith(
       ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode));
 
-  // Bake in %ht_ctx pointer argument to process_probe_batch_fn
-  Value* ht_ctx_arg = codegen->GetArgument(process_probe_batch_fn, 3);
-  Value* ht_ctx_loc = codegen->CastPtrToLlvmPtr(ht_ctx_arg->getType(), ht_ctx_.get());
-  ht_ctx_arg->replaceAllUsesWith(ht_ctx_loc);
-
   // Codegen HashTable::Equals
   Function* probe_equals_fn;
   RETURN_IF_ERROR(ht_ctx_->CodegenEquals(state, false, &probe_equals_fn));
@@ -1868,12 +1855,10 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
 
   // process_probe_batch_fn_level0 uses CRC hash if available,
   // process_probe_batch_fn uses murmur
-  replaced = codegen->ReplaceCallSites(process_probe_batch_fn_level0, hash_fn,
-      "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(process_probe_batch_fn_level0, hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
 
-  replaced = codegen->ReplaceCallSites(process_probe_batch_fn, murmur_hash_fn,
-      "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(process_probe_batch_fn, murmur_hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
 
   // Finalize ProcessProbeBatch functions
@@ -1937,9 +1922,9 @@ Status PartitionedHashJoinNode::CodegenInsertBatch(RuntimeState* state,
   Function* insert_batch_fn_level0 = codegen->CloneFunction(insert_batch_fn);
 
   // Use codegen'd hash functions
-  replaced = codegen->ReplaceCallSites(insert_batch_fn_level0, hash_fn, "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(insert_batch_fn_level0, hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
-  replaced = codegen->ReplaceCallSites(insert_batch_fn, murmur_hash_fn, "HashCurrentRow");
+  replaced = codegen->ReplaceCallSites(insert_batch_fn, murmur_hash_fn, "HashRow");
   DCHECK_EQ(replaced, 1);
 
   insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn);

Reply via email to