Repository: incubator-impala
Updated Branches:
refs/heads/master 0c874189e -> aa28e37eb
IMPALA-4008: don't bake in hash table and hash join pointers
This fixes some of the cases where memory addresses are baked into
codegen'd code.
Testing:
Ran exhaustive build.
Perf:
Ran a local perf run. No significant changes. I was able to see some
small improvements on microbenchmarks.
+-----------+-----------------------+---------+------------+------------+----------------+
| Workload  | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+-----------+-----------------------+---------+------------+------------+----------------+
| TPCH(_20) | parquet / none / none | 9.07    | +0.46%     | 5.88       | +0.34%         |
+-----------+-----------------------+---------+------------+------------+----------------+
+-----------+----------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
| Workload  | Query    | File Format           | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%)  | Base StdDev(%) | Num Clients | Iters |
+-----------+----------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
| TPCH(_20) | TPCH-Q2  | parquet / none / none | 2.12   | 1.89        | +12.29%    | * 10.85% * | * 20.30% *     | 1           | 10    |
| TPCH(_20) | TPCH-Q13 | parquet / none / none | 9.84   | 9.34        | +5.39%     | 9.01%      | 3.79%          | 1           | 10    |
| TPCH(_20) | TPCH-Q17 | parquet / none / none | 14.61  | 14.19       | +2.97%     | 2.15%      | 1.52%          | 1           | 10    |
| TPCH(_20) | TPCH-Q18 | parquet / none / none | 14.76  | 14.35       | +2.82%     | 3.20%      | 2.64%          | 1           | 10    |
| TPCH(_20) | TPCH-Q9  | parquet / none / none | 13.72  | 13.54       | +1.30%     | 1.75%      | 0.70%          | 1           | 10    |
| TPCH(_20) | TPCH-Q8  | parquet / none / none | 5.71   | 5.64        | +1.30%     | 1.21%      | 1.23%          | 1           | 10    |
| TPCH(_20) | TPCH-Q19 | parquet / none / none | 47.35  | 46.75       | +1.28%     | 2.39%      | 1.88%          | 1           | 10    |
| TPCH(_20) | TPCH-Q5  | parquet / none / none | 4.57   | 4.52        | +1.20%     | 1.30%      | 0.88%          | 1           | 10    |
| TPCH(_20) | TPCH-Q16 | parquet / none / none | 2.07   | 2.05        | +1.12%     | 2.59%      | 1.79%          | 1           | 10    |
| TPCH(_20) | TPCH-Q11 | parquet / none / none | 1.45   | 1.45        | +0.15%     | 2.69%      | 2.06%          | 1           | 10    |
| TPCH(_20) | TPCH-Q3  | parquet / none / none | 4.65   | 4.65        | -0.09%     | 2.12%      | 2.17%          | 1           | 10    |
| TPCH(_20) | TPCH-Q4  | parquet / none / none | 3.22   | 3.23        | -0.26%     | 1.03%      | 1.33%          | 1           | 10    |
| TPCH(_20) | TPCH-Q7  | parquet / none / none | 15.84  | 15.92       | -0.50%     | 0.91%      | 1.15%          | 1           | 10    |
| TPCH(_20) | TPCH-Q14 | parquet / none / none | 3.29   | 3.31        | -0.59%     | 3.31%      | 1.58%          | 1           | 10    |
| TPCH(_20) | TPCH-Q22 | parquet / none / none | 2.65   | 2.67        | -0.78%     | 3.03%      | 1.46%          | 1           | 10    |
| TPCH(_20) | TPCH-Q15 | parquet / none / none | 4.50   | 4.55        | -1.19%     | 2.87%      | 2.45%          | 1           | 10    |
| TPCH(_20) | TPCH-Q20 | parquet / none / none | 3.84   | 3.91        | -1.76%     | 2.20%      | 1.94%          | 1           | 10    |
| TPCH(_20) | TPCH-Q10 | parquet / none / none | 5.58   | 5.70        | -2.00%     | 1.01%      | 1.79%          | 1           | 10    |
| TPCH(_20) | TPCH-Q21 | parquet / none / none | 22.84  | 23.42       | -2.47%     | 0.68%      | 0.56%          | 1           | 10    |
| TPCH(_20) | TPCH-Q1  | parquet / none / none | 11.25  | 11.60       | -3.06%     | 0.48%      | 1.74%          | 1           | 10    |
| TPCH(_20) | TPCH-Q12 | parquet / none / none | 3.81   | 3.98        | -4.38%     | 1.62%      | 1.14%          | 1           | 10    |
| TPCH(_20) | TPCH-Q6  | parquet / none / none | 1.94   | 2.04        | -4.85%     | 2.40%      | 1.58%          | 1           | 10    |
+-----------+----------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
+--------------------+-----------------------+---------+------------+------------+----------------+
| Workload           | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+--------------------+-----------------------+---------+------------+------------+----------------+
| TARGETED-PERF(_20) | parquet / none / none | 8.17    | -1.66%     | 2.96       | -1.48%         |
+--------------------+-----------------------+---------+------------+------------+----------------+
+--------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
| Workload | Query
| File Format | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%) |
Base StdDev(%) | Num Clients | Iters |
+--------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
| TARGETED-PERF(_20) | primitive_topn_bigint
| parquet / none / none | 3.32 | 2.87 | +15.31% | * 17.22% * |
1.64% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q4
| parquet / none / none | 7.07 | 6.61 | +6.93% | * 15.59% * |
5.08% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q1
| parquet / none / none | 1.18 | 1.12 | +5.57% | 1.94% |
2.96% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_empty_build_join_1
| parquet / none / none | 10.75 | 10.47 | +2.76% | 1.15% |
0.94% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_top-n_all
| parquet / none / none | 24.30 | 23.85 | +1.87% | 1.40% |
0.82% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_broadcast_join_2
| parquet / none / none | 2.45 | 2.42 | +1.38% | 1.93% |
1.33% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_filter_bigint_selective
| parquet / none / none | 0.57 | 0.57 | +1.16% | 3.66% |
4.19% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q3
| parquet / none / none | 1.70 | 1.68 | +1.06% | 1.88% |
2.69% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_broadcast_join_3
| parquet / none / none | 4.15 | 4.13 | +0.47% | 1.27% |
1.28% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_broadcast_join_1
| parquet / none / none | 1.46 | 1.46 | +0.32% | 1.68% |
2.43% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_orderby_all
| parquet / none / none | 12.92 | 12.89 | +0.22% | 1.69% |
1.12% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q4
| parquet / none / none | 1.68 | 1.67 | +0.21% | 2.31% |
2.44% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q7
| parquet / none / none | 3.35 | 3.35 | +0.14% | 1.10% |
1.84% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_conjunct_ordering_4
| parquet / none / none | 0.46 | 0.46 | +0.07% | 0.29% |
0.27% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_groupby_bigint_pk
| parquet / none / none | 35.99 | 36.04 | -0.13% | 8.55% |
6.95% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q6
| parquet / none / none | 0.97 | 0.97 | -0.16% | 2.68% |
2.66% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_filter_decimal_selective
| parquet / none / none | 0.84 | 0.84 | -0.31% | 2.85% |
3.58% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_shuffle_join_union_all_with_groupby
| parquet / none / none | 20.46 | 20.53 | -0.34% | 0.60% |
0.60% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q7
| parquet / none / none | 0.98 | 0.98 | -0.44% | 2.78% |
2.33% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_exchange_shuffle
| parquet / none / none | 25.21 | 25.34 | -0.49% | 1.94% |
1.30% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_filter_string_like
| parquet / none / none | 6.27 | 6.31 | -0.56% | 0.59% |
0.43% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q5
| parquet / none / none | 1.96 | 1.97 | -0.60% | 1.98% |
1.25% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_filter_bigint_non_selective
| parquet / none / none | 0.53 | 0.53 | -0.71% | 2.64% |
0.28% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_exchange_broadcast
| parquet / none / none | 14.77 | 14.97 | -1.33% | 2.59% |
2.39% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_LIMIT-Q1
| parquet / none / none | 0.01 | 0.01 | -1.40% | 3.77% |
3.51% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_orderby_bigint
| parquet / none / none | 2.58 | 2.61 | -1.44% | 2.85% |
1.04% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q1
| parquet / none / none | 1.47 | 1.49 | -1.63% | 2.79% |
1.62% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_filter_decimal_non_selective
| parquet / none / none | 0.79 | 0.81 | -1.65% | 1.91% |
2.67% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q2
| parquet / none / none | 1.56 | 1.58 | -1.73% | 2.86% |
2.57% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_STRING-Q6
| parquet / none / none | 4.48 | 4.59 | -2.22% | 1.21% |
0.85% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_conjunct_ordering_5
| parquet / none / none | 11.80 | 12.10 | -2.50% | 3.06% |
2.09% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_conjunct_ordering_1
| parquet / none / none | 7.96 | 8.18 | -2.58% | 1.69% |
2.17% | 1 | 10 |
| TARGETED-PERF(_20) |
primitive_shuffle_join_one_to_many_string_with_groupby | parquet / none / none
| 93.11 | 95.78 | -2.78% | 1.07% | 1.00% | 1
| 10 |
| TARGETED-PERF(_20) | primitive_filter_string_selective
| parquet / none / none | 0.88 | 0.91 | -3.03% | 4.68% |
5.34% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q5
| parquet / none / none | 2.44 | 2.52 | -3.35% | 2.39% |
2.04% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_groupby_bigint_highndv
| parquet / none / none | 9.16 | 9.50 | -3.59% | 0.98% |
1.56% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_conjunct_ordering_3
| parquet / none / none | 1.44 | 1.50 | -4.14% | 1.10% |
1.08% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_filter_string_non_selective
| parquet / none / none | 0.89 | 0.93 | -4.31% | 7.20% |
5.12% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_groupby_decimal_highndv
| parquet / none / none | 13.98 | 14.94 | -6.40% | * 11.24% * |
8.66% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q2
| parquet / none / none | 2.87 | 3.13 | -8.36% | 0.57% |
1.59% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_groupby_bigint_lowndv
| parquet / none / none | 1.48 | 1.62 | -8.91% | 1.62% |
1.28% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_conjunct_ordering_2
| parquet / none / none | 13.68 | 15.05 | -9.14% | 2.91% |
1.83% | 1 | 10 |
| TARGETED-PERF(_20) | primitive_groupby_decimal_lowndv.test
| parquet / none / none | 1.47 | 1.61 | -9.26% | 0.19% |
1.50% | 1 | 10 |
| TARGETED-PERF(_20) | PERF_AGG-Q3
| parquet / none / none | 4.25 | 4.82 | -11.82% | 0.79% |
0.84% | 1 | 10 |
+--------------------+--------------------------------------------------------+-----------------------+--------+-------------+------------+------------+----------------+-------------+-------+
Change-Id: Ie353666dbb5c958f0094d169306fe930ec3014c5
Reviewed-on: http://gerrit.cloudera.org:8080/4326
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/01b8ade6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/01b8ade6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/01b8ade6
Branch: refs/heads/master
Commit: 01b8ade668818bb49a53b3d434c5a3b436a4c2c9
Parents: 0c87418
Author: Tim Armstrong <[email protected]>
Authored: Tue Sep 6 15:31:29 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Wed Sep 14 03:04:18 2016 +0000
----------------------------------------------------------------------
be/src/exec/hash-table-test.cc | 4 +-
be/src/exec/hash-table.cc | 351 ++++++++++----------
be/src/exec/hash-table.h | 117 ++++---
be/src/exec/hash-table.inline.h | 22 +-
be/src/exec/partitioned-aggregation-node-ir.cc | 6 +-
be/src/exec/partitioned-aggregation-node.cc | 10 +-
be/src/exec/partitioned-hash-join-node-ir.cc | 8 +-
be/src/exec/partitioned-hash-join-node.cc | 31 +-
8 files changed, 270 insertions(+), 279 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table-test.cc b/be/src/exec/hash-table-test.cc
index 07d9214..1066d2c 100644
--- a/be/src/exec/hash-table-test.cc
+++ b/be/src/exec/hash-table-test.cc
@@ -455,7 +455,7 @@ class HashTableTest : public testing::Test {
for (int build_row_val = 0; build_row_val < table_size; ++build_row_val) {
TupleRow* row = CreateTupleRow(build_row_val);
bool passes = ht_ctx->EvalAndHashBuild(row);
- hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+ hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
EXPECT_TRUE(passes);
// Insert using both Insert() and FindBucket() methods.
@@ -473,7 +473,7 @@ class HashTableTest : public testing::Test {
EXPECT_EQ(hash_table->EmptyBuckets(), table_size - build_row_val - 1);
passes = ht_ctx->EvalAndHashProbe(row);
- hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+ hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
EXPECT_TRUE(passes);
iter = hash_table->FindProbeRow(ht_ctx.get());
EXPECT_FALSE(iter.AtEnd());
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index f8df641..dfa700e 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -148,32 +148,33 @@ uint32_t HashTableCtx::Hash(const void* input, int len,
uint32_t hash) const {
return HashUtil::MurmurHash2_64(input, len, hash);
}
-uint32_t HashTableCtx::HashCurrentRow() const {
+uint32_t HashTableCtx::HashRow(
+ const uint8_t* expr_values, const uint8_t* expr_values_null) const {
DCHECK_LT(level_, seeds_.size());
if (expr_values_cache_.var_result_offset() == -1) {
/// This handles NULLs implicitly since a constant seed value was put
/// into results buffer for nulls.
- return Hash(expr_values_cache_.cur_expr_values_,
- expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]);
+ return Hash(
+ expr_values, expr_values_cache_.expr_values_bytes_per_row(),
seeds_[level_]);
} else {
- return HashTableCtx::HashVariableLenRow();
+ return HashTableCtx::HashVariableLenRow(expr_values, expr_values_null);
}
}
-bool HashTableCtx::EvalRow(TupleRow* row, const vector<ExprContext*>& ctxs) {
+bool HashTableCtx::EvalRow(const TupleRow* row, const vector<ExprContext*>&
ctxs,
+ uint8_t* expr_values, uint8_t* expr_values_null) {
bool has_null = false;
- uint8_t* exprs_nullness = expr_values_cache_.ExprValueNullPtr(0);
for (int i = 0; i < ctxs.size(); ++i) {
- void* loc = expr_values_cache_.ExprValuePtr(i);
+ void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
void* val = ctxs[i]->GetValue(row);
if (val == NULL) {
// If the table doesn't store nulls, no reason to keep evaluating
if (!stores_nulls_) return true;
- exprs_nullness[i] = true;
+ expr_values_null[i] = true;
val = reinterpret_cast<void*>(&NULL_VALUE);
has_null = true;
} else {
- exprs_nullness[i] = false;
+ expr_values_null[i] = false;
}
DCHECK_LE(build_expr_ctxs_[i]->root()->type().GetSlotSize(),
sizeof(NULL_VALUE));
@@ -182,48 +183,48 @@ bool HashTableCtx::EvalRow(TupleRow* row, const
vector<ExprContext*>& ctxs) {
return has_null;
}
-uint32_t HashTableCtx::HashVariableLenRow() const {
+uint32_t HashTableCtx::HashVariableLenRow(
+ const uint8_t* expr_values, const uint8_t* expr_values_null) const {
uint32_t hash = seeds_[level_];
int var_result_offset = expr_values_cache_.var_result_offset();
// Hash the non-var length portions (if there are any)
if (var_result_offset != 0) {
- hash = Hash(expr_values_cache_.cur_expr_values_, var_result_offset, hash);
+ hash = Hash(expr_values, var_result_offset, hash);
}
- uint8_t* exprs_nullness = expr_values_cache_.ExprValueNullPtr(0);
for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
- // non-string and null slots are already part of cur_expr_values_
- if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING &&
- build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue;
+ // non-string and null slots are already part of 'expr_values'.
+ if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING
+ && build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue;
- void* loc = expr_values_cache_.ExprValuePtr(i);
- if (exprs_nullness[i]) {
+ const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
+ if (expr_values_null[i]) {
// Hash the null random seed values at 'loc'
hash = Hash(loc, sizeof(StringValue), hash);
} else {
// Hash the string
// TODO: when using CRC hash on empty string, this only swaps bytes.
- StringValue* str = reinterpret_cast<StringValue*>(loc);
+ const StringValue* str = reinterpret_cast<const StringValue*>(loc);
hash = Hash(str->ptr, str->len, hash);
}
}
return hash;
}
-template<bool FORCE_NULL_EQUALITY>
-bool HashTableCtx::Equals(TupleRow* build_row) const {
- uint8_t* exprs_nullness = expr_values_cache_.ExprValueNullPtr(0);
+template <bool FORCE_NULL_EQUALITY>
+bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t*
expr_values,
+ const uint8_t* expr_values_null) const {
for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
void* val = build_expr_ctxs_[i]->GetValue(build_row);
if (val == NULL) {
if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false;
- if (!exprs_nullness[i]) return false;
+ if (!expr_values_null[i]) return false;
continue;
} else {
- if (exprs_nullness[i]) return false;
+ if (expr_values_null[i]) return false;
}
- void* loc = expr_values_cache_.ExprValuePtr(i);
+ const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
if (!RawValue::Eq(loc, val, build_expr_ctxs_[i]->root()->type())) {
return false;
}
@@ -231,19 +232,21 @@ bool HashTableCtx::Equals(TupleRow* build_row) const {
return true;
}
-template bool HashTableCtx::Equals<true>(TupleRow* build_row) const;
-template bool HashTableCtx::Equals<false>(TupleRow* build_row) const;
+template bool HashTableCtx::Equals<true>(const TupleRow* build_row,
+ const uint8_t* expr_values, const uint8_t* expr_values_null) const;
+template bool HashTableCtx::Equals<false>(const TupleRow* build_row,
+ const uint8_t* expr_values, const uint8_t* expr_values_null) const;
HashTableCtx::ExprValuesCache::ExprValuesCache()
- : capacity_(0),
- cur_expr_values_(NULL),
- cur_expr_values_null_(NULL),
- cur_expr_values_hash_(NULL),
- cur_expr_values_hash_end_(NULL),
- expr_values_array_(NULL),
- expr_values_null_array_(NULL),
- expr_values_hash_array_(NULL),
- null_bitmap_(0) { }
+ : capacity_(0),
+ cur_expr_values_(NULL),
+ cur_expr_values_null_(NULL),
+ cur_expr_values_hash_(NULL),
+ cur_expr_values_hash_end_(NULL),
+ expr_values_array_(NULL),
+ expr_values_null_array_(NULL),
+ expr_values_hash_array_(NULL),
+ null_bitmap_(0) {}
Status HashTableCtx::ExprValuesCache::Init(RuntimeState* state,
MemTracker* tracker, const std::vector<ExprContext*>& build_expr_ctxs) {
@@ -312,12 +315,14 @@ int HashTableCtx::ExprValuesCache::MemUsage(int capacity,
Bitmap::MemUsage(capacity); // null_bitmap_
}
-uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(int expr_idx) const {
- return cur_expr_values_ + expr_values_offsets_[expr_idx];
+uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(
+ uint8_t* expr_values, int expr_idx) const {
+ return expr_values + expr_values_offsets_[expr_idx];
}
-uint8_t* HashTableCtx::ExprValuesCache::ExprValueNullPtr(int expr_idx) const {
- return cur_expr_values_null_ + expr_idx;
+const uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(
+ const uint8_t* expr_values, int expr_idx) const {
+ return expr_values + expr_values_offsets_[expr_idx];
}
void HashTableCtx::ExprValuesCache::ResetIterators() {
@@ -609,65 +614,64 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
}
// Codegen for evaluating a tuple row over either build_expr_ctxs_ or
probe_expr_ctxs_.
-// For a group by with (big int, string) the IR looks like
+// For a group by with (big int, string) the IR looks like:
+//
// define i1 @EvalProbeRow(%"class.impala::HashTableCtx"* %this_ptr,
-// %"class.impala::TupleRow"* %row) #33 {
+// %"class.impala::TupleRow"* %row, i8* %expr_values, i8*
%expr_values_null) #34 {
// entry:
-// %0 = load i8*, i8** inttoptr (i64 230325056 to i8**)
-// %1 = load i8*, i8** inttoptr (i64 230325064 to i8**)
-// %loc_addr = getelementptr i8, i8* %0, i32 0
-// %loc = bitcast i8* %loc_addr to i32*
-// %result = call i64 @GetSlotRef.3(%"class.impala::ExprContext"*
-// inttoptr (i64 158123712 to %"class.impala::ExprContext"*),
-// %"class.impala::TupleRow"* %row)
-// %is_null = trunc i64 %result to i1
-// %2 = zext i1 %is_null to i8
-// %null_byte_loc = getelementptr i8, i8* %1, i32 0
-// store i8 %2, i8* %null_byte_loc
+// %loc_addr = getelementptr i8, i8* %expr_values, i32 0
+// %loc = bitcast i8* %loc_addr to i64*
+// %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
+// inttoptr (i64 197737664 to %"class.impala::ExprContext"*),
+// %"class.impala::TupleRow"* %row)
+// %0 = extractvalue { i8, i64 } %result, 0
+// %is_null = trunc i8 %0 to i1
+// %1 = zext i1 %is_null to i8
+// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
+// store i8 %1, i8* %null_byte_loc
// br i1 %is_null, label %null, label %not_null
//
// null: ; preds = %entry
-// store i32 -2128831035, i32* %loc
+// store i64 2166136261, i64* %loc
// br label %continue
//
// not_null: ; preds = %entry
-// %3 = ashr i64 %result, 32
-// %4 = trunc i64 %3 to i32
-// store i32 %4, i32* %loc
+// %val = extractvalue { i8, i64 } %result, 1
+// store i64 %val, i64* %loc
// br label %continue
//
// continue: ; preds = %not_null, %null
// %is_null_phi = phi i1 [ true, %null ], [ false, %not_null ]
// %has_null = or i1 false, %is_null_phi
-// %loc_addr1 = getelementptr i8, i8* %0, i32 8
+// %loc_addr1 = getelementptr i8, i8* %expr_values, i32 8
// %loc2 = bitcast i8* %loc_addr1 to %"struct.impala::StringValue"*
-// %result6 = call { i64, i8* } @GetSlotRef.4(%"class.impala::ExprContext"*
-// inttoptr (i64 158123904 to %"class.impala::ExprContext"*),
-// %"class.impala::TupleRow"* %row)
-// %5 = extractvalue { i64, i8* } %result6, 0
-// %is_null7 = trunc i64 %5 to i1
-// %6 = zext i1 %is_null7 to i8
-// %null_byte_loc8 = getelementptr i8, i8* %1, i32 1
-// store i8 %6, i8* %null_byte_loc8
+// %result6 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
+// inttoptr (i64 197738048 to %"class.impala::ExprContext"*),
+// %"class.impala::TupleRow"* %row)
+// %2 = extractvalue { i64, i8* } %result6, 0
+// %is_null7 = trunc i64 %2 to i1
+// %3 = zext i1 %is_null7 to i8
+// %null_byte_loc8 = getelementptr i8, i8* %expr_values_null, i32 1
+// store i8 %3, i8* %null_byte_loc8
// br i1 %is_null7, label %null3, label %not_null4
//
// null3: ; preds = %continue
// %string_ptr = getelementptr inbounds %"struct.impala::StringValue",
-// %"struct.impala::StringValue"* %loc2, i32 0, i32 0
+// %"struct.impala::StringValue"* %loc2, i32 0, i32 0
// %string_len = getelementptr inbounds %"struct.impala::StringValue",
-// %"struct.impala::StringValue"* %loc2, i32 0, i32 1
+// %"struct.impala::StringValue"* %loc2, i32 0, i32 1
// store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr
// store i32 -2128831035, i32* %string_len
// br label %continue5
//
// not_null4: ; preds = %continue
+// %4 = extractvalue { i64, i8* } %result6, 0
+// %5 = ashr i64 %4, 32
+// %6 = trunc i64 %5 to i32
+// %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %6, 1
// %result9 = extractvalue { i64, i8* } %result6, 1
-// %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i8*
%result9, 0
-// %8 = extractvalue { i64, i8* } %result6, 0
-// %9 = ashr i64 %8, 32
-// %10 = trunc i64 %9 to i32
-// %11 = insertvalue %"struct.impala::StringValue" %7, i32 %10, 1
-// store %"struct.impala::StringValue" %11, %"struct.impala::StringValue"*
%loc2
+// %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result9, 0
+// store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"*
%loc2
// br label %continue5
//
// continue5: ; preds = %not_null4,
%null3
@@ -675,6 +679,7 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
// %has_null11 = or i1 %has_null, %is_null_phi10
// ret i1 %has_null11
// }
+//
// For each expr, we create 3 code blocks. The null, not null and continue
blocks.
// Both the null and not null branch into the continue block. The continue
block
// becomes the start of the next block for codegen (either the next expr or
just the
@@ -701,37 +706,31 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state,
bool build, Function**
Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
DCHECK(this_type != NULL);
PointerType* this_ptr_type = PointerType::get(this_type, 0);
- PointerType* buffer_ptr_type = PointerType::get(codegen->ptr_type(), 0);
LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" :
"EvalProbeRow",
codegen->GetType(TYPE_BOOLEAN));
prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+ prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values",
codegen->ptr_type()));
+ prototype.AddArgument(
+ LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
LLVMContext& context = codegen->context();
LlvmCodeGen::LlvmBuilder builder(context);
- Value* args[2];
+ Value* args[4];
*fn = prototype.GeneratePrototype(&builder, args);
Value* row = args[1];
+ Value* expr_values = args[2];
+ Value* expr_values_null = args[3];
Value* has_null = codegen->false_value();
- // Load cur_expr_values_ into a LLVM pointer.
- Value* cur_expr_values_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
- &expr_values_cache_.cur_expr_values_);
- Value* cur_expr_values = builder.CreateLoad(cur_expr_values_ptr);
-
- // Load cur_expr_values_null_ into a LLVM pointer.
- Value* cur_expr_values_null_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
- &expr_values_cache_.cur_expr_values_null_);
- Value* cur_expr_values_null = builder.CreateLoad(cur_expr_values_null_ptr);
-
for (int i = 0; i < ctxs.size(); ++i) {
// TODO: refactor this to somewhere else? This is not hash table specific
except for
// the null handling bit and would be used for anyone that needs to
materialize a
// vector of exprs
// Convert result buffer to llvm ptr type
int offset = expr_values_cache_.expr_values_offsets(i);
- Value* loc = builder.CreateGEP(NULL, cur_expr_values,
- codegen->GetIntConstant(TYPE_INT, offset), "loc_addr");
+ Value* loc = builder.CreateGEP(
+ NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset),
"loc_addr");
Value* llvm_loc = builder.CreatePointerCast(loc,
codegen->GetPtrType(ctxs[i]->root()->type()), "loc");
@@ -758,8 +757,8 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state,
bool build, Function**
// Set null-byte result
Value* null_byte = builder.CreateZExt(is_null,
codegen->GetType(TYPE_TINYINT));
- Value* llvm_null_byte_loc = builder.CreateGEP(NULL, cur_expr_values_null,
- codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc");
+ Value* llvm_null_byte_loc = builder.CreateGEP(
+ NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i),
"null_byte_loc");
builder.CreateStore(null_byte, llvm_null_byte_loc);
builder.CreateCondBr(is_null, null_block, not_null_block);
@@ -800,15 +799,15 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state,
bool build, Function**
// Codegen for hashing the current row. In the case with both string and
non-string data
// (group by int_col, string_col), the IR looks like:
-// define i32 @HashCurrentRow(%"class.impala::HashTableCtx"* %this_ptr) #33 {
+//
+// define i32 @HashRow(%"class.impala::HashTableCtx"* %this_ptr, i8*
%expr_values,
+// i8* %expr_values_null) #34 {
// entry:
-// %0 = load i8*, i8** inttoptr (i64 230325056 to i8**)
-// %1 = load i8*, i8** inttoptr (i64 230325064 to i8**)
// %seed = call i32 @_ZNK6impala12HashTableCtx11GetHashSeedEv(
-// %"class.impala::HashTableCtx"* %this_ptr)
-// %hash = call i32 @CrcHash8(i8* %0, i32 8, i32 %seed)
-// %loc_addr = getelementptr i8, i8* %0, i32 8
-// %null_byte_loc = getelementptr i8, i8* %1, i32 1
+// %"class.impala::HashTableCtx"* %this_ptr)
+// %hash = call i32 @CrcHash8(i8* %expr_values, i32 8, i32 %seed)
+// %loc_addr = getelementptr i8, i8* %expr_values, i32 8
+// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 1
// %null_byte = load i8, i8* %null_byte_loc
// %is_null = icmp ne i8 %null_byte, 0
// br i1 %is_null, label %null, label %not_null
@@ -819,12 +818,12 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state,
bool build, Function**
//
// not_null: ; preds = %entry
// %str_val = bitcast i8* %loc_addr to %"struct.impala::StringValue"*
-// %2 = getelementptr inbounds %"struct.impala::StringValue",
+// %0 = getelementptr inbounds %"struct.impala::StringValue",
// %"struct.impala::StringValue"* %str_val, i32 0, i32 0
-// %3 = getelementptr inbounds %"struct.impala::StringValue",
+// %1 = getelementptr inbounds %"struct.impala::StringValue",
// %"struct.impala::StringValue"* %str_val, i32 0, i32 1
-// %ptr = load i8*, i8** %2
-// %len = load i32, i32* %3
+// %ptr = load i8*, i8** %0
+// %len = load i32, i32* %1
// %string_hash = call i32 @IrCrcHash(i8* %ptr, i32 %len, i32 %hash)
// br label %continue
//
@@ -832,12 +831,11 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state,
bool build, Function**
// %hash_phi = phi i32 [ %string_hash, %not_null ], [ %str_null, %null ]
// ret i32 %hash_phi
// }
-Status HashTableCtx::CodegenHashCurrentRow(RuntimeState* state, bool
use_murmur,
- Function** fn) {
+Status HashTableCtx::CodegenHashRow(RuntimeState* state, bool use_murmur,
Function** fn) {
for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
// Disable codegen for CHAR
if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) {
- return Status("HashTableCtx::CodegenHashCurrentRow(): CHAR NYI");
+ return Status("HashTableCtx::CodegenHashRow(): CHAR NYI");
}
}
@@ -848,30 +846,21 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
DCHECK(this_type != NULL);
PointerType* this_ptr_type = PointerType::get(this_type, 0);
- PointerType* buffer_ptr_type = PointerType::get(codegen->ptr_type(), 0);
- LlvmCodeGen::FnPrototype prototype(codegen,
- (use_murmur ? "MurmurHashCurrentRow" : "HashCurrentRow"),
- codegen->GetType(TYPE_INT));
+ LlvmCodeGen::FnPrototype prototype(
+ codegen, (use_murmur ? "MurmurHashRow" : "HashRow"),
codegen->GetType(TYPE_INT));
prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
+ prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values",
codegen->ptr_type()));
+ prototype.AddArgument(
+ LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
LLVMContext& context = codegen->context();
LlvmCodeGen::LlvmBuilder builder(context);
- Value* this_arg;
- *fn = prototype.GeneratePrototype(&builder, &this_arg);
-
- // Load cur_expr_values_ into a LLVM pointer.
- Value* cur_expr_values_ptr =
- codegen->CastPtrToLlvmPtr(buffer_ptr_type,
&expr_values_cache_.cur_expr_values_);
- Value* cur_expr_values = builder.CreateLoad(cur_expr_values_ptr);
-
- // Load cur_expr_values_null_ into a LLVM pointer.
- Value* cur_expr_values_null = NULL;
- if (stores_nulls_) {
- Value* cur_expr_values_null_ptr = codegen->CastPtrToLlvmPtr(
- buffer_ptr_type, &expr_values_cache_.cur_expr_values_null_);
- cur_expr_values_null = builder.CreateLoad(cur_expr_values_null_ptr);
- }
+ Value* args[3];
+ *fn = prototype.GeneratePrototype(&builder, args);
+ Value* this_arg = args[0];
+ Value* expr_values = args[1];
+ Value* expr_values_null = args[2];
// Call GetHashSeed() to get seeds_[level_]
Function* get_hash_seed_fn =
@@ -889,8 +878,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
codegen->GetMurmurHashFunction(expr_values_bytes_per_row) :
codegen->GetHashFunction(expr_values_bytes_per_row);
Value* len = codegen->GetIntConstant(TYPE_INT,
expr_values_bytes_per_row);
- hash_result = builder.CreateCall(hash_fn,
- ArrayRef<Value*>({cur_expr_values, len, hash_result}), "hash");
+ hash_result = builder.CreateCall(
+ hash_fn, ArrayRef<Value*>({expr_values, len, hash_result}), "hash");
}
} else {
if (var_result_offset > 0) {
@@ -898,8 +887,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
codegen->GetMurmurHashFunction(var_result_offset) :
codegen->GetHashFunction(var_result_offset);
Value* len = codegen->GetIntConstant(TYPE_INT, var_result_offset);
- hash_result = builder.CreateCall(hash_fn,
- ArrayRef<Value*>({cur_expr_values, len, hash_result}), "hash");
+ hash_result = builder.CreateCall(
+ hash_fn, ArrayRef<Value*>({expr_values, len, hash_result}), "hash");
}
// Hash string slots
@@ -913,8 +902,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
Value* str_null_result = NULL;
int offset = expr_values_cache_.expr_values_offsets(i);
- Value* llvm_loc = builder.CreateGEP(NULL, cur_expr_values,
- codegen->GetIntConstant(TYPE_INT, offset), "loc_addr");
+ Value* llvm_loc = builder.CreateGEP(
+ NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset),
"loc_addr");
// If the hash table stores nulls, we need to check if the stringval
// evaluated to NULL
@@ -923,7 +912,7 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
not_null_block = BasicBlock::Create(context, "not_null", *fn);
continue_block = BasicBlock::Create(context, "continue", *fn);
- Value* llvm_null_byte_loc = builder.CreateGEP(NULL,
cur_expr_values_null,
+ Value* llvm_null_byte_loc = builder.CreateGEP(NULL, expr_values_null,
codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc");
Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte");
Value* is_null = builder.CreateICmpNE(null_byte,
@@ -977,8 +966,8 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
builder.CreateRet(hash_result);
*fn = codegen->FinalizeFunction(*fn);
if (*fn == NULL) {
- return Status("Codegen'd HashTableCtx::HashCurrentRow() function failed "
- "verification, see log");
+ return Status(
+ "Codegen'd HashTableCtx::HashRow() function failed verification, see log");
}
return Status::OK();
}
@@ -986,67 +975,67 @@ Status HashTableCtx::CodegenHashCurrentRow(RuntimeState*
state, bool use_murmur,
// Codegen for HashTableCtx::Equals. For a group by with (bigint, string),
// the IR looks like:
//
-// define i1 @Equals(%"class.impala::HashTableCtx"* %this_ptr,
-// %"class.impala::TupleRow"* %row) #33 {
+// define i1 @Equals(%"class.impala::HashTableCtx"* %this_ptr, %"class.impala::TupleRow"*
+// %row,
+// i8* %expr_values, i8* %expr_values_null) #34 {
// entry:
// %0 = alloca { i64, i8* }
-// %1 = load i8*, i8** inttoptr (i64 230325056 to i8**)
-// %2 = load i8*, i8** inttoptr (i64 230325064 to i8**)
-// %result = call i64 @GetSlotRef(%"class.impala::ExprContext"* inttoptr
-// (i64 165557504 to %"class.impala::ExprContext"*),
-// %"class.impala::TupleRow"* %row)
-// %is_null = trunc i64 %result to i1
-// %null_byte_loc = getelementptr i8, i8* %2, i32 0
-// %3 = load i8, i8* %null_byte_loc
-// %4 = icmp ne i8 %3, 0
-// %loc = getelementptr i8, i8* %1, i32 0
-// %row_val = bitcast i8* %loc to i32*
+// %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
+// inttoptr (i64 139107136 to %"class.impala::ExprContext"*),
+// %"class.impala::TupleRow"* %row)
+// %1 = extractvalue { i8, i64 } %result, 0
+// %is_null = trunc i8 %1 to i1
+// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
+// %2 = load i8, i8* %null_byte_loc
+// %3 = icmp ne i8 %2, 0
+// %loc = getelementptr i8, i8* %expr_values, i32 0
+// %row_val = bitcast i8* %loc to i64*
// br i1 %is_null, label %null, label %not_null
//
-// false_block: ; preds = %cmp9, %not_null2, %null1, %cmp,
%not_null, %null
+// false_block: ; preds = %cmp9, %not_null2, %null1,
+// %cmp, %not_null, %null
// ret i1 false
//
// null: ; preds = %entry
-// br i1 %4, label %continue, label %false_block
+// br i1 %3, label %continue, label %false_block
//
// not_null: ; preds = %entry
-// br i1 %4, label %false_block, label %cmp
+// br i1 %3, label %false_block, label %cmp
//
// continue: ; preds = %cmp, %null
-// %result4 = call { i64, i8* } @GetSlotRef.2(%"class.impala::ExprContext"*
-// inttoptr (i64 165557696 to %"class.impala::ExprContext"*),
-// %"class.impala::TupleRow"* %row)
-// %5 = extractvalue { i64, i8* } %result4, 0
-// %is_null5 = trunc i64 %5 to i1
-// %null_byte_loc6 = getelementptr i8, i8* %2, i32 1
-// %6 = load i8, i8* %null_byte_loc6
-// %7 = icmp ne i8 %6, 0
-// %loc7 = getelementptr i8, i8* %1, i32 8
+// %result4 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
+// inttoptr (i64 139107328 to %"class.impala::ExprContext"*),
+// %"class.impala::TupleRow"* %row)
+// %4 = extractvalue { i64, i8* } %result4, 0
+// %is_null5 = trunc i64 %4 to i1
+// %null_byte_loc6 = getelementptr i8, i8* %expr_values_null, i32 1
+// %5 = load i8, i8* %null_byte_loc6
+// %6 = icmp ne i8 %5, 0
+// %loc7 = getelementptr i8, i8* %expr_values, i32 8
// %row_val8 = bitcast i8* %loc7 to %"struct.impala::StringValue"*
// br i1 %is_null5, label %null1, label %not_null2
//
// cmp: ; preds = %not_null
-// %8 = load i32, i32* %row_val
-// %9 = ashr i64 %result, 32
-// %10 = trunc i64 %9 to i32
-// %cmp_raw = icmp eq i32 %10, %8
+// %7 = load i64, i64* %row_val
+// %val = extractvalue { i8, i64 } %result, 1
+// %cmp_raw = icmp eq i64 %val, %7
// br i1 %cmp_raw, label %continue, label %false_block
//
// null1: ; preds = %continue
-// br i1 %7, label %continue3, label %false_block
+// br i1 %6, label %continue3, label %false_block
//
// not_null2: ; preds = %continue
-// br i1 %7, label %false_block, label %cmp9
+// br i1 %6, label %false_block, label %cmp9
//
// continue3: ; preds = %cmp9, %null1
// ret i1 true
//
// cmp9: ; preds = %not_null2
// store { i64, i8* } %result4, { i64, i8* }* %0
-// %11 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"*
-// %cmp_raw10 = call i1 @_Z13StringValueEqRKN10impala_udf9StringValERKN6
-// impala11StringValueE(%"struct.impala_udf::StringVal"* %11,
-// %"struct.impala::StringValue"* %row_val8)
+// %8 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"*
+// %cmp_raw10 = call i1
+// @_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE(
+// %"struct.impala_udf::StringVal"* %8, %"struct.impala::StringValue"* %row_val8)
// br i1 %cmp_raw10, label %continue3, label %false_block
// }
Status HashTableCtx::CodegenEquals(RuntimeState* state, bool
force_null_equality,
@@ -1068,26 +1057,20 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state,
bool force_null_equality
Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
DCHECK(this_type != NULL);
PointerType* this_ptr_type = PointerType::get(this_type, 0);
- PointerType* buffer_ptr_type = PointerType::get(codegen->ptr_type(), 0);
LlvmCodeGen::FnPrototype prototype(codegen, "Equals",
codegen->GetType(TYPE_BOOLEAN));
prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
+ prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values",
codegen->ptr_type()));
+ prototype.AddArgument(
+ LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
LLVMContext& context = codegen->context();
LlvmCodeGen::LlvmBuilder builder(context);
- Value* args[2];
+ Value* args[4];
*fn = prototype.GeneratePrototype(&builder, args);
Value* row = args[1];
-
- // Load cur_expr_values_ into a LLVM pointer.
- Value* cur_expr_values_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
- &expr_values_cache_.cur_expr_values_);
- Value* cur_expr_values = builder.CreateLoad(cur_expr_values_ptr);
-
- // Load cur_expr_values_null_ into a LLVM pointer.
- Value* cur_expr_values_null_ptr = codegen->CastPtrToLlvmPtr(buffer_ptr_type,
- &expr_values_cache_.cur_expr_values_null_);
- Value* cur_expr_values_null = builder.CreateLoad(cur_expr_values_null_ptr);
+ Value* expr_values = args[2];
+ Value* expr_values_null = args[3];
BasicBlock* false_block = BasicBlock::Create(context, "false_block", *fn);
for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
@@ -1112,24 +1095,24 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state,
bool force_null_equality
build_expr_ctxs_[i]->root()->type(), expr_fn, expr_fn_args, "result");
Value* is_null = result.GetIsNull();
- // Determine if row is null (i.e. cur_expr_values_null_[i] == true). In
+ // Determine if row is null (i.e. expr_values_null[i] == true). In
// the case where the hash table does not store nulls, this is always
false.
Value* row_is_null = codegen->false_value();
// We consider null values equal if we are comparing build rows or if the
join
// predicate is <=>
if (force_null_equality || finds_nulls_[i]) {
- Value* llvm_null_byte_loc = builder.CreateGEP(NULL, cur_expr_values_null,
- codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc");
+ Value* llvm_null_byte_loc = builder.CreateGEP(
+ NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i),
"null_byte_loc");
Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
row_is_null = builder.CreateICmpNE(null_byte,
codegen->GetIntConstant(TYPE_TINYINT, 0));
}
- // Get llvm value for row_val from 'cur_expr_values_'
+ // Get llvm value for row_val from 'expr_values'
int offset = expr_values_cache_.expr_values_offsets(i);
- Value* loc = builder.CreateGEP(NULL, cur_expr_values,
- codegen->GetIntConstant(TYPE_INT, offset), "loc");
+ Value* loc = builder.CreateGEP(
+ NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc");
Value* row_val = builder.CreatePointerCast(loc,
codegen->GetPtrType(build_expr_ctxs_[i]->root()->type()), "row_val");
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index b549aef..58078ad 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -155,12 +155,13 @@ class HashTableCtx {
/// TODO: this is an awkward abstraction but aggregation node can take
advantage of
/// it and save some expr evaluation calls.
void* ALWAYS_INLINE ExprValue(int expr_idx) const {
- return expr_values_cache_.ExprValuePtr(expr_idx);
+ return expr_values_cache_.ExprValuePtr(
+ expr_values_cache_.cur_expr_values(), expr_idx);
}
/// Returns if the expression at 'expr_idx' is evaluated to NULL for the
current row.
bool ALWAYS_INLINE ExprValueNull(int expr_idx) const {
- return static_cast<bool>(*expr_values_cache_.ExprValueNullPtr(expr_idx));
+ return static_cast<bool>(*(expr_values_cache_.cur_expr_values_null() +
expr_idx));
}
/// Evaluate and hash the build/probe row, saving the evaluation to the
current row of
@@ -170,27 +171,26 @@ class HashTableCtx {
/// row should be rejected (doesn't need to be processed further) because
it contains
/// NULL. These need to be inlined in the IR module so we can find and
replace the
/// calls to EvalBuildRow()/EvalProbeRow().
- bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row);
- bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row);
+ bool IR_ALWAYS_INLINE EvalAndHashBuild(const TupleRow* row);
+ bool IR_ALWAYS_INLINE EvalAndHashProbe(const TupleRow* row);
- /// Codegen for evaluating a tuple row. Codegen'd function matches the
signature
+ /// Codegen for evaluating a tuple row. Codegen'd function matches the
signature
/// for EvalBuildRow and EvalTupleRow.
/// If build_row is true, the codegen uses the build_exprs, otherwise the
probe_exprs.
Status CodegenEvalRow(RuntimeState* state, bool build_row, llvm::Function**
fn);
- /// Codegen for evaluating a TupleRow and comparing equality against
- /// 'cur_expr_values_'. Function signature matches HashTable::Equals().
- /// 'force_null_equality' is true if the generated equality function should
treat
- /// all NULLs as equal. See the template parameter to HashTable::Equals().
+ /// Codegen for evaluating a TupleRow and comparing equality. Function
signature
+ /// matches HashTable::Equals(). 'force_null_equality' is true if the
generated
+ /// equality function should treat all NULLs as equal. See the template
parameter
+ /// to HashTable::Equals().
Status CodegenEquals(RuntimeState* state, bool force_null_equality,
llvm::Function** fn);
- /// Codegen for hashing the expr values in 'cur_expr_values_'. Function
prototype
- /// matches HashCurrentRow identically. Unlike HashCurrentRow(), the
returned function
- /// only uses a single hash function, rather than switching based on level_.
- /// If 'use_murmur' is true, murmur hash is used, otherwise CRC is used if
the hardware
- /// supports it (see hash-util.h).
- Status CodegenHashCurrentRow(RuntimeState* state, bool use_murmur,
llvm::Function** fn);
+ /// Codegen for hashing expr values. Function prototype matches HashRow
identically.
+ /// Unlike HashRow(), the returned function only uses a single hash
function, rather
+ /// than switching based on level_. If 'use_murmur' is true, murmur hash is
used,
+ /// otherwise CRC is used if the hardware supports it (see hash-util.h).
+ Status CodegenHashRow(RuntimeState* state, bool use_murmur, llvm::Function**
fn);
/// Struct that returns the number of constants replaced by
ReplaceConstants().
struct HashTableReplacedConstants {
@@ -297,17 +297,24 @@ class HashTableCtx {
void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set<false>(CurIdx(), true);
}
/// Returns the hash values of the current row.
- uint32_t ALWAYS_INLINE ExprValuesHash() const { return *cur_expr_values_hash_; }
+ uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; }
/// Sets the hash values for the current row.
- void ALWAYS_INLINE SetExprValuesHash(uint32_t hash) { *cur_expr_values_hash_ = hash; }
+ void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { *cur_expr_values_hash_ = hash; }
- /// Returns a pointer to the expression value at 'expr_idx' for the
current row.
- uint8_t* ExprValuePtr(int expr_idx) const;
+ /// Returns a pointer to the expression value at 'expr_idx' in
'expr_values'.
+ uint8_t* ExprValuePtr(uint8_t* expr_values, int expr_idx) const;
+ const uint8_t* ExprValuePtr(const uint8_t* expr_values, int expr_idx)
const;
- /// Returns a pointer to the boolean indicating the nullness of the
expression value
- /// at 'expr_idx'.
- uint8_t* ExprValueNullPtr(int expr_idx) const;
+ /// Returns the current row's expression buffer. The expression values in
the buffer
+ /// are accessed using ExprValuePtr().
+ uint8_t* ALWAYS_INLINE cur_expr_values() const { return cur_expr_values_; }
+
+ /// Returns null indicator bytes for the current row, one per expression.
Non-zero
+ /// bytes mean NULL, zero bytes mean non-NULL. Indexed by the expression
index.
+ /// These are uint8_t instead of bool to simplify codegen with IRBuilder.
+ /// TODO: is there actually a valid reason why this is necessary for
codegen?
+ uint8_t* ALWAYS_INLINE cur_expr_values_null() const { return
cur_expr_values_null_; }
/// Returns the offset into the results buffer of the expression value at
'expr_idx'.
int ALWAYS_INLINE expr_values_offsets(int expr_idx) const {
@@ -393,46 +400,60 @@ class HashTableCtx {
/// null bits etc. Returns error if allocation causes query memory limit to
be exceeded.
Status Init(RuntimeState* state, int num_build_tuples);
- /// Compute the hash of the values in expr_values_buffer_.
+ /// Compute the hash of the values in 'expr_values' with nullness
'expr_values_null'.
/// This will be replaced by codegen. We don't want this inlined for
replacing
/// with codegen'd functions so the function name does not change.
- uint32_t IR_NO_INLINE HashCurrentRow() const;
+ uint32_t IR_NO_INLINE HashRow(
+ const uint8_t* expr_values, const uint8_t* expr_values_null) const;
/// Wrapper function for calling correct HashUtil function in non-codegen'd
case.
uint32_t Hash(const void* input, int len, uint32_t hash) const;
- /// Evaluate 'row' over build exprs caching the results in
'cur_expr_values_' This
- /// will be replaced by codegen. We do not want this function inlined when
cross
- /// compiled because we need to be able to differentiate between
EvalBuildRow and
- /// EvalProbeRow by name and the build/probe exprs are baked into the
codegen'd
- /// function.
- bool IR_NO_INLINE EvalBuildRow(TupleRow* row) {
- return EvalRow(row, build_expr_ctxs_);
+ /// Evaluate 'row' over build exprs, storing values into 'expr_values' and
nullness into
+ /// 'expr_values_null'. This will be replaced by codegen. We do not want this
+ /// function inlined when cross compiled because we need to be able to
differentiate
+ /// between EvalBuildRow and EvalProbeRow by name and the build/probe exprs
are baked
+ /// into the codegen'd function.
+ bool IR_NO_INLINE EvalBuildRow(
+ const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) {
+ return EvalRow(row, build_expr_ctxs_, expr_values, expr_values_null);
}
- /// Evaluate 'row' over probe exprs caching the results in 'cur_expr_values_'
- /// This will be replaced by codegen.
- bool IR_NO_INLINE EvalProbeRow(TupleRow* row) {
- return EvalRow(row, probe_expr_ctxs_);
+ /// Evaluate 'row' over probe exprs, storing the values into 'expr_values'
and nullness
+ /// into 'expr_values_null'. This will be replaced by codegen.
+ bool IR_NO_INLINE EvalProbeRow(
+ const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) {
+ return EvalRow(row, probe_expr_ctxs_, expr_values, expr_values_null);
}
- /// Compute the hash of the values in expr_values_buffer_ for rows with
variable length
- /// fields (e.g. strings).
- uint32_t HashVariableLenRow() const;
+ /// Compute the hash of the values in 'expr_values' with nullness
'expr_values_null'
+ /// for a row with variable length fields (e.g. strings).
+ uint32_t HashVariableLenRow(
+ const uint8_t* expr_values, const uint8_t* expr_values_null) const;
- /// Evaluate the exprs over row and cache the results in 'cur_expr_values_'.
- /// Returns whether any expr evaluated to NULL.
- /// This will be replaced by codegen.
- bool EvalRow(TupleRow* row, const std::vector<ExprContext*>& ctxs);
+ /// Evaluate the exprs over row, storing the values into 'expr_values' and
nullness into
+ /// 'expr_values_null'. Returns whether any expr evaluated to NULL. This
will be
+ /// replaced by codegen.
+ bool EvalRow(const TupleRow* row, const std::vector<ExprContext*>& ctxs,
+ uint8_t* expr_values, uint8_t* expr_values_null);
/// Returns true if the values of build_exprs evaluated over 'build_row'
equal the
- /// values cached in 'cur_expr_values_'. This will be replaced by codegen.
- /// FORCE_NULL_EQUALITY is true if all nulls should be treated as equal,
regardless
- /// of the values of 'finds_nulls_'.
- template<bool FORCE_NULL_EQUALITY>
- bool IR_NO_INLINE Equals(TupleRow* build_row) const;
+ /// values in 'expr_values' with nullness 'expr_values_null'.
FORCE_NULL_EQUALITY is
+ /// true if all nulls should be treated as equal, regardless of the values of
+ /// 'finds_nulls_'. This will be replaced by codegen.
+ template <bool FORCE_NULL_EQUALITY>
+ bool IR_NO_INLINE Equals(const TupleRow* build_row, const uint8_t*
expr_values,
+ const uint8_t* expr_values_null) const;
+
+ /// Helper function that calls Equals() with the current row. Always inlined
so that
+ /// it does not appear in cross-compiled IR.
+ template <bool FORCE_NULL_EQUALITY>
+ bool ALWAYS_INLINE Equals(const TupleRow* build_row) const {
+ return Equals<FORCE_NULL_EQUALITY>(build_row,
expr_values_cache_.cur_expr_values(),
+ expr_values_cache_.cur_expr_values_null());
+ }
- /// Cross-compiled function to access member variables used in
CodegenHashCurrentRow().
+ /// Cross-compiled function to access member variables used in
CodegenHashRow().
uint32_t GetHashSeed() const;
/// Functions to be replaced by codegen to specialize the hash table.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/hash-table.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.inline.h b/be/src/exec/hash-table.inline.h
index bed02b9..c266704 100644
--- a/be/src/exec/hash-table.inline.h
+++ b/be/src/exec/hash-table.inline.h
@@ -26,17 +26,21 @@
namespace impala {
-inline bool HashTableCtx::EvalAndHashBuild(TupleRow* row) {
- bool has_null = EvalBuildRow(row);
+inline bool HashTableCtx::EvalAndHashBuild(const TupleRow* row) {
+ uint8_t* expr_values = expr_values_cache_.cur_expr_values();
+ uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null();
+ bool has_null = EvalBuildRow(row, expr_values, expr_values_null);
if (!stores_nulls() && has_null) return false;
- expr_values_cache_.SetExprValuesHash(HashCurrentRow());
+ expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values,
expr_values_null));
return true;
}
-inline bool HashTableCtx::EvalAndHashProbe(TupleRow* row) {
- bool has_null = EvalProbeRow(row);
+inline bool HashTableCtx::EvalAndHashProbe(const TupleRow* row) {
+ uint8_t* expr_values = expr_values_cache_.cur_expr_values();
+ uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null();
+ bool has_null = EvalProbeRow(row, expr_values, expr_values_null);
if (has_null && !(stores_nulls() && finds_some_nulls())) return false;
- expr_values_cache_.SetExprValuesHash(HashCurrentRow());
+ expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values,
expr_values_null));
return true;
}
@@ -92,7 +96,7 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t
num_buckets,
inline HashTable::HtData* HashTable::InsertInternal(HashTableCtx* ht_ctx) {
++num_probes_;
bool found = false;
- uint32_t hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+ uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
int64_t bucket_idx = Probe<true>(buckets_, num_buckets_, ht_ctx, hash,
&found);
DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND);
if (found) {
@@ -135,7 +139,7 @@ inline void HashTable::PrefetchBucket(uint32_t hash) {
inline HashTable::Iterator HashTable::FindProbeRow(HashTableCtx* ht_ctx) {
++num_probes_;
bool found = false;
- uint32_t hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+ uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
int64_t bucket_idx = Probe<false>(buckets_, num_buckets_, ht_ctx, hash,
&found);
if (found) {
return Iterator(this, ht_ctx->scratch_row(), bucket_idx,
@@ -148,7 +152,7 @@ inline HashTable::Iterator
HashTable::FindProbeRow(HashTableCtx* ht_ctx) {
inline HashTable::Iterator HashTable::FindBuildRowBucket(
HashTableCtx* ht_ctx, bool* found) {
++num_probes_;
- uint32_t hash = ht_ctx->expr_values_cache()->ExprValuesHash();
+ uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash();
int64_t bucket_idx = Probe<true>(buckets_, num_buckets_, ht_ctx, hash,
found);
DuplicateNode* duplicates = NULL;
if (stores_duplicates() && LIKELY(bucket_idx != Iterator::BUCKET_NOT_FOUND))
{
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-aggregation-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node-ir.cc
b/be/src/exec/partitioned-aggregation-node-ir.cc
index 5982b07..194f6c4 100644
--- a/be/src/exec/partitioned-aggregation-node-ir.cc
+++ b/be/src/exec/partitioned-aggregation-node-ir.cc
@@ -79,7 +79,7 @@ void IR_ALWAYS_INLINE
PartitionedAggregationNode::EvalAndHashPrefetchGroup(
is_null = !ht_ctx->EvalAndHashProbe(row);
}
// Hoist lookups out of non-null branch to speed up non-null case.
- const uint32_t hash = expr_vals_cache->ExprValuesHash();
+ const uint32_t hash = expr_vals_cache->CurExprValuesHash();
const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
HashTable* hash_tbl = GetHashTable(partition_idx);
if (is_null) {
@@ -98,7 +98,7 @@ Status PartitionedAggregationNode::ProcessRow(TupleRow*
__restrict__ row,
HashTableCtx* __restrict__ ht_ctx) {
HashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache();
// Hoist lookups out of non-null branch to speed up non-null case.
- const uint32_t hash = expr_vals_cache->ExprValuesHash();
+ const uint32_t hash = expr_vals_cache->CurExprValuesHash();
const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
if (expr_vals_cache->IsRowNull()) return Status::OK();
// To process this row, we first see if it can be aggregated or inserted
into this
@@ -188,7 +188,7 @@ Status
PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize,
FOREACH_ROW_LIMIT(in_batch, group_start, cache_size, in_batch_iter) {
// Hoist lookups out of non-null branch to speed up non-null case.
TupleRow* in_row = in_batch_iter.Get();
- const uint32_t hash = expr_vals_cache->ExprValuesHash();
+ const uint32_t hash = expr_vals_cache->CurExprValuesHash();
const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
if (!expr_vals_cache->IsRowNull() &&
!TryAddToHashTable(ht_ctx, hash_partitions_[partition_idx],
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc
b/be/src/exec/partitioned-aggregation-node.cc
index 9c91b07..eb5addc 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -1814,8 +1814,7 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
// The codegen'd ProcessBatch function is only used in Open() with level_
= 0,
// so don't use murmur hash
Function* hash_fn;
- RETURN_IF_ERROR(ht_ctx_->CodegenHashCurrentRow(state_, /* use murmur */ false,
- &hash_fn));
+ RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state_, /* use murmur */ false, &hash_fn));
// Codegen HashTable::Equals<true>
Function* build_equals_fn;
@@ -1830,7 +1829,7 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
"EvalProbeRow");
DCHECK_EQ(replaced, 1);
- replaced = codegen->ReplaceCallSites(process_batch_fn, hash_fn,
"HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(process_batch_fn, hash_fn, "HashRow");
DCHECK_EQ(replaced, 1);
replaced = codegen->ReplaceCallSites(process_batch_fn, build_equals_fn,
"Equals");
@@ -1888,7 +1887,7 @@ Status
PartitionedAggregationNode::CodegenProcessBatchStreaming() {
// We only use the top-level hash function for streaming aggregations.
Function* hash_fn;
- RETURN_IF_ERROR(ht_ctx_->CodegenHashCurrentRow(state_, false, &hash_fn));
+ RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state_, false, &hash_fn));
// Codegen HashTable::Equals
Function* equals_fn;
@@ -1907,8 +1906,7 @@ Status
PartitionedAggregationNode::CodegenProcessBatchStreaming() {
"EvalProbeRow");
DCHECK_EQ(replaced, 1);
- replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, hash_fn,
- "HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, equals_fn,
"Equals");
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-hash-join-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node-ir.cc
b/be/src/exec/partitioned-hash-join-node-ir.cc
index fde1f69..bab1bf8 100644
--- a/be/src/exec/partitioned-hash-join-node-ir.cc
+++ b/be/src/exec/partitioned-hash-join-node-ir.cc
@@ -264,7 +264,7 @@ bool IR_ALWAYS_INLINE PartitionedHashJoinNode::NextProbeRow(
bool skip_row = false;
// The hash of the expressions results for the current probe row.
- uint32_t hash = expr_vals_cache->ExprValuesHash();
+ uint32_t hash = expr_vals_cache->CurExprValuesHash();
// Hoist the followings out of the else statement below to speed up
non-null case.
const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
HashTable* hash_tbl = hash_tbls_[partition_idx];
@@ -344,7 +344,7 @@ void IR_ALWAYS_INLINE
PartitionedHashJoinNode::EvalAndHashProbePrefetchGroup(
TupleRow* row = batch_iter.Get();
if (ht_ctx->EvalAndHashProbe(row)) {
if (prefetch_mode != TPrefetchMode::NONE) {
- uint32_t hash = expr_vals_cache->ExprValuesHash();
+ uint32_t hash = expr_vals_cache->CurExprValuesHash();
const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
HashTable* hash_tbl = hash_tbls_[partition_idx];
if (LIKELY(hash_tbl != NULL)) hash_tbl->PrefetchBucket<true>(hash);
@@ -461,7 +461,7 @@ Status PartitionedHashJoinNode::ProcessBuildBatch(RowBatch*
build_batch,
ctx.local_bloom_filter->Insert(filter_hash);
}
}
- const uint32_t hash = expr_vals_cache->ExprValuesHash();
+ const uint32_t hash = expr_vals_cache->CurExprValuesHash();
const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
Partition* partition = hash_partitions_[partition_idx];
const bool result = AppendRow(partition->build_rows(), build_row,
&build_status_);
@@ -485,7 +485,7 @@ bool PartitionedHashJoinNode::Partition::InsertBatch(
FOREACH_ROW_LIMIT(batch, cur_row, prefetch_size, batch_iter) {
if (ht_ctx->EvalAndHashBuild(batch_iter.Get())) {
if (prefetch_mode != TPrefetchMode::NONE) {
- hash_tbl_->PrefetchBucket<false>(expr_vals_cache->ExprValuesHash());
+ hash_tbl_->PrefetchBucket<false>(expr_vals_cache->CurExprValuesHash());
}
} else {
expr_vals_cache->SetRowNull();
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/01b8ade6/be/src/exec/partitioned-hash-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node.cc
b/be/src/exec/partitioned-hash-join-node.cc
index d424fe6..f634193 100644
--- a/be/src/exec/partitioned-hash-join-node.cc
+++ b/be/src/exec/partitioned-hash-join-node.cc
@@ -207,10 +207,9 @@ Status PartitionedHashJoinNode::Prepare(RuntimeState*
state) {
if (state->codegen_enabled()) {
// Codegen for hashing rows
Function* hash_fn;
- codegen_status = ht_ctx_->CodegenHashCurrentRow(state, false, &hash_fn);
+ codegen_status = ht_ctx_->CodegenHashRow(state, false, &hash_fn);
Function* murmur_hash_fn;
- codegen_status.MergeStatus(
- ht_ctx_->CodegenHashCurrentRow(state, true, &murmur_hash_fn));
+ codegen_status.MergeStatus(ht_ctx_->CodegenHashRow(state, true,
&murmur_hash_fn));
// Codegen for evaluating build rows
Function* eval_build_row_fn;
@@ -1685,13 +1684,11 @@ Status
PartitionedHashJoinNode::CodegenProcessBuildBatch(RuntimeState* state,
ConstantInt::get(Type::getInt1Ty(codegen->context()), filters_.size() >
0));
// process_build_batch_fn_level0 uses CRC hash if available,
- replaced = codegen->ReplaceCallSites(process_build_batch_fn_level0, hash_fn,
- "HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(process_build_batch_fn_level0, hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
// process_build_batch_fn uses murmur
- replaced = codegen->ReplaceCallSites(process_build_batch_fn, murmur_hash_fn,
- "HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(process_build_batch_fn, murmur_hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
// Never build filters after repartitioning, as all rows have already been
added to the
@@ -1769,11 +1766,6 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
DCHECK(process_probe_batch_fn->getLinkage() == GlobalValue::WeakODRLinkage)
<< LlvmCodeGen::Print(process_probe_batch_fn);
- // Bake in %this pointer argument to process_probe_batch_fn.
- Value* this_arg = codegen->GetArgument(process_probe_batch_fn, 0);
- Value* this_loc = codegen->CastPtrToLlvmPtr(this_arg->getType(), this);
- this_arg->replaceAllUsesWith(this_loc);
-
// Replace the parameter 'prefetch_mode' with constant.
Value* prefetch_mode_arg = codegen->GetArgument(process_probe_batch_fn, 1);
TPrefetchMode::type prefetch_mode = state->query_options().prefetch_mode;
@@ -1782,11 +1774,6 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
prefetch_mode_arg->replaceAllUsesWith(
ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode));
- // Bake in %ht_ctx pointer argument to process_probe_batch_fn
- Value* ht_ctx_arg = codegen->GetArgument(process_probe_batch_fn, 3);
- Value* ht_ctx_loc = codegen->CastPtrToLlvmPtr(ht_ctx_arg->getType(),
ht_ctx_.get());
- ht_ctx_arg->replaceAllUsesWith(ht_ctx_loc);
-
// Codegen HashTable::Equals
Function* probe_equals_fn;
RETURN_IF_ERROR(ht_ctx_->CodegenEquals(state, false, &probe_equals_fn));
@@ -1868,12 +1855,10 @@ Status
PartitionedHashJoinNode::CodegenProcessProbeBatch(
// process_probe_batch_fn_level0 uses CRC hash if available,
// process_probe_batch_fn uses murmur
- replaced = codegen->ReplaceCallSites(process_probe_batch_fn_level0, hash_fn,
- "HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(process_probe_batch_fn_level0, hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
- replaced = codegen->ReplaceCallSites(process_probe_batch_fn, murmur_hash_fn,
- "HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(process_probe_batch_fn, murmur_hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
// Finalize ProcessProbeBatch functions
@@ -1937,9 +1922,9 @@ Status
PartitionedHashJoinNode::CodegenInsertBatch(RuntimeState* state,
Function* insert_batch_fn_level0 = codegen->CloneFunction(insert_batch_fn);
// Use codegen'd hash functions
- replaced = codegen->ReplaceCallSites(insert_batch_fn_level0, hash_fn,
"HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(insert_batch_fn_level0, hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
- replaced = codegen->ReplaceCallSites(insert_batch_fn, murmur_hash_fn,
"HashCurrentRow");
+ replaced = codegen->ReplaceCallSites(insert_batch_fn, murmur_hash_fn,
"HashRow");
DCHECK_EQ(replaced, 1);
insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn);