IMPALA-3884: Support TYPE_TIMESTAMP for HashTableCtx::CodegenAssignNullValue()
This change implements support for TYPE_TIMESTAMP for HashTableCtx::CodegenAssignNullValue(). TimestampValue itself is 16 bytes in size. To match RawValue::Write() in the interpreted path, CodegenAssignNullValue() emits code to assign HashUtil::FNV_SEED to both the upper and lower 64-bit of the destination value. This change also fixes the handling of 128-bit Decimal16Value in CodegenAssignNullValue() so the emitted code matches the behavior of the interpreted path. Change-Id: I0211d38cbef46331e0006fa5ed0680e6e0867bc8 Reviewed-on: http://gerrit.cloudera.org:8080/4794 Reviewed-by: Michael Ho <[email protected]> Tested-by: Michael Ho <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/13455b5a Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/13455b5a Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/13455b5a Branch: refs/heads/master Commit: 13455b5a24a9d4d009d1dd0d72944c6cacd54829 Parents: aa7741a Author: Michael Ho <[email protected]> Authored: Fri Oct 21 15:00:56 2016 -0700 Committer: Michael Ho <[email protected]> Committed: Tue Oct 25 05:52:33 2016 +0000 ---------------------------------------------------------------------- be/src/codegen/codegen-anyval.cc | 8 ++--- be/src/codegen/llvm-codegen.cc | 10 ++++-- be/src/codegen/llvm-codegen.h | 9 +++-- be/src/exec/hash-table.cc | 36 ++++++++++++-------- be/src/exec/old-hash-table.cc | 32 +++++++++++------ be/src/util/bit-util.h | 4 +++ .../queries/QueryTest/joins.test | 10 ++++++ 7 files changed, 72 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/codegen/codegen-anyval.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc index a2df356..57c08b5 100644 --- a/be/src/codegen/codegen-anyval.cc +++ b/be/src/codegen/codegen-anyval.cc @@ -380,12 +380,8 @@ void CodegenAnyVal::SetVal(int64_t val) { void CodegenAnyVal::SetVal(int128_t val) { DCHECK_EQ(type_.type, TYPE_DECIMAL); - // TODO: is there a better way to do this? - // Set high bits - Value* ir_val = ConstantInt::get(codegen_->i128_type(), HighBits(val)); - ir_val = builder_->CreateShl(ir_val, 64, "tmp"); - // Set low bits - ir_val = builder_->CreateOr(ir_val, LowBits(val), "tmp"); + vector<uint64_t> vals({LowBits(val), HighBits(val)}); + Value* ir_val = ConstantInt::get(codegen_->context(), APInt(128, vals)); SetVal(ir_val); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/codegen/llvm-codegen.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc index d43ad6e..30cc7d0 100644 --- a/be/src/codegen/llvm-codegen.cc +++ b/be/src/codegen/llvm-codegen.cc @@ -564,7 +564,7 @@ Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, const void* ptr) { return ConstantExpr::getIntToPtr(const_int, type); } -Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, int64_t val) { +Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, uint64_t val) { switch (type) { case TYPE_TINYINT: return ConstantInt::get(context(), APInt(8, val)); @@ -580,8 +580,12 @@ Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, int64_t val) { } } -Value* LlvmCodeGen::GetIntConstant(int num_bytes, int64_t val) { - return ConstantInt::get(context(), APInt(8 * num_bytes, val)); +Value* LlvmCodeGen::GetIntConstant(int num_bytes, uint64_t low_bits, uint64_t high_bits) { + DCHECK_GE(num_bytes, 1); + DCHECK_LE(num_bytes, 16); + DCHECK(BitUtil::IsPowerOf2(num_bytes)); + vector<uint64_t> vals({low_bits, high_bits}); + return ConstantInt::get(context(), APInt(8 * num_bytes, vals)); } AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(Function* f, const NamedVariable& var) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/codegen/llvm-codegen.h ---------------------------------------------------------------------- diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h index 0b81701..602dc5d 100644 --- a/be/src/codegen/llvm-codegen.h +++ b/be/src/codegen/llvm-codegen.h @@ -380,10 +380,13 @@ class LlvmCodeGen { llvm::Value* CastPtrToLlvmPtr(llvm::Type* type, const void* ptr); /// Returns the constant 'val' of 'type'. - llvm::Value* GetIntConstant(PrimitiveType type, int64_t val); + llvm::Value* GetIntConstant(PrimitiveType type, uint64_t val); - /// Returns the constant 'val' of the int type of size 'byte_size'. - llvm::Value* GetIntConstant(int byte_size, int64_t val); + /// Returns a constant int of 'byte_size' bytes based on 'low_bits' and 'high_bits' + /// which stand for the lower and upper 64-bits of the constant respectively. For + /// values less than or equal to 64-bits, 'high_bits' is not used. This function + /// can generate constant up to 128-bit wide. 'byte_size' must be power of 2. + llvm::Value* GetIntConstant(int byte_size, uint64_t low_bits, uint64_t high_bits); /// Returns true/false constants (bool type) llvm::Value* true_value() { return true_value_; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/exec/hash-table.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index c39e9e9..74fc534 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -569,41 +569,51 @@ string HashTable::PrintStats() const { // we'll pick a more random value. static void CodegenAssignNullValue(LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) { - int64_t fvn_seed = HashUtil::FNV_SEED; + uint64_t fnv_seed = HashUtil::FNV_SEED; if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) { Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr"); Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len"); - Value* null_len = codegen->GetIntConstant(TYPE_INT, fvn_seed); + Value* null_len = codegen->GetIntConstant(TYPE_INT, fnv_seed); Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); builder->CreateStore(null_ptr, dst_ptr); builder->CreateStore(null_len, dst_len); } else { Value* null_value = NULL; - // Get a type specific representation of fvn_seed + int byte_size = type.GetByteSize(); + // Get a type specific representation of fnv_seed switch (type.type) { case TYPE_BOOLEAN: // In results, booleans are stored as 1 byte dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->GetIntConstant(TYPE_TINYINT, fvn_seed); + null_value = codegen->GetIntConstant(TYPE_TINYINT, fnv_seed); break; + case TYPE_TIMESTAMP: { + // Cast 'dst' to 'i128*' + DCHECK_EQ(byte_size, 16); + PointerType* fnv_seed_ptr_type = + codegen->GetPtrType(Type::getIntNTy(codegen->context(), byte_size * 8)); + dst = builder->CreateBitCast(dst, fnv_seed_ptr_type); + null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); + break; + } case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: case TYPE_BIGINT: case TYPE_DECIMAL: - null_value = codegen->GetIntConstant(type.GetByteSize(), fvn_seed); + null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); break; case TYPE_FLOAT: { // Don't care about the value, just the bit pattern - float fvn_seed_float = *reinterpret_cast<float*>(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float)); + float fnv_seed_float = *reinterpret_cast<float*>(&fnv_seed); + null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_float)); break; } case TYPE_DOUBLE: { // Don't care about the value, just the bit pattern - double fvn_seed_double = *reinterpret_cast<double*>(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double)); + double fnv_seed_double = *reinterpret_cast<double*>(&fnv_seed); + null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_double)); break; } default: @@ -685,13 +695,11 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen, // becomes the start of the next block for codegen (either the next expr or just the // end of the function). Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build, Function** fn) { - // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_; for (int i = 0; i < ctxs.size(); ++i) { - PrimitiveType type = ctxs[i]->root()->type().type; - if (type == TYPE_TIMESTAMP || type == TYPE_CHAR) { - return Status(Substitute("HashTableCtx::CodegenEvalRow(): type $0 NYI", - TypeToString(type))); + // Disable codegen for CHAR + if (ctxs[i]->root()->type().type == TYPE_CHAR) { + return Status("HashTableCtx::CodegenEvalRow(): CHAR NYI"); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/exec/old-hash-table.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/old-hash-table.cc b/be/src/exec/old-hash-table.cc index db89782..f51bc74 100644 --- a/be/src/exec/old-hash-table.cc +++ b/be/src/exec/old-hash-table.cc @@ -184,41 +184,52 @@ int OldHashTable::AddBloomFilters() { // we'll pick a more random value. static void CodegenAssignNullValue(LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) { - int64_t fvn_seed = HashUtil::FNV_SEED; + uint64_t fnv_seed = HashUtil::FNV_SEED; if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) { Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr"); Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len"); - Value* null_len = codegen->GetIntConstant(TYPE_INT, fvn_seed); + Value* null_len = codegen->GetIntConstant(TYPE_INT, fnv_seed); Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); builder->CreateStore(null_ptr, dst_ptr); builder->CreateStore(null_len, dst_len); return; } else { Value* null_value = NULL; - // Get a type specific representation of fvn_seed + int byte_size = type.GetByteSize(); + // Get a type specific representation of fnv_seed switch (type.type) { case TYPE_BOOLEAN: // In results, booleans are stored as 1 byte dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->GetIntConstant(TYPE_TINYINT, fvn_seed); + null_value = codegen->GetIntConstant(TYPE_TINYINT, fnv_seed); break; + case TYPE_TIMESTAMP: { + // Cast 'dst' to 'i128*' + DCHECK_EQ(byte_size, 16); + PointerType* fnv_seed_ptr_type = + codegen->GetPtrType(Type::getIntNTy(codegen->context(), byte_size * 8)); + dst = builder->CreateBitCast(dst, fnv_seed_ptr_type); + null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); + break; + } case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: case TYPE_BIGINT: - null_value = codegen->GetIntConstant(type.type, fvn_seed); + case TYPE_DECIMAL: + null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); break; case TYPE_FLOAT: { // Don't care about the value, just the bit pattern - float fvn_seed_float = *reinterpret_cast<float*>(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float)); + float fnv_seed_float = *reinterpret_cast<float*>(&fnv_seed); + null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_float)); break; } case TYPE_DOUBLE: { // Don't care about the value, just the bit pattern - double fvn_seed_double = *reinterpret_cast<double*>(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double)); + double fnv_seed_double = *reinterpret_cast<double*>(&fnv_seed); + null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_double)); break; } default: @@ -256,11 +267,10 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen, // becomes the start of the next block for codegen (either the next expr or just the // end of the function). Function* OldHashTable::CodegenEvalTupleRow(LlvmCodeGen* codegen, bool build) { - // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_; for (int i = 0; i < ctxs.size(); ++i) { PrimitiveType type = ctxs[i]->root()->type().type; - if (type == TYPE_TIMESTAMP || type == TYPE_DECIMAL || type == TYPE_CHAR) return NULL; + if (type == TYPE_CHAR) return NULL; } // Get types to generate function prototype http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/util/bit-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h index 33dd02b..9a5c4a4 100644 --- a/be/src/util/bit-util.h +++ b/be/src/util/bit-util.h @@ -82,6 +82,10 @@ class BitUtil { return value & ~(factor - 1); } + constexpr static inline bool IsPowerOf2(int64_t value) { + return (value & (value - 1)) == 0; + } + /// Specialized round up and down functions for frequently used factors, /// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64). /// Returns the rounded up number of bytes that fit the number of bits. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/testdata/workloads/functional-query/queries/QueryTest/joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test index ebb6287..db915df 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/joins.test +++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test @@ -720,3 +720,13 @@ on extract(minute from t1.timestamp_col) = extract(hour from t2.timestamp_col); ---- TYPES BIGINT ==== +---- QUERY +# Regression for IMPALA-3884. Exercise HashTableCtx::AssignNullValue() for +# 128-bit TimestampValue. +select count(*) from functional.alltypes t1 right outer join functional.decimal_tbl t2 on +t1.timestamp_col = cast(t2.d4 as TIMESTAMP); +---- RESULTS +5 +---- TYPES +BIGINT +====
