IMPALA-2548: Codegen Tuple::MaterializeExprs() and use in TopN node. For the following benchmark query: select count(*) from (select l_orderkey from biglineitem order by l_orderkey limit 1000) a
The overall query time goes from 2.74s to 1.74s, with the top-n node time going from 2.2s to 1.0s. There is no effect on sort node time. The overall approach of this patch is to move the TopNNode::InsertTupleRow() call into a cross-compiled batched function (InsertBatch()), and then replace the MaterializeExprs() calls with new functions built using the IRBuilder. This involves new codegen utilities, such as CodegenAnyVal::WriteToSlot() and the ability to hardcode in a MemPool pointer from which to make varlen data allocations. This patch also adds a new timer measuring the time spent inserting tuple rows. The existing TestQueries::test_top_n and TestQueries::test_sort tests pass with this patch. Change-Id: Ib422a8d50303c21c6a228675157bf867e8619444 Reviewed-on: http://gerrit.cloudera.org:8080/1901 Reviewed-by: Skye Wanderman-Milne <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/8e8df2f2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/8e8df2f2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/8e8df2f2 Branch: refs/heads/master Commit: 8e8df2f2f60f72cf05bf7b9f8a1c5e2839691a87 Parents: da49a37 Author: Skye Wanderman-Milne <[email protected]> Authored: Wed May 11 18:22:50 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Thu May 12 14:18:03 2016 -0700 ---------------------------------------------------------------------- be/src/codegen/codegen-anyval.cc | 76 ++++++++++- be/src/codegen/codegen-anyval.h | 25 +++- be/src/codegen/gen_ir_descriptions.py | 3 + be/src/codegen/impala-ir.cc | 2 + be/src/codegen/llvm-codegen.cc | 51 ++++--- be/src/codegen/llvm-codegen.h | 16 ++- be/src/exec/CMakeLists.txt | 1 + be/src/exec/topn-node-ir.cc | 49 +++++++ be/src/exec/topn-node.cc | 75 +++++++---- be/src/exec/topn-node.h | 17 ++- be/src/runtime/descriptors.cc | 2 + be/src/runtime/descriptors.h 
| 2 + be/src/runtime/mem-pool.h | 4 + be/src/runtime/raw-value.cc | 3 + be/src/runtime/sorter.cc | 2 +- be/src/runtime/tuple.cc | 207 ++++++++++++++++++++++++++--- be/src/runtime/tuple.h | 81 +++++++++-- 17 files changed, 519 insertions(+), 97 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/codegen/codegen-anyval.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc index 0eb8094..5466c8e 100644 --- a/be/src/codegen/codegen-anyval.cc +++ b/be/src/codegen/codegen-anyval.cc @@ -151,8 +151,8 @@ Value* CodegenAnyVal::CreateCall( CodegenAnyVal CodegenAnyVal::CreateCallWrapped( LlvmCodeGen* cg, LlvmCodeGen::LlvmBuilder* builder, const ColumnType& type, - Function* fn, ArrayRef<Value*> args, const char* name, Value* result_ptr) { - Value* v = CreateCall(cg, builder, fn, args, name, result_ptr); + Function* fn, ArrayRef<Value*> args, const char* name) { + Value* v = CreateCall(cg, builder, fn, args, name); return CodegenAnyVal(cg, builder, type, v, name); } @@ -514,15 +514,24 @@ void CodegenAnyVal::SetFromRawValue(Value* raw_val) { } } -Value* CodegenAnyVal::ToNativeValue() { +Value* CodegenAnyVal::ToNativeValue(MemPool* pool) { Type* raw_type = codegen_->GetType(type_); Value* raw_val = Constant::getNullValue(raw_type); switch (type_.type) { case TYPE_STRING: case TYPE_VARCHAR: { // Convert StringVal to StringValue - raw_val = builder_->CreateInsertValue(raw_val, GetPtr(), 0); - raw_val = builder_->CreateInsertValue(raw_val, GetLen(), 1); + Value* len = GetLen(); + raw_val = builder_->CreateInsertValue(raw_val, len, 1); + if (pool == NULL) { + // Set raw_val.ptr from this->ptr + raw_val = builder_->CreateInsertValue(raw_val, GetPtr(), 0); + } else { + // Allocate raw_val.ptr from 'pool' and copy this->ptr + Value* new_ptr = codegen_->CodegenAllocate(builder_, 
pool, len, "new_ptr"); + codegen_->CodegenMemcpy(builder_, new_ptr, GetPtr(), len); + raw_val = builder_->CreateInsertValue(raw_val, new_ptr, 0); + } break; } case TYPE_TIMESTAMP: { @@ -554,8 +563,8 @@ Value* CodegenAnyVal::ToNativeValue() { return raw_val; } -Value* CodegenAnyVal::ToNativePtr(Value* native_ptr) { - Value* v = ToNativeValue(); +Value* CodegenAnyVal::ToNativePtr(Value* native_ptr, MemPool* pool) { + Value* v = ToNativeValue(pool); if (native_ptr == NULL) { native_ptr = codegen_->CreateEntryBlockAlloca(*builder_, v->getType()); } @@ -563,6 +572,59 @@ Value* CodegenAnyVal::ToNativePtr(Value* native_ptr) { return native_ptr; } +// Example output for materializing an int slot: +// +// ; [insert point starts here] +// %is_null = trunc i64 %src to i1 +// br i1 %is_null, label %null, label %non_null ; +// +// non_null: ; preds = %entry +// %slot = getelementptr inbounds { i8, i32, %"struct.impala::StringValue" }* %tuple, +// i32 0, i32 1 +// %2 = ashr i64 %src, 32 +// %3 = trunc i64 %2 to i32 +// store i32 %3, i32* %slot +// br label %end_write +// +// null: ; preds = %entry +// call void @SetNull6({ i8, i32, %"struct.impala::StringValue" }* %tuple) +// br label %end_write +// +// end_write: ; preds = %null, %non_null +// ; [insert point ends here] +void CodegenAnyVal::WriteToSlot(const SlotDescriptor& slot_desc, Value* tuple, + MemPool* pool, BasicBlock* insert_before) { + DCHECK(tuple->getType()->isPointerTy()); + DCHECK(tuple->getType()->getPointerElementType()->isStructTy()); + LLVMContext& context = codegen_->context(); + Function* fn = builder_->GetInsertBlock()->getParent(); + + // Create new block that will come after conditional blocks if necessary + if (insert_before == NULL) insert_before = BasicBlock::Create(context, "end_write", fn); + + // Create new basic blocks and br instruction + BasicBlock* non_null_block = BasicBlock::Create(context, "non_null", fn, insert_before); + BasicBlock* null_block = BasicBlock::Create(context, "null", fn, 
insert_before); + builder_->CreateCondBr(GetIsNull(), null_block, non_null_block); + + // Non-null block: write slot + builder_->SetInsertPoint(non_null_block); + Value* slot = builder_->CreateStructGEP(NULL, tuple, slot_desc.llvm_field_idx(), + "slot"); + ToNativePtr(slot, pool); + builder_->CreateBr(insert_before); + + // Null block: set null bit + builder_->SetInsertPoint(null_block); + Function* set_null_fn = slot_desc.GetUpdateNullFn(codegen_, true); + DCHECK(set_null_fn != NULL); + builder_->CreateCall(set_null_fn, tuple); + builder_->CreateBr(insert_before); + + // Leave builder_ after conditional blocks + builder_->SetInsertPoint(insert_before); +} + Value* CodegenAnyVal::Eq(CodegenAnyVal* other) { DCHECK_EQ(type_, other->type_); switch (type_.type) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/codegen/codegen-anyval.h ---------------------------------------------------------------------- diff --git a/be/src/codegen/codegen-anyval.h b/be/src/codegen/codegen-anyval.h index 67e84e7..cafe522 100644 --- a/be/src/codegen/codegen-anyval.h +++ b/be/src/codegen/codegen-anyval.h @@ -80,8 +80,7 @@ class CodegenAnyVal { /// Same as above but wraps the result in a CodegenAnyVal. static CodegenAnyVal CreateCallWrapped(LlvmCodeGen* cg, LlvmCodeGen::LlvmBuilder* builder, const ColumnType& type, llvm::Function* fn, - llvm::ArrayRef<llvm::Value*> args, const char* name = "", - llvm::Value* result_ptr = NULL); + llvm::ArrayRef<llvm::Value*> args, const char* name = ""); /// Returns the lowered AnyVal type associated with 'type'. /// E.g.: TYPE_BOOLEAN (which corresponds to a BooleanVal) => i16 @@ -191,13 +190,31 @@ class CodegenAnyVal { /// Converts this *Val's value to a native type, StringValue, TimestampValue, etc. /// This should only be used if this *Val is not null. - llvm::Value* ToNativeValue(); + /// + /// If 'pool' is non-NULL, var-len data will be copied into 'pool'. 
+ llvm::Value* ToNativeValue(MemPool* pool = NULL); /// Sets 'native_ptr' to this *Val's value. If non-NULL, 'native_ptr' should be a /// pointer to a native type, StringValue, TimestampValue, etc. If NULL, a pointer is /// alloca'd. In either case the pointer is returned. This should only be used if this /// *Val is not null. - llvm::Value* ToNativePtr(llvm::Value* native_ptr = NULL); + /// + /// If 'pool' is non-NULL, var-len data will be copied into 'pool'. + llvm::Value* ToNativePtr(llvm::Value* native_ptr = NULL, MemPool* pool = NULL); + + /// Writes this *Val's value to the appropriate slot in 'tuple' if non-null, or sets the + /// appropriate null bit if null. This assumes null bits are initialized to 0. Analogous + /// to RawValue::Write(void* value, Tuple*, SlotDescriptor*, MemPool*). 'tuple' should + /// be a pointer to the generated LLVM struct type, not an opaque Tuple*. + /// + /// Creates new basic blocks in order to branch on the 'is_null' fields, and leaves + /// builder_'s insert point at the block after these new blocks. This block will be + /// 'insert_before' if specified, or a new basic block created at the end of the + /// function if 'insert_before' is NULL. + /// + /// If 'pool' is non-NULL, var-len data will be copied into 'pool'. + void WriteToSlot(const SlotDescriptor& slot_desc, llvm::Value* tuple, + MemPool* pool = NULL, llvm::BasicBlock* insert_before = NULL); /// Returns the i1 result of this == other. this and other must be non-null. 
llvm::Value* Eq(CodegenAnyVal* other); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/codegen/gen_ir_descriptions.py ---------------------------------------------------------------------- diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py index eb9d16e..71fb1a7 100755 --- a/be/src/codegen/gen_ir_descriptions.py +++ b/be/src/codegen/gen_ir_descriptions.py @@ -114,6 +114,9 @@ ir_functions = [ ["IS_NULL_STRING", "IrIsNullString"], ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"], ["RAW_VALUE_COMPARE", "8RawValue7Compare"], + ["TOPN_NODE_INSERT_BATCH", "TopNNode11InsertBatch"], + ["MEMPOOL_ALLOCATE", "MemPool8AllocateILb0"], + ["MEMPOOL_CHECKED_ALLOCATE", "MemPool8AllocateILb1"], ] enums_preamble = '\ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/codegen/impala-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/impala-ir.cc b/be/src/codegen/impala-ir.cc index 7f4faba..c419efd 100644 --- a/be/src/codegen/impala-ir.cc +++ b/be/src/codegen/impala-ir.cc @@ -26,6 +26,7 @@ #include "exec/hdfs-scanner-ir.cc" #include "exec/partitioned-aggregation-node-ir.cc" #include "exec/partitioned-hash-join-node-ir.cc" +#include "exec/topn-node-ir.cc" #include "exprs/aggregate-functions-ir.cc" #include "exprs/cast-functions-ir.cc" #include "exprs/compound-predicates-ir.cc" @@ -42,6 +43,7 @@ #include "exprs/timestamp-functions-ir.cc" #include "exprs/udf-builtins-ir.cc" #include "exprs/utility-functions-ir.cc" +#include "runtime/mem-pool.h" #include "runtime/raw-value-ir.cc" #include "udf/udf-ir.cc" #include "util/hash-util-ir.cc" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/codegen/llvm-codegen.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc index bbfd419..2b3e245 100644 --- 
a/be/src/codegen/llvm-codegen.cc +++ b/be/src/codegen/llvm-codegen.cc @@ -954,26 +954,39 @@ Status LlvmCodeGen::LoadIntrinsics() { void LlvmCodeGen::CodegenMemcpy(LlvmBuilder* builder, Value* dst, Value* src, int size) { DCHECK_GE(size, 0); if (size == 0) return; + Value* size_val = GetIntConstant(TYPE_BIGINT, size); + CodegenMemcpy(builder, dst, src, size_val); +} - // Cast src/dst to int8_t*. If they already are, this will get optimized away - DCHECK(PointerType::classof(dst->getType())); - DCHECK(PointerType::classof(src->getType())); - dst = builder->CreateBitCast(dst, ptr_type()); - src = builder->CreateBitCast(src, ptr_type()); - - // Get intrinsic function. - Function* memcpy_fn = llvm_intrinsics_[Intrinsic::memcpy]; - DCHECK(memcpy_fn != NULL); - - // The fourth argument is the alignment. For non-zero values, the caller - // must guarantee that the src and dst values are aligned to that byte boundary. - // TODO: We should try to take advantage of this since our tuples are well aligned. - Value* args[] = { - dst, src, GetIntConstant(TYPE_INT, size), - GetIntConstant(TYPE_INT, 0), - false_value() // is_volatile. 
- }; - builder->CreateCall(memcpy_fn, args); +void LlvmCodeGen::CodegenMemcpy(LlvmBuilder* builder, Value* dst, Value* src, + Value* size) { + DCHECK(dst->getType()->isPointerTy()) << Print(dst); + DCHECK(src->getType()->isPointerTy()) << Print(src); + builder->CreateMemCpy(dst, src, size, /* no alignment */ 0); +} + +void LlvmCodeGen::CodegenMemset(LlvmBuilder* builder, Value* dst, int value, int size) { + DCHECK(dst->getType()->isPointerTy()) << Print(dst); + DCHECK_GE(size, 0); + if (size == 0) return; + Value* value_const = GetIntConstant(TYPE_TINYINT, value); + builder->CreateMemSet(dst, value_const, size, /* no alignment */ 0); +} + +Value* LlvmCodeGen::CodegenAllocate(LlvmBuilder* builder, MemPool* pool, Value* size, + const char* name) { + DCHECK(pool != NULL); + DCHECK(size->getType()->isIntegerTy()); + DCHECK_LE(size->getType()->getIntegerBitWidth(), 64); + // Extend 'size' to i64 if necessary + if (size->getType()->getIntegerBitWidth() < 64) { + size = builder->CreateSExt(size, bigint_type()); + } + Function* allocate_fn = GetFunction(IRFunction::MEMPOOL_ALLOCATE, false); + PointerType* pool_type = GetPtrType(MemPool::LLVM_CLASS_NAME); + Value* pool_val = CastPtrToLlvmPtr(pool_type, pool); + Value* fn_args[] = { pool_val, size }; + return builder->CreateCall(allocate_fn, fn_args, name); } Value* LlvmCodeGen::CodegenArrayAt(LlvmBuilder* builder, Value* array, int idx, http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/codegen/llvm-codegen.h ---------------------------------------------------------------------- diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h index dcb6cc6..b4b5223 100644 --- a/be/src/codegen/llvm-codegen.h +++ b/be/src/codegen/llvm-codegen.h @@ -249,8 +249,8 @@ class LlvmCodeGen { /// false, the module will not be optimized before compilation. Status FinalizeModule(); - /// Replaces all instructions in 'caller' that call 'target_name' with a call instruction - /// to 'new_fn'. 
Returns the number of call sites updated. + /// Replaces all instructions in 'caller' that call 'target_name' with a call + /// instruction to 'new_fn'. Returns the number of call sites updated. /// /// 'target_name' must be a substring of the mangled symbol of the function to be /// replaced. This usually means that the unmangled function name is sufficient. @@ -386,7 +386,17 @@ class LlvmCodeGen { /// Codegen to call llvm memcpy intrinsic at the current builder location /// dst & src must be pointer types. size is the number of bytes to copy. /// No-op if size is zero. - void CodegenMemcpy(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, int size); + void CodegenMemcpy(LlvmBuilder* builder, llvm::Value* dst, llvm::Value* src, int size); + void CodegenMemcpy(LlvmBuilder* builder, llvm::Value* dst, llvm::Value* src, + llvm::Value* size); + + /// Codegen to call llvm memset intrinsic at the current builder location. 'dst' should + /// be a pointer. No-op if size is zero. + void CodegenMemset(LlvmBuilder* builder, llvm::Value* dst, int value, int size); + + /// Codegen to call pool->Allocate(size). + llvm::Value* CodegenAllocate(LlvmBuilder* builder, MemPool* pool, llvm::Value* size, + const char* name = ""); /// Codegens IR to load array[idx] and returns the loaded value. 'array' should be a /// C-style array (e.g. i32*) or an IR array (e.g. [10 x i32]). 
This function does not http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/exec/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index af86306..c3208fc 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -80,6 +80,7 @@ add_library(Exec subplan-node.cc text-converter.cc topn-node.cc + topn-node-ir.cc union-node.cc unnest-node.cc ) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/exec/topn-node-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/topn-node-ir.cc b/be/src/exec/topn-node-ir.cc new file mode 100644 index 0000000..e56090e --- /dev/null +++ b/be/src/exec/topn-node-ir.cc @@ -0,0 +1,49 @@ +// Copyright 2016 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "exec/topn-node.h" + +using namespace impala; + +void TopNNode::InsertBatch(RowBatch* batch) { + for (int i = 0; i < batch->num_rows(); ++i) { + InsertTupleRow(batch->GetRow(i)); + } +} + +// Insert if either not at the limit or it's a new TopN tuple_row +void TopNNode::InsertTupleRow(TupleRow* input_row) { + Tuple* insert_tuple = NULL; + + if (priority_queue_->size() < limit_ + offset_) { + insert_tuple = reinterpret_cast<Tuple*>( + tuple_pool_->Allocate(materialized_tuple_desc_->byte_size())); + insert_tuple->MaterializeExprs<false, false>(input_row, *materialized_tuple_desc_, + sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), tuple_pool_.get()); + } else { + DCHECK(!priority_queue_->empty()); + Tuple* top_tuple = priority_queue_->top(); + tmp_tuple_->MaterializeExprs<false, true>(input_row, *materialized_tuple_desc_, + sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), NULL); + if (tuple_row_less_than_->Less(tmp_tuple_, top_tuple)) { + // TODO: DeepCopy() will allocate new buffers for the string data. 
This needs + // to be fixed to use a freelist + tmp_tuple_->DeepCopy(top_tuple, *materialized_tuple_desc_, tuple_pool_.get()); + insert_tuple = top_tuple; + priority_queue_->pop(); + } + } + + if (insert_tuple != NULL) priority_queue_->push(insert_tuple); +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/exec/topn-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc index a0ad825..a73c3b5 100644 --- a/be/src/exec/topn-node.cc +++ b/be/src/exec/topn-node.cc @@ -16,6 +16,7 @@ #include <sstream> +#include "codegen/llvm-codegen.h" #include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/mem-pool.h" @@ -33,6 +34,7 @@ using std::priority_queue; using namespace impala; +using namespace llvm; TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), @@ -41,6 +43,7 @@ TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl tuple_row_less_than_(NULL), tmp_tuple_(NULL), tuple_pool_(NULL), + codegend_insert_batch_fn_(NULL), num_rows_skipped_(0), priority_queue_(NULL) { } @@ -57,10 +60,44 @@ Status TopNNode::Init(const TPlanNode& tnode, RuntimeState* state) { return Status::OK(); } +Status TopNNode::Codegen(RuntimeState* state) { + DCHECK(materialized_tuple_desc_ != NULL); + LlvmCodeGen* codegen; + RETURN_IF_ERROR(state->GetCodegen(&codegen)); + Function* insert_batch_fn = + codegen->GetFunction(IRFunction::TOPN_NODE_INSERT_BATCH, true); + + // Generate two MaterializeExprs() functions, one using tuple_pool_ and one with no + // pool. 
+ Function* materialize_exprs_tuple_pool_fn; + RETURN_IF_ERROR(Tuple::CodegenMaterializeExprs(state, false, *materialized_tuple_desc_, + sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), tuple_pool_.get(), + &materialize_exprs_tuple_pool_fn)); + + Function* materialize_exprs_no_pool_fn; + RETURN_IF_ERROR(Tuple::CodegenMaterializeExprs(state, false, *materialized_tuple_desc_, + sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), NULL, &materialize_exprs_no_pool_fn)); + + int replaced = codegen->ReplaceCallSites(insert_batch_fn, + materialize_exprs_tuple_pool_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL); + DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn); + + replaced = codegen->ReplaceCallSites(insert_batch_fn, materialize_exprs_no_pool_fn, + Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL); + DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn); + + insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn); + DCHECK(insert_batch_fn != NULL); + codegen->AddFunctionToJit(insert_batch_fn, + reinterpret_cast<void**>(&codegend_insert_batch_fn_)); + return Status::OK(); +} + Status TopNNode::Prepare(RuntimeState* state) { SCOPED_TIMER(runtime_profile_->total_time_counter()); RETURN_IF_ERROR(ExecNode::Prepare(state)); tuple_pool_.reset(new MemPool(mem_tracker())); + materialized_tuple_desc_ = row_descriptor_.tuple_descriptors()[0]; RETURN_IF_ERROR(sort_exec_exprs_.Prepare( state, child(0)->row_desc(), row_descriptor_, expr_mem_tracker())); AddExprCtxsToFree(sort_exec_exprs_); @@ -69,13 +106,16 @@ Status TopNNode::Prepare(RuntimeState* state) { bool codegen_enabled = false; Status codegen_status; if (state->codegen_enabled()) { + // TODO: inline tuple_row_less_than_->Compare() codegen_status = tuple_row_less_than_->Codegen(state); + codegen_status.MergeStatus(Codegen(state)); codegen_enabled = codegen_status.ok(); } AddCodegenExecOption(codegen_enabled, codegen_status); priority_queue_.reset(new priority_queue<Tuple*, vector<Tuple*>, ComparatorWrapper<TupleRowComparator> 
>(*tuple_row_less_than_)); materialized_tuple_desc_ = row_descriptor_.tuple_descriptors()[0]; + insert_batch_timer_ = ADD_TIMER(runtime_profile(), "InsertBatchTime"); return Status::OK(); } @@ -99,8 +139,13 @@ Status TopNNode::Open(RuntimeState* state) { do { batch.Reset(); RETURN_IF_ERROR(child(0)->GetNext(state, &batch, &eos)); - for (int i = 0; i < batch.num_rows(); ++i) { - InsertTupleRow(batch.GetRow(i)); + { + SCOPED_TIMER(insert_batch_timer_); + if (codegend_insert_batch_fn_ != NULL) { + codegend_insert_batch_fn_(this, &batch); + } else { + InsertBatch(&batch); + } } RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(QueryMaintenance(state)); @@ -161,32 +206,6 @@ void TopNNode::Close(RuntimeState* state) { ExecNode::Close(state); } -// Insert if either not at the limit or it's a new TopN tuple_row -void TopNNode::InsertTupleRow(TupleRow* input_row) { - Tuple* insert_tuple = NULL; - - if (priority_queue_->size() < limit_ + offset_) { - insert_tuple = reinterpret_cast<Tuple*>( - tuple_pool_->Allocate(materialized_tuple_desc_->byte_size())); - insert_tuple->MaterializeExprs<false>(input_row, *materialized_tuple_desc_, - sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), tuple_pool_.get()); - } else { - DCHECK(!priority_queue_->empty()); - Tuple* top_tuple = priority_queue_->top(); - tmp_tuple_->MaterializeExprs<false>(input_row, *materialized_tuple_desc_, - sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), NULL); - if (tuple_row_less_than_->Less(tmp_tuple_, top_tuple)) { - // TODO: DeepCopy() will allocate new buffers for the string data. 
This needs - // to be fixed to use a freelist - tmp_tuple_->DeepCopy(top_tuple, *materialized_tuple_desc_, tuple_pool_.get()); - insert_tuple = top_tuple; - priority_queue_->pop(); - } - } - - if (insert_tuple != NULL) priority_queue_->push(insert_tuple); -} - // Reverse the order of the tuples in the priority queue void TopNNode::PrepareForOutput() { sorted_top_n_.resize(priority_queue_->size()); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/exec/topn-node.h ---------------------------------------------------------------------- diff --git a/be/src/exec/topn-node.h b/be/src/exec/topn-node.h index a9e0bd9..29c89e0 100644 --- a/be/src/exec/topn-node.h +++ b/be/src/exec/topn-node.h @@ -19,6 +19,7 @@ #include <queue> #include <boost/scoped_ptr.hpp> +#include "codegen/impala-ir.h" #include "exec/exec-node.h" #include "exec/sort-exec-exprs.h" #include "runtime/descriptors.h" // for TupleId @@ -53,9 +54,15 @@ class TopNNode : public ExecNode { friend class TupleLessThan; + /// Creates a codegen'd version of InsertBatch() that is used in Open(). + Status Codegen(RuntimeState* state); + + /// Inserts all the rows in 'batch' into the queue. + void InsertBatch(RowBatch* batch); + /// Inserts a tuple row into the priority queue if it's in the TopN. Creates a deep /// copy of tuple_row, which it stores in tuple_pool_. - void InsertTupleRow(TupleRow* tuple_row); + void IR_ALWAYS_INLINE InsertTupleRow(TupleRow* tuple_row); /// Flatten and reverse the priority queue. void PrepareForOutput(); @@ -86,9 +93,15 @@ class TopNNode : public ExecNode { /// Stores everything referenced in priority_queue_. boost::scoped_ptr<MemPool> tuple_pool_; - // Iterator over elements in sorted_top_n_. + /// Iterator over elements in sorted_top_n_. 
std::vector<Tuple*>::iterator get_next_iter_; + typedef void (*InsertBatchFn)(TopNNode*, RowBatch*); + InsertBatchFn codegend_insert_batch_fn_; + + /// Timer for time spent in InsertBatch() function (or codegen'd version) + RuntimeProfile::Counter* insert_batch_timer_; + ///////////////////////////////////////// /// BEGIN: Members that must be Reset() http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/descriptors.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc index dd45a3e..387d377 100644 --- a/be/src/runtime/descriptors.cc +++ b/be/src/runtime/descriptors.cc @@ -60,6 +60,8 @@ namespace impala { const int RowDescriptor::INVALID_IDX; +const char* TupleDescriptor::LLVM_CLASS_NAME = "class.impala::TupleDescriptor"; + string NullIndicatorOffset::DebugString() const { stringstream out; out << "(offset=" << byte_offset http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/descriptors.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index 45b4183..18d835c 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -395,6 +395,8 @@ class TupleDescriptor { /// The resulting struct definition is cached. 
llvm::StructType* GetLlvmStruct(LlvmCodeGen* codegen) const; + static const char* LLVM_CLASS_NAME; + protected: friend class DescriptorTbl; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/mem-pool.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/mem-pool.h b/be/src/runtime/mem-pool.h index 5079447..9e22e9f 100644 --- a/be/src/runtime/mem-pool.h +++ b/be/src/runtime/mem-pool.h @@ -229,6 +229,10 @@ class MemPool { } }; +// Stamp out templated implementations here so they're included in IR module +template uint8_t* MemPool::Allocate<false>(int64_t size); +template uint8_t* MemPool::Allocate<true>(int64_t size); + } #endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/raw-value.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc index 2874ee1..b8be788 100644 --- a/be/src/runtime/raw-value.cc +++ b/be/src/runtime/raw-value.cc @@ -151,6 +151,9 @@ void RawValue::Write(const void* value, void* dst, const ColumnType& type, dest->len = src->len; if (type.type == TYPE_VARCHAR) DCHECK_LE(dest->len, type.len); if (pool != NULL) { + // Note: if this changes to TryAllocate(), CodegenAnyVal::WriteToSlot() will need + // to reflect this change as well (the codegen'd Allocate() call is actually + // generated in CodegenAnyVal::ToNativeValue()). 
dest->ptr = reinterpret_cast<char*>(pool->Allocate(dest->len)); memcpy(dest->ptr, src->ptr, dest->len); } else { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/sorter.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc index 4640374..96c27df 100644 --- a/be/src/runtime/sorter.cc +++ b/be/src/runtime/sorter.cc @@ -454,7 +454,7 @@ Status Sorter::Run::AddBatch(RowBatch* batch, int start_index, int* num_processe TupleRow* input_row = batch->GetRow(cur_input_index); Tuple* new_tuple = cur_fixed_len_block->Allocate<Tuple>(sort_tuple_size_); if (materialize_slots_) { - new_tuple->MaterializeExprs<has_var_len_data>(input_row, *sort_tuple_desc_, + new_tuple->MaterializeExprs<has_var_len_data, true>(input_row, *sort_tuple_desc_, sorter_->sort_tuple_slot_expr_ctxs_, NULL, &string_values, &total_var_len); if (total_var_len > sorter_->block_mgr_->max_block_size()) { return Status(ErrorMsg(TErrorCode::INTERNAL_ERROR, Substitute( http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/tuple.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc index c2fda93..4142241 100644 --- a/be/src/runtime/tuple.cc +++ b/be/src/runtime/tuple.cc @@ -15,22 +15,31 @@ #include "runtime/tuple.h" #include <vector> +#include "llvm/IR/Function.h" +#include "codegen/codegen-anyval.h" +#include "codegen/llvm-codegen.h" #include "exprs/expr.h" #include "exprs/expr-context.h" #include "runtime/collection-value.h" #include "runtime/descriptors.h" #include "runtime/mem-pool.h" #include "runtime/raw-value.h" -#include "runtime/tuple-row.h" +#include "runtime/runtime-state.h" #include "runtime/string-value.h" +#include "runtime/tuple-row.h" #include "util/debug-util.h" #include "common/names.h" +using namespace llvm; + namespace impala { - const char* Tuple::LLVM_CLASS_NAME = 
"class.impala::Tuple"; +const char* Tuple::LLVM_CLASS_NAME = "class.impala::Tuple"; + +const char* Tuple::MATERIALIZE_EXPRS_SYMBOL = "MaterializeExprsILb0ELb0"; +const char* Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL = "MaterializeExprsILb0ELb1"; int64_t Tuple::TotalByteSize(const TupleDescriptor& desc) const { int64_t result = desc.byte_size(); @@ -189,18 +198,13 @@ void Tuple::ConvertOffsetsToPointers(const TupleDescriptor& desc, uint8_t* tuple } } -template <bool collect_string_vals> +template <bool COLLECT_STRING_VALS, bool NO_POOL> void Tuple::MaterializeExprs( - TupleRow* row, const TupleDescriptor& desc, - const vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool, - vector<StringValue*>* non_null_string_values, int* total_string) { - DCHECK_EQ(materialize_expr_ctxs.size(), desc.slots().size()); - if (collect_string_vals) { - non_null_string_values->clear(); - *total_string = 0; - } + TupleRow* row, const TupleDescriptor& desc, ExprContext* const* materialize_expr_ctxs, + MemPool* pool, StringValue** non_null_string_values, int* total_string_lengths, + int* num_non_null_string_values) { memset(this, 0, desc.num_null_bytes()); - // Evaluate the output_slot_exprs and place the results in the tuples. + // Evaluate the materialize_expr_ctxs and place the results in the tuple. 
for (int i = 0; i < desc.slots().size(); ++i) { SlotDescriptor* slot_desc = desc.slots()[i]; // The FE ensures we don't get any TYPE_NULL expressions by picking an arbitrary type @@ -212,10 +216,11 @@ void Tuple::MaterializeExprs( if (src != NULL) { void* dst = GetSlot(slot_desc->tuple_offset()); RawValue::Write(src, dst, slot_desc->type(), pool); - if (collect_string_vals && slot_desc->type().IsVarLenStringType()) { + if (COLLECT_STRING_VALS && slot_desc->type().IsVarLenStringType()) { StringValue* string_val = reinterpret_cast<StringValue*>(dst); - non_null_string_values->push_back(string_val); - *total_string += string_val->len; + *(non_null_string_values++) = string_val; + *total_string_lengths += string_val->len; + ++(*num_non_null_string_values); } } else { SetNull(slot_desc->null_indicator_offset()); @@ -223,11 +228,171 @@ void Tuple::MaterializeExprs( } } -template void Tuple::MaterializeExprs<false>(TupleRow* row, const TupleDescriptor& desc, - const vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool, - vector<StringValue*>* non_null_var_values, int* total_var_len); +// Codegens an unrolled version of MaterializeExprs(). Uses codegen'd exprs and slot +// writes. If 'pool' is non-NULL, string data is copied into it. Note that the generated +// function ignores its 'pool' arg; instead we hardcode the pointer in the IR. 
+// +// Example IR for materializing an int column and a string column with non-NULL 'pool': +// +// ; Function Attrs: alwaysinline +// define void @MaterializeExprs(%"class.impala::Tuple"* %opaque_tuple, +// %"class.impala::TupleRow"* %row, %"class.impala::TupleDescriptor"* %desc, +// %"class.impala::ExprContext"** %materialize_expr_ctxs, +// %"class.impala::MemPool"* %pool, +// %"struct.impala::StringValue"** %non_null_string_values, +// i32* %total_string_lengths, i32* %num_non_null_string_values) #20 { +// entry: +// %tuple = bitcast %"class.impala::Tuple"* %opaque_tuple to +// { i8, i32, %"struct.impala::StringValue" }* +// %0 = bitcast { i8, i32, %"struct.impala::StringValue" }* %tuple to i8* +// call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 1, i32 0, i1 false) +// %1 = getelementptr %"class.impala::ExprContext"** %materialize_expr_ctxs, i32 0 +// %expr_ctx = load %"class.impala::ExprContext"** %1 +// %src = call i64 @GetSlotRef4(%"class.impala::ExprContext"* %expr_ctx, +// %"class.impala::TupleRow"* %row) +// ; ----- generated by CodegenAnyVal::WriteToSlot() ---------------------------------- +// %is_null = trunc i64 %src to i1 +// br i1 %is_null, label %null, label %non_null +// +// non_null: ; preds = %entry +// %slot = getelementptr inbounds { i8, i32, %"struct.impala::StringValue" }* %tuple, +// i32 0, i32 1 +// %2 = ashr i64 %src, 32 +// %3 = trunc i64 %2 to i32 +// store i32 %3, i32* %slot +// br label %end_write +// +// null: ; preds = %entry +// call void @SetNull6({ i8, i32, %"struct.impala::StringValue" }* %tuple) +// br label %end_write +// +// end_write: ; preds = %null, %non_null +// ; ----- end CodegenAnyVal::WriteToSlot() ------------------------------------------- +// %4 = getelementptr %"class.impala::ExprContext"** %materialize_expr_ctxs, i32 1 +// %expr_ctx1 = load %"class.impala::ExprContext"** %4 +// %src2 = call { i64, i8* } @GetSlotRef5(%"class.impala::ExprContext"* %expr_ctx1, +// %"class.impala::TupleRow"* %row) +// ; ----- generated by 
CodegenAnyVal::WriteToSlot() ---------------------------------- +// %5 = extractvalue { i64, i8* } %src2, 0 +// %is_null5 = trunc i64 %5 to i1 +// br i1 %is_null5, label %null4, label %non_null3 +// +// non_null3: ; preds = %end_write +// %slot7 = getelementptr inbounds { i8, i32, %"struct.impala::StringValue" }* %tuple, +// i32 0, i32 2 +// %6 = extractvalue { i64, i8* } %src2, 0 +// %7 = ashr i64 %6, 32 +// %8 = trunc i64 %7 to i32 +// %9 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %8, 1 +// %new_ptr = call i8* @_ZN6impala7MemPool8AllocateILb0EEEPhi( +// %"class.impala::MemPool"* inttoptr (i64 159661008 to %"class.impala::MemPool"*), +// i32 %8) +// %src8 = extractvalue { i64, i8* } %src2, 1 +// call void @llvm.memcpy.p0i8.p0i8.i32(i8* %new_ptr, i8* %src8, i32 %8, i32 0, +// i1 false) +// %10 = insertvalue %"struct.impala::StringValue" %9, i8* %new_ptr, 0 +// store %"struct.impala::StringValue" %10, %"struct.impala::StringValue"* %slot7 +// br label %end_write6 +// +// null4: ; preds = %end_write +// call void @SetNull7({ i8, i32, %"struct.impala::StringValue" }* %tuple) +// br label %end_write6 +// +// end_write6: ; preds = %null4, %non_null3 +// ; ----- end CodegenAnyVal::WriteToSlot() ------------------------------------------- +// ret void +// } +Status Tuple::CodegenMaterializeExprs(RuntimeState* state, bool collect_string_vals, + const TupleDescriptor& desc, const vector<ExprContext*>& materialize_expr_ctxs, + MemPool* pool, Function** fn) { + DCHECK(!collect_string_vals) << "CodegenMaterializeExprs: collect_string_vals NYI"; + LlvmCodeGen* codegen; + RETURN_IF_ERROR(state->GetCodegen(&codegen)); + SCOPED_TIMER(codegen->codegen_timer()); + LLVMContext& context = codegen->context(); + + // Codegen each compute function from materialize_expr_ctxs + Function* materialize_expr_fns[materialize_expr_ctxs.size()]; + for (int i = 0; i < materialize_expr_ctxs.size(); ++i) { + Status status = 
materialize_expr_ctxs[i]->root()->GetCodegendComputeFn(state, + &materialize_expr_fns[i]); + if (!status.ok()) { + stringstream ss; + ss << "Could not codegen CodegenMaterializeExprs: " << status.GetDetail(); + return Status(ss.str()); + } + } + + // Construct function signature (this must exactly match the actual signature since it's + // used in xcompiled IR). With 'pool': + // void MaterializeExprs(Tuple* tuple, TupleRow* row, TupleDescriptor* desc, + // ExprContext** materialize_expr_ctxs, MemPool* pool, + // StringValue** non_null_string_values, int* total_string_lengths, + // int* num_non_null_string_values) + PointerType* opaque_tuple_type = codegen->GetPtrType(Tuple::LLVM_CLASS_NAME); + PointerType* row_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME); + PointerType* desc_type = codegen->GetPtrType(TupleDescriptor::LLVM_CLASS_NAME); + PointerType* expr_ctxs_type = + codegen->GetPtrType(codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME)); + PointerType* pool_type = codegen->GetPtrType(MemPool::LLVM_CLASS_NAME); + PointerType* string_values_type = + codegen->GetPtrType(codegen->GetPtrType(StringValue::LLVM_CLASS_NAME)); + PointerType* int_ptr_type = codegen->GetPtrType(TYPE_INT); + LlvmCodeGen::FnPrototype prototype(codegen, "MaterializeExprs", codegen->void_type()); + prototype.AddArgument("opaque_tuple", opaque_tuple_type); + prototype.AddArgument("row", row_type); + prototype.AddArgument("desc", desc_type); + prototype.AddArgument("materialize_expr_ctxs", expr_ctxs_type); + prototype.AddArgument("pool", pool_type); + prototype.AddArgument("non_null_string_values", string_values_type); + prototype.AddArgument("total_string_lengths", int_ptr_type); + prototype.AddArgument("num_non_null_string_values", int_ptr_type); + + LlvmCodeGen::LlvmBuilder builder(context); + // 'args' is sized to the eight arguments added to 'prototype' above. 
+ Value* args[8]; + *fn = prototype.GeneratePrototype(&builder, args); + Value* opaque_tuple_arg = args[0]; + Value* row_arg = args[1]; + Value* expr_ctxs_arg = args[3]; + // 'desc', 'pool', 'non_null_string_values', 
'total_string_lengths', and + // 'num_non_null_string_values' are unused + + // Cast the opaque Tuple* argument to the generated struct type + Type* tuple_struct_type = desc.GetLlvmStruct(codegen); + PointerType* tuple_type = codegen->GetPtrType(tuple_struct_type); + Value* tuple = builder.CreateBitCast(opaque_tuple_arg, tuple_type, "tuple"); + + // Memset tuple's null bytes + codegen->CodegenMemset(&builder, tuple, 0, desc.num_null_bytes()); + + // Evaluate the materialize_expr_ctxs and place the results in the tuple. + for (int i = 0; i < desc.slots().size(); ++i) { + SlotDescriptor* slot_desc = desc.slots()[i]; + DCHECK(slot_desc->type().type == TYPE_NULL || + slot_desc->type() == materialize_expr_ctxs[i]->root()->type()); + + // Call materialize_expr_fns[i](materialize_expr_ctxs[i], row) + Value* expr_ctx = codegen->CodegenArrayAt(&builder, expr_ctxs_arg, i, "expr_ctx"); + Value* expr_args[] = { expr_ctx, row_arg }; + CodegenAnyVal src = CodegenAnyVal::CreateCallWrapped(codegen, &builder, + materialize_expr_ctxs[i]->root()->type(), + materialize_expr_fns[i], expr_args, "src"); + + // Write expr result 'src' to slot + src.WriteToSlot(*slot_desc, tuple, pool); + } + builder.CreateRetVoid(); + // TODO: if pool != NULL, OptimizeFunctionWithExprs() is inlining the Allocate() + // call. Investigate if this is a good thing. 
+ *fn = codegen->FinalizeFunction(*fn); + return Status::OK(); +} -template void Tuple::MaterializeExprs<true>(TupleRow* row, const TupleDescriptor& desc, - const vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool, - vector<StringValue*>* non_null_var_values, int* total_var_len); +template void Tuple::MaterializeExprs<false, false>(TupleRow*, const TupleDescriptor&, + ExprContext* const*, MemPool*, StringValue**, int*, int*); +template void Tuple::MaterializeExprs<false, true>(TupleRow*, const TupleDescriptor&, + ExprContext* const*, MemPool*, StringValue**, int*, int*); +template void Tuple::MaterializeExprs<true, false>(TupleRow*, const TupleDescriptor&, + ExprContext* const*, MemPool*, StringValue**, int*, int*); +template void Tuple::MaterializeExprs<true, true>(TupleRow*, const TupleDescriptor&, + ExprContext* const*, MemPool*, StringValue**, int*, int*); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8e8df2f2/be/src/runtime/tuple.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h index 6151cb4..c8a3e07 100644 --- a/be/src/runtime/tuple.h +++ b/be/src/runtime/tuple.h @@ -17,11 +17,16 @@ #define IMPALA_RUNTIME_TUPLE_H #include <cstring> +#include "codegen/impala-ir.h" #include "common/logging.h" #include "gutil/macros.h" #include "runtime/descriptors.h" #include "runtime/mem-pool.h" +namespace llvm { +class Function; +} + namespace impala { struct CollectionValue; @@ -103,19 +108,63 @@ class Tuple { /// regardless of this tuple's offset in 'tuple_data'. void ConvertOffsetsToPointers(const TupleDescriptor& desc, uint8_t* tuple_data); - /// Materialize this by evaluating the expressions in materialize_exprs - /// over the specified 'row'. 'pool' is used to allocate var-length data. - /// (Memory for this tuple itself must already be allocated.) 
- /// If collect_string_vals is true, the materialized non-NULL string value - /// slots and the total length of the string slots are returned in var_values - /// and total_string. + /// Materialize 'this' by evaluating the expressions in 'materialize_expr_ctxs' over + /// the specified 'row'. + /// + /// If non-NULL, 'pool' is used to allocate var-length data, otherwise var-length data + /// isn't copied. (Memory for this tuple itself must already be allocated.) 'NULL_POOL' + /// should be true if 'pool' is NULL and false otherwise. The template parameter serves + /// only to differentiate the NULL vs. non-NULL pool cases when we replace the function + /// calls during codegen; the parameter means there are two different function symbols. + /// + /// If 'COLLECT_STRING_VALS' is true, the materialized non-NULL string value slots and + /// the total length of the string slots are returned in 'non_null_string_values' and + /// 'total_string_lengths'. 'non_null_string_values' and 'total_string_lengths' must be + /// non-NULL in this case. 'non_null_string_values' does not need to be empty; its + /// original contents will be overwritten. + /// TODO: this function does not collect other var-len types such as collections. 
- template <bool collect_string_vals> - void MaterializeExprs( - TupleRow* row, const TupleDescriptor& desc, - const std::vector<ExprContext*>& materialize_expr_ctxs, MemPool* pool, - std::vector<StringValue*>* non_null_string_values = NULL, - int* total_string = NULL); + template <bool COLLECT_STRING_VALS, bool NULL_POOL> + inline void IR_ALWAYS_INLINE MaterializeExprs(TupleRow* row, + const TupleDescriptor& desc, const std::vector<ExprContext*>& materialize_expr_ctxs, + MemPool* pool, std::vector<StringValue*>* non_null_string_values = NULL, + int* total_string_lengths = NULL) { + DCHECK_EQ(NULL_POOL, pool == NULL); + DCHECK_EQ(materialize_expr_ctxs.size(), desc.slots().size()); + StringValue** non_null_string_values_array = NULL; + int num_non_null_string_values = 0; + if (COLLECT_STRING_VALS) { + DCHECK(non_null_string_values != NULL); + DCHECK(total_string_lengths != NULL); + // vector::resize() will zero-initialize any new values, so we resize to the largest + // possible size here, then truncate the vector below once we know the actual size + // (which preserves already-written values). + non_null_string_values->resize(desc.string_slots().size()); + non_null_string_values_array = non_null_string_values->data(); + *total_string_lengths = 0; + } + MaterializeExprs<COLLECT_STRING_VALS, NULL_POOL>(row, desc, + materialize_expr_ctxs.data(), pool, non_null_string_values_array, + total_string_lengths, &num_non_null_string_values); + if (COLLECT_STRING_VALS) non_null_string_values->resize(num_non_null_string_values); + } + + /// Symbols (or substrings of the symbols) of MaterializeExprs(). These can be passed to + /// LlvmCodeGen::ReplaceCallSites(). + static const char* MATERIALIZE_EXPRS_SYMBOL; + static const char* MATERIALIZE_EXPRS_NULL_POOL_SYMBOL; + + /// Generates an IR version of MaterializeExprs(), returned in 'fn'. Currently only + /// 'collect_string_vals' = false is implemented. 
+ /// + /// 'pool' may be NULL, in which case no pool-related code is generated. Otherwise + /// 'pool's address is used directly in the IR. Note that this requires generating + /// separate functions for the non-NULL and NULL cases, i.e., the 'pool' argument of the + /// generated function is ignored. There are two different MaterializeExprs symbols to + /// differentiate these cases when we replace the function calls during codegen. + static Status CodegenMaterializeExprs(RuntimeState* state, bool collect_string_vals, + const TupleDescriptor& desc, const vector<ExprContext*>& materialize_expr_ctxs, + MemPool* pool, llvm::Function** fn); /// Turn null indicator bit on. For non-nullable slots, the mask will be 0 and /// this is a no-op (but we don't have to branch to check if slots are nullable). @@ -183,6 +232,14 @@ class Tuple { /// and referenced collection and string data. void DeepCopyVarlenData(const TupleDescriptor& desc, char** data, int* offset, bool convert_ptrs); + + /// Implementation of MaterializeExprs(). This function is replaced during + /// codegen. 'num_non_null_string_values' must be initialized by the caller. + template <bool COLLECT_STRING_VALS, bool NULL_POOL> + void IR_NO_INLINE MaterializeExprs(TupleRow* row, const TupleDescriptor& desc, + ExprContext* const* materialize_expr_ctxs, MemPool* pool, + StringValue** non_null_string_values, int* total_string_lengths, + int* num_non_null_string_values); }; }
