http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exec/partitioned-aggregation-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc index 17fbb6c..56db26f 100644 --- a/be/src/exec/partitioned-aggregation-node.cc +++ b/be/src/exec/partitioned-aggregation-node.cc @@ -17,21 +17,20 @@ #include "exec/partitioned-aggregation-node.h" -#include <algorithm> #include <math.h> +#include <algorithm> #include <set> #include <sstream> -#include <gutil/strings/substitute.h> -#include <thrift/protocol/TDebugProtocol.h> #include "codegen/codegen-anyval.h" #include "codegen/llvm-codegen.h" #include "exec/hash-table.inline.h" #include "exprs/agg-fn-evaluator.h" #include "exprs/anyval-util.h" -#include "exprs/expr.h" #include "exprs/expr-context.h" +#include "exprs/expr.h" #include "exprs/slot-ref.h" +#include "gutil/strings/substitute.h" #include "runtime/buffered-tuple-stream.inline.h" #include "runtime/descriptors.h" #include "runtime/mem-pool.h" @@ -40,8 +39,8 @@ #include "runtime/row-batch.h" #include "runtime/runtime-state.h" #include "runtime/string-value.inline.h" -#include "runtime/tuple.h" #include "runtime/tuple-row.h" +#include "runtime/tuple.h" #include "udf/udf-internal.h" #include "util/debug-util.h" #include "util/runtime-profile-counters.h" @@ -150,21 +149,20 @@ Status PartitionedAggregationNode::Init(const TPlanNode& tnode, RuntimeState* st Expr::CreateExprTrees(pool_, tnode.agg_node.grouping_exprs, &grouping_expr_ctxs_)); for (int i = 0; i < tnode.agg_node.aggregate_functions.size(); ++i) { AggFnEvaluator* evaluator; - RETURN_IF_ERROR(AggFnEvaluator::Create( - pool_, tnode.agg_node.aggregate_functions[i], &evaluator)); + RETURN_IF_ERROR( + AggFnEvaluator::Create(pool_, tnode.agg_node.aggregate_functions[i], &evaluator)); aggregate_evaluators_.push_back(evaluator); - ExprContext* agg_expr_ctx; - if (evaluator->input_expr_ctxs().size() == 1) { - agg_expr_ctx = evaluator->input_expr_ctxs()[0]; + ExprContext* const* agg_expr_ctxs; + if (evaluator->input_expr_ctxs().size() > 0) { + agg_expr_ctxs = evaluator->input_expr_ctxs().data(); } else { - // CodegenUpdateSlot() can only support aggregate operator with only one ExprContext - // so it doesn't support operator such as group_concat. There are also aggregate - // operators with no ExprContext (e.g. count(*)). In cases above, 'agg_expr_ctxs_' - // will contain NULL for that entry. + // Some aggregate functions have no input expressions and therefore no ExprContext + // (e.g. count(*)). In those cases, 'agg_expr_ctxs_' will contain NULL for that + // entry. DCHECK(evaluator->agg_op() == AggFnEvaluator::OTHER || evaluator->is_count_star()); - agg_expr_ctx = NULL; + agg_expr_ctxs = NULL; } - agg_expr_ctxs_.push_back(agg_expr_ctx); + agg_expr_ctxs_.push_back(agg_expr_ctxs); } return Status::OK(); } @@ -1059,32 +1057,31 @@ void PartitionedAggregationNode::InitAggSlots( intermediate_tuple_desc_->slots().begin() + grouping_expr_ctxs_.size(); for (int i = 0; i < aggregate_evaluators_.size(); ++i, ++slot_desc) { AggFnEvaluator* evaluator = aggregate_evaluators_[i]; + // To minimize branching on the UpdateTuple path, initialize the result value so that + // the Add() UDA function can ignore the NULL bit of its destination value. E.g. for + // SUM(), if we initialize the destination value to 0 (with the NULL bit set), we can + // just start adding to the destination value (rather than repeatedly checking the + // destination NULL bit. The codegen'd version of UpdateSlot() exploits this to + // eliminate a branch per value. + // + // For boolean and numeric types, the default values are false/0, so the nullable + // aggregate functions SUM() and AVG() produce the correct result. For MIN()/MAX(), + // initialize the value to max/min possible value for the same effect. evaluator->Init(agg_fn_ctxs[i], intermediate_tuple); - // Codegen specific path for min/max. - // To minimize branching on the UpdateTuple path, initialize the result value - // so that UpdateTuple doesn't have to check if the aggregation - // dst slot is null. - // TODO: remove when we don't use the irbuilder for codegen here. This optimization - // will no longer be necessary when all aggregates are implemented with the UDA - // interface. - if ((*slot_desc)->type().type != TYPE_STRING && - (*slot_desc)->type().type != TYPE_VARCHAR && - (*slot_desc)->type().type != TYPE_TIMESTAMP && - (*slot_desc)->type().type != TYPE_CHAR) { + + AggFnEvaluator::AggregationOp agg_op = evaluator->agg_op(); + if ((agg_op == AggFnEvaluator::MIN || agg_op == AggFnEvaluator::MAX) + && !evaluator->intermediate_type().IsStringType() + && !evaluator->intermediate_type().IsTimestampType()) { ExprValue default_value; void* default_value_ptr = NULL; - switch (evaluator->agg_op()) { - case AggFnEvaluator::MIN: - default_value_ptr = default_value.SetToMax((*slot_desc)->type()); - RawValue::Write(default_value_ptr, intermediate_tuple, *slot_desc, NULL); - break; - case AggFnEvaluator::MAX: - default_value_ptr = default_value.SetToMin((*slot_desc)->type()); - RawValue::Write(default_value_ptr, intermediate_tuple, *slot_desc, NULL); - break; - default: - break; + if (evaluator->agg_op() == AggFnEvaluator::MIN) { + default_value_ptr = default_value.SetToMax((*slot_desc)->type()); + } else { + DCHECK_EQ(evaluator->agg_op(), AggFnEvaluator::MAX); + default_value_ptr = default_value.SetToMin((*slot_desc)->type()); } + RawValue::Write(default_value_ptr, intermediate_tuple, *slot_desc, NULL); } } } @@ -1450,111 +1447,123 @@ Status PartitionedAggregationNode::QueryMaintenance(RuntimeState* state) { // void UpdateSlot(FunctionContext* agg_fn_ctx, ExprContext* agg_expr_ctx, // AggTuple* agg_tuple, char** row) // -// The IR for sum(double_col) is: +// The IR for sum(double_col), which is constructed directly with the IRBuilder, is: // // define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"* %agg_expr_ctx, -// { i8, [7 x i8], double }* %agg_tuple, -// %"class.impala::TupleRow"* %row) #34 { -// +// %"class.impala::ExprContext"** %agg_expr_ctxs, +// { i8, [7 x i8], double }* %agg_tuple, %"class.impala::TupleRow"* %row) #34 { // entry: -// %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %agg_expr_ctx, -// %"class.impala::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry +// %expr_ctx_ptr = getelementptr %"class.impala::ExprContext"*, +// %"class.impala::ExprContext"** %agg_expr_ctxs, i32 0 +// %expr_ctx = load %"class.impala::ExprContext"*, +// %"class.impala::ExprContext"** %expr_ctx_ptr +// %input0 = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx, +// %"class.impala::TupleRow"* %row) // %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], double }, // { i8, [7 x i8], double }* %agg_tuple, i32 0, i32 2 -// call void @SetNotNull({ i8, [7 x i8], double }* %agg_tuple) // %dst_val = load double, double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 +// %0 = extractvalue { i8, double } %input0, 0 +// %is_null = trunc i8 %0 to i1 +// br i1 %is_null, label %ret, label %not_null +// +// ret: ; preds = %not_null, %entry +// ret void +// +// not_null: ; preds = %entry +// %val = extractvalue { i8, double } %input0, 1 // %1 = fadd double %dst_val, %val +// %2 = bitcast { i8, [7 x i8], double }* %agg_tuple to i8* +// %null_byte_ptr = getelementptr i8, i8* %2, i32 0 +// %null_byte = load i8, i8* %null_byte_ptr +// %null_bit_cleared = and i8 %null_byte, -2 +// store i8 %null_bit_cleared, i8* %null_byte_ptr // store double %1, double* %dst_slot_ptr // br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void // } // -// The IR for ndv(double_col) is: +// The IR for min(timestamp_col), which uses the UDA interface, is: // // define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"* %agg_expr_ctx, -// { i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple, -// %"class.impala::TupleRow"* %row) #34 { +// %"class.impala::ExprContext"** %agg_expr_ctxs, +// { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple, +// %"class.impala::TupleRow"* %row) #34 { // entry: -// %dst_lowered_ptr = alloca { i64, i8* } -// %src_lowered_ptr = alloca { i8, double } -// %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %agg_expr_ctx, -// %"class.impala::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], %"struct.impala::StringValue" }, -// { i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple, i32 0, i32 2 -// call void @SetNotNull({ i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple) -// %dst_val = -// load %"struct.impala::StringValue", %"struct.impala::StringValue"* %dst_slot_ptr -// store { i8, double } %src, { i8, double }* %src_lowered_ptr -// %src_unlowered_ptr = -// bitcast { i8, double }* %src_lowered_ptr to %"struct.impala_udf::DoubleVal"* -// %ptr = extractvalue %"struct.impala::StringValue" %dst_val, 0 -// %dst = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1 -// %len = extractvalue %"struct.impala::StringValue" %dst_val, 1 -// %1 = extractvalue { i64, i8* } %dst, 0 -// %2 = zext i32 %len to i64 -// %3 = shl i64 %2, 32 -// %4 = and i64 %1, 4294967295 -// %5 = or i64 %4, %3 -// %dst1 = insertvalue { i64, i8* } %dst, i64 %5, 0 -// store { i64, i8* } %dst1, { i64, i8* }* %dst_lowered_ptr -// %dst_unlowered_ptr = -// bitcast { i64, i8* }* %dst_lowered_ptr to %"struct.impala_udf::StringVal"* -// call void @HllUpdate(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"struct.impala_udf::DoubleVal"* %src_unlowered_ptr, -// %"struct.impala_udf::StringVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i8* }, { i64, i8* }* %dst_lowered_ptr -// %6 = extractvalue { i64, i8* } %anyval_result, 0 -// %7 = ashr i64 %6, 32 -// %8 = trunc i64 %7 to i32 -// %9 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %8, 1 -// %10 = extractvalue { i64, i8* } %anyval_result, 1 -// %11 = insertvalue %"struct.impala::StringValue" %9, i8* %10, 0 -// store %"struct.impala::StringValue" %11, %"struct.impala::StringValue"* %dst_slot_ptr +// %dst_lowered_ptr = alloca { i64, i64 } +// %input_lowered_ptr = alloca { i64, i64 } +// %expr_ctx_ptr = getelementptr %"class.impala::ExprContext"*, +// %"class.impala::ExprContext"** %agg_expr_ctxs, i32 0 +// %expr_ctx = load %"class.impala::ExprContext"*, +// %"class.impala::ExprContext"** %expr_ctx_ptr +// %input0 = call { i64, i64 } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx, +// %"class.impala::TupleRow"* %row) +// %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], +// %"class.impala::TimestampValue" }, { i8, [7 x i8], +// %"class.impala::TimestampValue" }* %agg_tuple, i32 0, i32 2 +// %dst_val = load %"class.impala::TimestampValue", +// %"class.impala::TimestampValue"* %dst_slot_ptr +// %0 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* +// %null_byte_ptr = getelementptr i8, i8* %0, i32 0 +// %null_byte = load i8, i8* %null_byte_ptr +// %null_mask = and i8 %null_byte, 1 +// %is_null = icmp ne i8 %null_mask, 0 +// %is_null_ext = zext i1 %is_null to i64 +// %1 = or i64 0, %is_null_ext +// %dst = insertvalue { i64, i64 } zeroinitializer, i64 %1, 0 +// %time_of_day = extractvalue %"class.impala::TimestampValue" %dst_val, 0, 0, 0, 0 +// %dst1 = insertvalue { i64, i64 } %dst, i64 %time_of_day, 1 +// %date = extractvalue %"class.impala::TimestampValue" %dst_val, 1, 0, 0 +// %2 = extractvalue { i64, i64 } %dst1, 0 +// %3 = zext i32 %date to i64 +// %4 = shl i64 %3, 32 +// %5 = and i64 %2, 4294967295 +// %6 = or i64 %5, %4 +// %dst2 = insertvalue { i64, i64 } %dst1, i64 %6, 0 +// store { i64, i64 } %input0, { i64, i64 }* %input_lowered_ptr +// %input_unlowered_ptr = bitcast { i64, i64 }* %input_lowered_ptr +// to %"struct.impala_udf::TimestampVal"* +// store { i64, i64 } %dst2, { i64, i64 }* %dst_lowered_ptr +// %dst_unlowered_ptr = bitcast { i64, i64 }* %dst_lowered_ptr +// to %"struct.impala_udf::TimestampVal"* +// call void +// @_ZN6impala18AggregateFunctions3MinIN10impala_udf12TimestampValEEEvPNS2_15FunctionContextERKT_PS6_.2( +// %"class.impala_udf::FunctionContext"* %agg_fn_ctx, +// %"struct.impala_udf::TimestampVal"* %input_unlowered_ptr, +// %"struct.impala_udf::TimestampVal"* %dst_unlowered_ptr) +// %anyval_result = load { i64, i64 }, { i64, i64 }* %dst_lowered_ptr +// %7 = extractvalue { i64, i64 } %anyval_result, 1 +// %8 = insertvalue %"class.impala::TimestampValue" zeroinitializer, i64 %7, 0, 0, 0, 0 +// %9 = extractvalue { i64, i64 } %anyval_result, 0 +// %10 = ashr i64 %9, 32 +// %11 = trunc i64 %10 to i32 +// %12 = insertvalue %"class.impala::TimestampValue" %8, i32 %11, 1, 0, 0 +// %13 = extractvalue { i64, i64 } %anyval_result, 0 +// %result_is_null = trunc i64 %13 to i1 +// %14 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* +// %null_byte_ptr3 = getelementptr i8, i8* %14, i32 0 +// %null_byte4 = load i8, i8* %null_byte_ptr3 +// %null_bit_cleared = and i8 %null_byte4, -2 +// %15 = sext i1 %result_is_null to i8 +// %null_bit = and i8 %15, 1 +// %null_bit_set = or i8 %null_bit_cleared, %null_bit +// store i8 %null_bit_set, i8* %null_byte_ptr3 +// store %"class.impala::TimestampValue" %12, +// %"class.impala::TimestampValue"* %dst_slot_ptr // br label %ret // -// ret: ; preds = %src_not_null, %entry +// ret: ; preds = %entry // ret void // } +// Status PartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc, Function** fn) { - // TODO: Fix this DCHECK and Init() once CodegenUpdateSlot() can handle AggFnEvaluator - // with multiple input expressions (e.g. group_concat). - DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1); - ExprContext* agg_expr_ctx = evaluator->input_expr_ctxs()[0]; - Expr* agg_expr = agg_expr_ctx->root(); - - // TODO: implement timestamp - if (agg_expr->type().type == TYPE_TIMESTAMP && - evaluator->agg_op() != AggFnEvaluator::AVG) { - return Status("PartitionedAggregationNode::CodegenUpdateSlot(): timestamp input type " - "NYI"); - } - - Function* agg_expr_fn; - RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(codegen, &agg_expr_fn)); - PointerType* fn_ctx_type = codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME); - PointerType* expr_ctx_type = codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME); + PointerType* expr_ctxs_type = + codegen->GetPtrPtrType(codegen->GetType(ExprContext::LLVM_CLASS_NAME)); StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen); if (tuple_struct == NULL) { return Status("PartitionedAggregationNode::CodegenUpdateSlot(): failed to generate " - "intermediate tuple desc"); + "intermediate tuple desc"); } PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_struct); PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME); @@ -1562,129 +1571,129 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen, // Create UpdateSlot prototype LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type()); prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctx", fn_ctx_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_expr_ctx", expr_ctx_type)); + prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_expr_ctxs", expr_ctxs_type)); prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - LlvmCodeGen::LlvmBuilder builder(codegen->context()); + LlvmBuilder builder(codegen->context()); Value* args[4]; *fn = prototype.GeneratePrototype(&builder, &args[0]); Value* agg_fn_ctx_arg = args[0]; - Value* agg_expr_ctx_arg = args[1]; + Value* agg_expr_ctxs_arg = args[1]; Value* agg_tuple_arg = args[2]; Value* row_arg = args[3]; - BasicBlock* src_not_null_block = - BasicBlock::Create(codegen->context(), "src_not_null", *fn); - BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", *fn); - - // Call expr function to get src slot value - Value* agg_expr_fn_args[] = { agg_expr_ctx_arg, row_arg }; - CodegenAnyVal src = CodegenAnyVal::CreateCallWrapped( - codegen, &builder, agg_expr->type(), agg_expr_fn, agg_expr_fn_args, "src"); + DCHECK_GE(evaluator->input_expr_ctxs().size(), 1); + vector<CodegenAnyVal> input_vals; + for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { + ExprContext* agg_expr_ctx = evaluator->input_expr_ctxs()[i]; + Expr* agg_expr = agg_expr_ctx->root(); + Function* agg_expr_fn; + RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(codegen, &agg_expr_fn)); + DCHECK(agg_expr_fn != NULL); + + // Call expr function with the matching expr context to get src slot value. + Value* expr_ctx_ptr = builder.CreateInBoundsGEP( + agg_expr_ctxs_arg, codegen->GetIntConstant(TYPE_INT, i), "expr_ctx_ptr"); + Value* expr_ctx = builder.CreateLoad(expr_ctx_ptr, "expr_ctx"); + string input_name = Substitute("input$0", i); + input_vals.push_back( + CodegenAnyVal::CreateCallWrapped(codegen, &builder, agg_expr->type(), agg_expr_fn, + ArrayRef<Value*>({expr_ctx, row_arg}), input_name.c_str())); + } + + AggFnEvaluator::AggregationOp agg_op = evaluator->agg_op(); + const ColumnType& dst_type = evaluator->intermediate_type(); + bool dst_is_int_or_float_or_bool = dst_type.IsIntegerType() + || dst_type.IsFloatingPointType() || dst_type.IsBooleanType(); + bool dst_is_numeric_or_bool = dst_is_int_or_float_or_bool || dst_type.IsDecimalType(); - Value* src_is_null = src.GetIsNull(); - builder.CreateCondBr(src_is_null, ret_block, src_not_null_block); + BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", *fn); - // Src slot is not null, update dst_slot - builder.SetInsertPoint(src_not_null_block); - Value* dst_ptr = builder.CreateStructGEP(NULL, agg_tuple_arg, slot_desc->llvm_field_idx(), - "dst_slot_ptr"); + // Emit the code to compute 'result' and set the NULL indicator if needed. First check + // for special cases where we can emit a very simple instruction sequence, then fall + // back to the general-purpose approach of calling the cross-compiled builtin UDA. + CodegenAnyVal& src = input_vals[0]; + // 'dst_slot_ptr' points to the slot in the aggregate tuple to update. + Value* dst_slot_ptr = builder.CreateStructGEP( + NULL, agg_tuple_arg, slot_desc->llvm_field_idx(), "dst_slot_ptr"); Value* result = NULL; - - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->GetUpdateNullFn(codegen, false); - builder.CreateCall(clear_null_fn, ArrayRef<Value*>({agg_tuple_arg})); - } - - // Update the slot - Value* dst_value = builder.CreateLoad(dst_ptr, "dst_val"); - switch (evaluator->agg_op()) { - case AggFnEvaluator::COUNT: - if (evaluator->is_merge()) { - result = builder.CreateAdd(dst_value, src.GetVal(), "count_sum"); - } else { - result = builder.CreateAdd(dst_value, - codegen->GetIntConstant(TYPE_BIGINT, 1), "count_inc"); - } - break; - case AggFnEvaluator::MIN: { - Function* min_fn = codegen->CodegenMinMax(slot_desc->type(), true); - Value* min_args[] = { dst_value, src.GetVal() }; - result = builder.CreateCall(min_fn, min_args, "min_value"); - break; + Value* dst_value = builder.CreateLoad(dst_slot_ptr, "dst_val"); + if (agg_op == AggFnEvaluator::COUNT) { + src.CodegenBranchIfNull(&builder, ret_block); + if (evaluator->is_merge()) { + result = builder.CreateAdd(dst_value, src.GetVal(), "count_sum"); + } else { + result = builder.CreateAdd( + dst_value, codegen->GetIntConstant(TYPE_BIGINT, 1), "count_inc"); } - case AggFnEvaluator::MAX: { - Function* max_fn = codegen->CodegenMinMax(slot_desc->type(), false); - Value* max_args[] = { dst_value, src.GetVal() }; - result = builder.CreateCall(max_fn, max_args, "max_value"); - break; + DCHECK(!slot_desc->is_nullable()); + } else if ((agg_op == AggFnEvaluator::MIN || agg_op == AggFnEvaluator::MAX) + && dst_is_numeric_or_bool) { + bool is_min = agg_op == AggFnEvaluator::MIN; + src.CodegenBranchIfNull(&builder, ret_block); + Function* min_max_fn = codegen->CodegenMinMax(slot_desc->type(), is_min); + Value* min_max_args[] = {dst_value, src.GetVal()}; + result = + builder.CreateCall(min_max_fn, min_max_args, is_min ? "min_value" : "max_value"); + // Dst may have been NULL, make sure to unset the NULL bit. + DCHECK(slot_desc->is_nullable()); + slot_desc->CodegenSetNullIndicator( + codegen, &builder, agg_tuple_arg, codegen->false_value()); + } else if (agg_op == AggFnEvaluator::SUM && dst_is_int_or_float_or_bool) { + src.CodegenBranchIfNull(&builder, ret_block); + if (dst_type.IsFloatingPointType()) { + result = builder.CreateFAdd(dst_value, src.GetVal()); + } else { + result = builder.CreateAdd(dst_value, src.GetVal()); } - case AggFnEvaluator::SUM: - if (slot_desc->type().type != TYPE_DECIMAL) { - if (slot_desc->type().type == TYPE_FLOAT || - slot_desc->type().type == TYPE_DOUBLE) { - result = builder.CreateFAdd(dst_value, src.GetVal()); - } else { - result = builder.CreateAdd(dst_value, src.GetVal()); - } - break; + // Dst may have been NULL, make sure to unset the NULL bit. + DCHECK(slot_desc->is_nullable()); + slot_desc->CodegenSetNullIndicator( + codegen, &builder, agg_tuple_arg, codegen->false_value()); + } else { + // The remaining cases are implemented using the UDA interface. + // Create intermediate argument 'dst' from 'dst_value' + CodegenAnyVal dst = CodegenAnyVal::GetNonNullVal(codegen, &builder, dst_type, "dst"); + + // For a subset of builtins we generate a different code sequence that exploits two + // properties of the builtins. First, NULL input values can be skipped. Second, the + // value of the slot was initialized in the right way in InitAggSlots() (e.g. 0 for + // SUM) that we get the right result if UpdateSlot() pretends that the NULL bit of + // 'dst' is unset. Empirically this optimisation makes TPC-H Q1 5-10% faster. + bool special_null_handling = !evaluator->intermediate_type().IsStringType() + && !evaluator->intermediate_type().IsTimestampType() + && (agg_op == AggFnEvaluator::MIN || agg_op == AggFnEvaluator::MAX + || agg_op == AggFnEvaluator::SUM || agg_op == AggFnEvaluator::AVG + || agg_op == AggFnEvaluator::NDV); + if (slot_desc->is_nullable()) { + if (special_null_handling) { + src.CodegenBranchIfNull(&builder, ret_block); + slot_desc->CodegenSetNullIndicator( + codegen, &builder, agg_tuple_arg, codegen->false_value()); + } else { + dst.SetIsNull(slot_desc->CodegenIsNull(codegen, &builder, agg_tuple_arg)); } - DCHECK_EQ(slot_desc->type().type, TYPE_DECIMAL); - // Fall through to xcompiled case - case AggFnEvaluator::AVG: - case AggFnEvaluator::NDV: { - // Get xcompiled update/merge function from IR module - const string& symbol = evaluator->is_merge() ? - evaluator->merge_symbol() : evaluator->update_symbol(); - const ColumnType& dst_type = evaluator->intermediate_type(); - Function* ir_fn = codegen->GetFunction(symbol); - DCHECK(ir_fn != NULL); - - // Clone and replace constants. - ir_fn = codegen->CloneFunction(ir_fn); - vector<FunctionContext::TypeDesc> arg_types; - arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(agg_expr->type())); - Expr::InlineConstants(AnyValUtil::ColumnTypeToTypeDesc(dst_type), arg_types, - codegen, ir_fn); - - // Create pointer to src to pass to ir_fn. We must use the unlowered type. - Value* src_lowered_ptr = codegen->CreateEntryBlockAlloca( - *fn, LlvmCodeGen::NamedVariable("src_lowered_ptr", src.value()->getType())); - builder.CreateStore(src.value(), src_lowered_ptr); - Type* unlowered_ptr_type = - CodegenAnyVal::GetUnloweredPtrType(codegen, agg_expr->type()); - Value* src_unlowered_ptr = - builder.CreateBitCast(src_lowered_ptr, unlowered_ptr_type, "src_unlowered_ptr"); - - // Create intermediate argument 'dst' from 'dst_value' - CodegenAnyVal dst = CodegenAnyVal::GetNonNullVal( - codegen, &builder, dst_type, "dst"); - dst.SetFromRawValue(dst_value); - // Create pointer to dst to pass to ir_fn. We must use the unlowered type. - Value* dst_lowered_ptr = codegen->CreateEntryBlockAlloca( - *fn, LlvmCodeGen::NamedVariable("dst_lowered_ptr", dst.value()->getType())); - builder.CreateStore(dst.value(), dst_lowered_ptr); - unlowered_ptr_type = CodegenAnyVal::GetUnloweredPtrType(codegen, dst_type); - Value* dst_unlowered_ptr = - builder.CreateBitCast(dst_lowered_ptr, unlowered_ptr_type, "dst_unlowered_ptr"); - - // Call 'ir_fn' - builder.CreateCall(ir_fn, - ArrayRef<Value*>({agg_fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr})); - - // Convert StringVal intermediate 'dst_arg' back to StringValue - Value* anyval_result = builder.CreateLoad(dst_lowered_ptr, "anyval_result"); - result = CodegenAnyVal(codegen, &builder, dst_type, anyval_result).ToNativeValue(); - break; } - default: - DCHECK(false) << "bad aggregate operator: " << evaluator->agg_op(); + dst.SetFromRawValue(dst_value); + + // Call the UDA to update/merge 'src' into 'dst', with the result stored in + // 'updated_dst_val'. + CodegenAnyVal updated_dst_val; + RETURN_IF_ERROR(CodegenCallUda( + codegen, &builder, evaluator, agg_fn_ctx_arg, input_vals, dst, &updated_dst_val)); + result = updated_dst_val.ToNativeValue(); + + if (slot_desc->is_nullable() && !special_null_handling) { + // Set NULL bit in the slot based on the return value. + Value* result_is_null = updated_dst_val.GetIsNull("result_is_null"); + slot_desc->CodegenSetNullIndicator( + codegen, &builder, agg_tuple_arg, result_is_null); + } } // TODO: Store to register in the loop and store once to memory at the end of the loop. - builder.CreateStore(result, dst_ptr); + builder.CreateStore(result, dst_slot_ptr); builder.CreateBr(ret_block); builder.SetInsertPoint(ret_block); @@ -1698,6 +1707,58 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen, return Status::OK(); } +Status PartitionedAggregationNode::CodegenCallUda(LlvmCodeGen* codegen, + LlvmBuilder* builder, AggFnEvaluator* evaluator, Value* agg_fn_ctx, + const vector<CodegenAnyVal>& input_vals, const CodegenAnyVal& dst, + CodegenAnyVal* updated_dst_val) { + DCHECK_EQ(evaluator->input_expr_ctxs().size(), input_vals.size()); + const string& symbol = + evaluator->is_merge() ? evaluator->merge_symbol() : evaluator->update_symbol(); + const ColumnType& dst_type = evaluator->intermediate_type(); + + // TODO: to support actual UDAs, not just builtin functions using the UDA interface, + // we need to load the function at this point. + Function* uda_fn = codegen->GetFunction(symbol, true); + DCHECK(uda_fn != NULL); + + vector<FunctionContext::TypeDesc> arg_types; + for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { + arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc( + evaluator->input_expr_ctxs()[i]->root()->type())); + } + Expr::InlineConstants( + AnyValUtil::ColumnTypeToTypeDesc(dst_type), arg_types, codegen, uda_fn); + + // Set up arguments for call to UDA, which are the FunctionContext*, followed by + // pointers to all input values, followed by a pointer to the destination value. + vector<Value*> uda_fn_args; + uda_fn_args.push_back(agg_fn_ctx); + + // Create pointers to input args to pass to uda_fn. We must use the unlowered type, + // e.g. IntVal, because the UDA interface expects the values to be passed as const + // references to the classes. + for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { + uda_fn_args.push_back(input_vals[i].GetUnloweredPtr("input_unlowered_ptr")); + } + + // Create pointer to dst to pass to uda_fn. We must use the unlowered type for the + // same reason as above. + Value* dst_lowered_ptr = dst.GetLoweredPtr("dst_lowered_ptr"); + Type* dst_unlowered_ptr_type = CodegenAnyVal::GetUnloweredPtrType(codegen, dst_type); + Value* dst_unlowered_ptr = builder->CreateBitCast( + dst_lowered_ptr, dst_unlowered_ptr_type, "dst_unlowered_ptr"); + uda_fn_args.push_back(dst_unlowered_ptr); + + // Call 'uda_fn' + builder->CreateCall(uda_fn, uda_fn_args); + + // Convert intermediate 'dst_arg' back to the native type. + Value* anyval_result = builder->CreateLoad(dst_lowered_ptr, "anyval_result"); + + *updated_dst_val = CodegenAnyVal(codegen, builder, dst_type, anyval_result); + return Status::OK(); +} + // IR codegen for the UpdateTuple loop. This loop is query specific and based on the // aggregate functions. The function signature must match the non- codegen'd UpdateTuple // exactly. @@ -1706,78 +1767,60 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen, // // ; Function Attrs: alwaysinline // define void @UpdateTuple(%"class.impala::PartitionedAggregationNode"* %this_ptr, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, -// %"class.impala::Tuple"* %tuple, -// %"class.impala::TupleRow"* %row, -// i1 %is_merge) #34 { +// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, %"class.impala::Tuple"* +// %tuple, +// %"class.impala::TupleRow"* %row, i1 %is_merge) #34 { // entry: // %tuple1 = -// bitcast %"class.impala::Tuple"* %tuple to { i8, [7 x i8], i64, i64, double }* +// bitcast %"class.impala::Tuple"* %tuple to { i8, [7 x i8], i64, i64, double }* // %src_slot = getelementptr inbounds { i8, [7 x i8], i64, i64, double }, -// { i8, [7 x i8], i64, i64, double }* %tuple1, i32 0, i32 2 +// { i8, [7 x i8], i64, i64, double }* %tuple1, i32 0, i32 2 // %count_star_val = load i64, i64* %src_slot // %count_star_inc = add i64 %count_star_val, 1 // store i64 %count_star_inc, i64* %src_slot // %0 = getelementptr %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 1 +// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 1 // %agg_fn_ctx = load %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %0 -// %1 = call %"class.impala::ExprContext"* -// @_ZNK6impala26PartitionedAggregationNode17GetAggExprContextEi( -// %"class.impala::PartitionedAggregationNode"* %this_ptr, i32 1) +// %"class.impala_udf::FunctionContext"** %0 +// %1 = call %"class.impala::ExprContext"** +// @_ZNK6impala26PartitionedAggregationNode18GetAggExprContextsEi( +// %"class.impala::PartitionedAggregationNode"* %this_ptr, i32 1) // call void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"* %1, -// { i8, [7 x i8], i64, i64, double }* %tuple1, -// %"class.impala::TupleRow"* %row) +// %"class.impala::ExprContext"** %1, { i8, [7 x i8], i64, i64, double }* %tuple1, +// %"class.impala::TupleRow"* %row) // %2 = getelementptr %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 2 +// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 2 // %agg_fn_ctx2 = load %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %2 -// %3 = call %"class.impala::ExprContext"* -// @_ZNK6impala26PartitionedAggregationNode17GetAggExprContextEi( -// %"class.impala::PartitionedAggregationNode"* %this_ptr, i32 2) -// call void @UpdateSlot.3(%"class.impala_udf::FunctionContext"* %agg_fn_ctx2, -// %"class.impala::ExprContext"* %3, -// { i8, [7 x i8], i64, i64, double }* %tuple1, -// %"class.impala::TupleRow"* %row) +// %"class.impala_udf::FunctionContext"** %2 +// %3 = call %"class.impala::ExprContext"** +// @_ZNK6impala26PartitionedAggregationNode18GetAggExprContextsEi( +// %"class.impala::PartitionedAggregationNode"* %this_ptr, i32 2) +// call void @UpdateSlot.4(%"class.impala_udf::FunctionContext"* %agg_fn_ctx2, +// %"class.impala::ExprContext"** %3, { i8, [7 x i8], i64, i64, double }* %tuple1, +// %"class.impala::TupleRow"* %row) // ret void // } -Status PartitionedAggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen, - Function** fn) { +Status PartitionedAggregationNode::CodegenUpdateTuple( + LlvmCodeGen* codegen, Function** fn) { SCOPED_TIMER(codegen->codegen_timer()); - int j = grouping_expr_ctxs_.size(); - for (int i = 0; i < aggregate_evaluators_.size(); ++i, ++j) { - SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[j]; - AggFnEvaluator* evaluator = aggregate_evaluators_[i]; - - // Don't codegen things that aren't builtins (for now) - if (!evaluator->is_builtin()) { - return Status("PartitionedAggregationNode::CodegenUpdateTuple(): UDA codegen NYI"); - } - - bool supported = true; - AggFnEvaluator::AggregationOp op = evaluator->agg_op(); - PrimitiveType type = slot_desc->type().type; - // Char and timestamp intermediates aren't supported - if (type == TYPE_TIMESTAMP || type == TYPE_CHAR) supported = false; - // Only AVG and NDV support string intermediates - if ((type == TYPE_STRING || type == TYPE_VARCHAR) && - !(op == AggFnEvaluator::AVG || op == AggFnEvaluator::NDV)) { - supported = false; - } - if (!supported) { - stringstream ss; - ss << "Could not codegen PartitionedAggregationNode::UpdateTuple because " - << "intermediate type " << slot_desc->type() << " is not yet supported for " - << "aggregate function \"" << evaluator->fn_name() << "()\""; - return Status(ss.str()); + for (const SlotDescriptor* slot_desc : intermediate_tuple_desc_->slots()) { + if (slot_desc->type().type == TYPE_CHAR) { + return Status("PartitionedAggregationNode::CodegenUpdateTuple(): cannot codegen" + "CHAR in aggregations"); } } if (intermediate_tuple_desc_->GetLlvmStruct(codegen) == NULL) { return Status("PartitionedAggregationNode::CodegenUpdateTuple(): failed to generate " - "intermediate tuple desc"); + "intermediate tuple desc"); + } + + for (AggFnEvaluator* evaluator : aggregate_evaluators_) { + // Don't codegen things that aren't builtins (for now) + if (!evaluator->is_builtin()) { + return Status("PartitionedAggregationNode::CodegenUpdateTuple(): UDA codegen NYI"); + } } // Get the types to match the UpdateTuple signature @@ -1800,7 +1843,7 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen, prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); prototype.AddArgument(LlvmCodeGen::NamedVariable("is_merge", codegen->boolean_type())); - LlvmCodeGen::LlvmBuilder builder(codegen->context()); + LlvmBuilder builder(codegen->context()); Value* args[5]; *fn = prototype.GeneratePrototype(&builder, &args[0]); Value* this_arg = args[0]; @@ -1812,13 +1855,13 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen, // TODO: get rid of this by using right type in function signature tuple_arg = builder.CreateBitCast(tuple_arg, tuple_ptr, "tuple"); - Function* get_expr_ctx_fn = - codegen->GetFunction(IRFunction::PART_AGG_NODE_GET_EXPR_CTX, false); - DCHECK(get_expr_ctx_fn != NULL); + Function* get_expr_ctxs_fn = + codegen->GetFunction(IRFunction::PART_AGG_NODE_GET_EXPR_CTXS, false); + DCHECK(get_expr_ctxs_fn != NULL); // Loop over each expr and generate the IR for that slot. If the expr is not // count(*), generate a helper IR function to update the slot and call that. - j = grouping_expr_ctxs_.size(); + int j = grouping_expr_ctxs_.size(); for (int i = 0; i < aggregate_evaluators_.size(); ++i, ++j) { SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[j]; AggFnEvaluator* evaluator = aggregate_evaluators_[i]; @@ -1838,9 +1881,9 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen, Value* agg_fn_ctx = builder.CreateLoad(agg_fn_ctx_ptr, "agg_fn_ctx"); // Call GetExprCtx() to get the expression context. DCHECK(agg_expr_ctxs_[i] != NULL); - Value* get_expr_ctx_args[] = { this_arg, codegen->GetIntConstant(TYPE_INT, i) }; - Value* agg_expr_ctx = builder.CreateCall(get_expr_ctx_fn, get_expr_ctx_args); - Value* update_slot_args[] = { agg_fn_ctx, agg_expr_ctx, tuple_arg, row_arg }; + Value* get_expr_ctxs_args[] = {this_arg, codegen->GetIntConstant(TYPE_INT, i)}; + Value* agg_expr_ctxs = builder.CreateCall(get_expr_ctxs_fn, get_expr_ctxs_args); + Value* update_slot_args[] = {agg_fn_ctx, agg_expr_ctxs, tuple_arg, row_arg}; builder.CreateCall(update_slot_fn, update_slot_args); } } @@ -1849,8 +1892,8 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen, // CodegenProcessBatch() does the final optimizations. *fn = codegen->FinalizeFunction(*fn); if (*fn == NULL) { - return Status("PartitionedAggregationNode::CodegeUpdateTuple(): codegen'd " - "UpdateTuple() function failed verification, see log"); + return Status("PartitionedAggregationNode::CodegenUpdateTuple(): codegen'd " + "UpdateTuple() function failed verification, see log"); } return Status::OK(); }
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exec/partitioned-aggregation-node.h ---------------------------------------------------------------------- diff --git a/be/src/exec/partitioned-aggregation-node.h b/be/src/exec/partitioned-aggregation-node.h index 9e14e66..4287cea 100644 --- a/be/src/exec/partitioned-aggregation-node.h +++ b/be/src/exec/partitioned-aggregation-node.h @@ -30,13 +30,17 @@ #include "runtime/string-value.h" namespace llvm { - class Function; +class BasicBlock; +class Function; +class Value; } namespace impala { class AggFnEvaluator; +class CodegenAnyVal; class LlvmCodeGen; +class LlvmBuilder; class RowBatch; class RuntimeState; struct StringValue; @@ -203,7 +207,7 @@ class PartitionedAggregationNode : public ExecNode { /// version of UpdateTuple() to avoid loading aggregate_evaluators_[i] at runtime. /// An entry is NULL if the aggregate evaluator is not codegen'ed or there is no Expr /// in the aggregate evaluator (e.g. count(*)). - std::vector<ExprContext*> agg_expr_ctxs_; + std::vector<ExprContext* const*> agg_expr_ctxs_; /// FunctionContext for each aggregate function and backing MemPool. String data /// returned by the aggregate functions is allocated via these contexts. @@ -520,12 +524,13 @@ class PartitionedAggregationNode : public ExecNode { /// This function processes each individual row in ProcessBatch(). Must be inlined into /// ProcessBatch for codegen to substitute function calls with codegen'd versions. /// May spill partitions if not enough memory is available. - template<bool AGGREGATED_ROWS> + template <bool AGGREGATED_ROWS> Status IR_ALWAYS_INLINE ProcessRow(TupleRow* row, HashTableCtx* ht_ctx); - /// Accessor for the expression context of an AggFnEvaluator. Used only in codegen'ed + /// Accessor for the expression contexts of an AggFnEvaluator. Returns an array of + /// pointers the the AggFnEvaluator's expression contexts. Used only in codegen'ed /// version of UpdateTuple(). - ExprContext* IR_ALWAYS_INLINE GetAggExprContext(int i) const; + ExprContext* const* IR_ALWAYS_INLINE GetAggExprContexts(int i) const; /// Create a new intermediate tuple in partition, initialized with row. ht_ctx is /// the context for the partition's hash table and hash is the precomputed hash of @@ -640,6 +645,16 @@ class PartitionedAggregationNode : public ExecNode { Status CodegenUpdateSlot(LlvmCodeGen* codegen, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc, llvm::Function** fn); + /// Codegen a call to a function implementing the UDA interface with input values + /// from 'input_vals'. 'dst_val' should contain the previous value of the aggregate + /// function, and 'updated_dst_val' is set to the new value after the Update or Merge + /// operation is applied. The instruction sequence for the UDA call is inserted at + /// the insert position of 'builder'. + Status CodegenCallUda(LlvmCodeGen* codegen, LlvmBuilder* builder, + AggFnEvaluator* evaluator, llvm::Value* agg_fn_ctx, + const std::vector<CodegenAnyVal>& input_vals, const CodegenAnyVal& dst_val, + CodegenAnyVal* updated_dst_val); + /// Codegen UpdateTuple(). Returns non-OK status if codegen is unsuccessful. Status CodegenUpdateTuple(LlvmCodeGen* codegen, llvm::Function** fn); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exec/partitioned-hash-join-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/partitioned-hash-join-node.cc b/be/src/exec/partitioned-hash-join-node.cc index 116ac02..c3211af 100644 --- a/be/src/exec/partitioned-hash-join-node.cc +++ b/be/src/exec/partitioned-hash-join-node.cc @@ -1144,7 +1144,7 @@ Status PartitionedHashJoinNode::CodegenCreateOutputRow(LlvmCodeGen* codegen, prototype.AddArgument(LlvmCodeGen::NamedVariable("build_arg", tuple_row_ptr_type)); LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); + LlvmBuilder builder(context); Value* args[4]; *fn = prototype.GeneratePrototype(&builder, args); Value* out_row_arg = builder.CreateBitCast(args[1], tuple_row_working_type, "out"); @@ -1156,8 +1156,9 @@ Status PartitionedHashJoinNode::CodegenCreateOutputRow(LlvmCodeGen* codegen, // Copy probe row codegen->CodegenMemcpy(&builder, out_row_arg, probe_row_arg, probe_tuple_row_size_); - Value* build_row_idx[] = { codegen->GetIntConstant(TYPE_INT, num_probe_tuples) }; - Value* build_row_dst = builder.CreateGEP(out_row_arg, build_row_idx, "build_dst_ptr"); + Value* build_row_idx[] = {codegen->GetIntConstant(TYPE_INT, num_probe_tuples)}; + Value* build_row_dst = + builder.CreateInBoundsGEP(out_row_arg, build_row_idx, "build_dst_ptr"); // Copy build row. BasicBlock* build_not_null_block = BasicBlock::Create(context, "build_not_null", *fn); @@ -1176,9 +1177,8 @@ Status PartitionedHashJoinNode::CodegenCreateOutputRow(LlvmCodeGen* codegen, // to work. builder.SetInsertPoint(build_null_block); for (int i = 0; i < num_build_tuples; ++i) { - Value* array_idx[] = - { codegen->GetIntConstant(TYPE_INT, i + num_probe_tuples) }; - Value* dst = builder.CreateGEP(out_row_arg, array_idx, "dst_tuple_ptr"); + Value* array_idx[] = {codegen->GetIntConstant(TYPE_INT, i + num_probe_tuples)}; + Value* dst = builder.CreateInBoundsGEP(out_row_arg, array_idx, "dst_tuple_ptr"); builder.CreateStore(codegen->null_ptr_value(), dst); } builder.CreateRetVoid(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exec/text-converter.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc index d38d662..cb5d70c 100644 --- a/be/src/exec/text-converter.cc +++ b/be/src/exec/text-converter.cc @@ -128,19 +128,13 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, if (tuple_type == NULL) return NULL; PointerType* tuple_ptr_type = tuple_type->getPointerTo(); - Function* set_null_fn = slot_desc->GetUpdateNullFn(codegen, true); - if (set_null_fn == NULL) { - LOG(ERROR) << "Could not codegen WriteSlot because slot update codegen failed."; - return NULL; - } - LlvmCodeGen::FnPrototype prototype( codegen, "WriteSlot", codegen->GetType(TYPE_BOOLEAN)); prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple_arg", tuple_ptr_type)); prototype.AddArgument(LlvmCodeGen::NamedVariable("data", codegen->ptr_type())); prototype.AddArgument(LlvmCodeGen::NamedVariable("len", codegen->GetType(TYPE_INT))); - LlvmCodeGen::LlvmBuilder builder(codegen->context()); + LlvmBuilder builder(codegen->context()); Value* args[3]; Function* fn = prototype.GeneratePrototype(&builder, &args[0]); @@ -267,13 +261,13 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, // Parse failed, set slot to null and return false builder.SetInsertPoint(parse_failed_block); - builder.CreateCall(set_null_fn, ArrayRef<Value*>({args[0]})); + slot_desc->CodegenSetNullIndicator(codegen, &builder, args[0], codegen->true_value()); builder.CreateRet(codegen->false_value()); } // Case where data is \N or len == 0 and it is not a string col builder.SetInsertPoint(set_null_block); - builder.CreateCall(set_null_fn, ArrayRef<Value*>({args[0]})); + slot_desc->CodegenSetNullIndicator(codegen, &builder, args[0], codegen->true_value()); builder.CreateRet(codegen->true_value()); return codegen->FinalizeFunction(fn); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/aggregate-functions-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc index c07d1db..7ed6386 100644 --- a/be/src/exprs/aggregate-functions-ir.cc +++ b/be/src/exprs/aggregate-functions-ir.cc @@ -164,12 +164,10 @@ StringVal ToStringVal(FunctionContext* context, T val) { stringstream ss; ss << val; const string &str = ss.str(); - return StringVal::CopyFrom(context, reinterpret_cast<const uint8_t*>(str.c_str()), str.size()); + return StringVal::CopyFrom( + context, reinterpret_cast<const uint8_t*>(str.c_str()), str.size()); } -// Delimiter to use if the separator is NULL. -static const StringVal DEFAULT_STRING_CONCAT_DELIM((uint8_t*)", ", 2); - constexpr int AggregateFunctions::HLL_PRECISION; constexpr int AggregateFunctions::HLL_LEN; @@ -627,15 +625,21 @@ void AggregateFunctions::Max(FunctionContext*, // StringConcatUpdate(). typedef int StringConcatHeader; -void AggregateFunctions::StringConcatUpdate(FunctionContext* ctx, - const StringVal& src, StringVal* result) { - StringConcatUpdate(ctx, src, DEFAULT_STRING_CONCAT_DELIM, result); +// Delimiter to use if the separator is not provided. +static inline StringVal ALWAYS_INLINE DefaultStringConcatDelim() { + return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(", ")), 2); +} + +void AggregateFunctions::StringConcatUpdate( + FunctionContext* ctx, const StringVal& src, StringVal* result) { + StringConcatUpdate(ctx, src, DefaultStringConcatDelim(), result); } -void AggregateFunctions::StringConcatUpdate(FunctionContext* ctx, - const StringVal& src, const StringVal& separator, StringVal* result) { +void AggregateFunctions::StringConcatUpdate(FunctionContext* ctx, const StringVal& src, + const StringVal& separator, StringVal* result) { if (src.is_null) return; - const StringVal* sep = separator.is_null ? &DEFAULT_STRING_CONCAT_DELIM : &separator; + const StringVal default_delim = DefaultStringConcatDelim(); + const StringVal* sep = separator.is_null ? &default_delim : &separator; if (result->is_null) { // Header of the intermediate state holds the length of the first separator. const int header_len = sizeof(StringConcatHeader); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/case-expr.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/case-expr.cc b/be/src/exprs/case-expr.cc index f139d64..3c0db79 100644 --- a/be/src/exprs/case-expr.cc +++ b/be/src/exprs/case-expr.cc @@ -196,7 +196,7 @@ Status CaseExpr::GetCodegendComputeFn(LlvmCodeGen* codegen, Function** fn) { } LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); + LlvmBuilder builder(context); Value* args[2]; Function* function = CreateIrFunctionPrototype(codegen, "CaseExpr", &args); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/compound-predicates.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/compound-predicates.cc b/be/src/exprs/compound-predicates.cc index 531a70c..d367e5a 100644 --- a/be/src/exprs/compound-predicates.cc +++ b/be/src/exprs/compound-predicates.cc @@ -131,7 +131,7 @@ Status CompoundPredicate::CodegenComputeFn( RETURN_IF_ERROR(children()[1]->GetCodegendComputeFn(codegen, &rhs_function)); LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); + LlvmBuilder builder(context); Value* args[2]; Function* function = CreateIrFunctionPrototype(codegen, "CompoundPredicate", &args); @@ -140,8 +140,7 @@ Status CompoundPredicate::CodegenComputeFn( // Control blocks for aggregating results BasicBlock* lhs_null_block = BasicBlock::Create(context, "lhs_null", function); - BasicBlock* lhs_not_null_block = - BasicBlock::Create(context, "lhs_not_null", function); + BasicBlock* lhs_not_null_block = BasicBlock::Create(context, "lhs_not_null", function); BasicBlock* lhs_null_rhs_not_null_block = BasicBlock::Create(context, "lhs_null_rhs_not_null", function); BasicBlock* lhs_not_null_rhs_null_block = @@ -232,7 +231,7 @@ Status CompoundPredicate::CodegenComputeFn( CodegenAnyVal ret(codegen, &builder, TYPE_BOOLEAN, NULL, "ret"); ret.SetIsNull(is_null_phi); ret.SetVal(val_phi); - builder.CreateRet(ret.value()); + builder.CreateRet(ret.GetLoweredValue()); *fn = codegen->FinalizeFunction(function); DCHECK(*fn != NULL); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/expr-codegen-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-codegen-test.cc b/be/src/exprs/expr-codegen-test.cc index 800a440..6ccbbf4 100644 --- a/be/src/exprs/expr-codegen-test.cc +++ b/be/src/exprs/expr-codegen-test.cc @@ -252,7 +252,7 @@ TEST_F(ExprCodegenTest, TestInlineConstants) { test_udf_file << getenv("IMPALA_HOME") << "/be/build/latest/exprs/expr-codegen-test.ll"; scoped_ptr<LlvmCodeGen> codegen; ASSERT_OK(LlvmCodeGen::CreateFromFile(&pool, test_udf_file.str(), "test", &codegen)); - Function* fn = codegen->GetFunction(TEST_GET_CONSTANT_SYMBOL); + Function* fn = codegen->GetFunction(TEST_GET_CONSTANT_SYMBOL, false); ASSERT_TRUE(fn != NULL); // Function verification should fail because we haven't inlined GetConstant() calls http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/expr.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr.cc b/be/src/exprs/expr.cc index 0141bae..045c030 100644 --- a/be/src/exprs/expr.cc +++ b/be/src/exprs/expr.cc @@ -684,10 +684,10 @@ Status Expr::GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, Function** fn) { ir_compute_fn_ = CreateIrFunctionPrototype(codegen, "CodegenComputeFnWrapper", &args); BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", ir_compute_fn_); - LlvmCodeGen::LlvmBuilder builder(entry_block); + LlvmBuilder builder(entry_block); Value* this_ptr = codegen->CastPtrToLlvmPtr(codegen->GetPtrType(Expr::LLVM_CLASS_NAME), this); - Value* compute_fn_args[] = { this_ptr, args[0], args[1] }; + Value* compute_fn_args[] = {this_ptr, args[0], args[1]}; Value* ret = CodegenAnyVal::CreateCall( codegen, &builder, static_getval_fn, compute_fn_args, "ret"); builder.CreateRet(ret); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/literal.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/literal.cc b/be/src/exprs/literal.cc index 03bdd50..5b075f6 100644 --- a/be/src/exprs/literal.cc +++ b/be/src/exprs/literal.cc @@ -385,7 +385,7 @@ Status Literal::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) Value* args[2]; *fn = CreateIrFunctionPrototype(codegen, "Literal", &args); BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", *fn); - LlvmCodeGen::LlvmBuilder builder(entry_block); + LlvmBuilder builder(entry_block); CodegenAnyVal v = CodegenAnyVal::GetNonNullVal(codegen, &builder, type_); switch (type_.type) { @@ -439,7 +439,7 @@ Status Literal::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) return Status(ss.str()); } - builder.CreateRet(v.value()); + builder.CreateRet(v.GetLoweredValue()); *fn = codegen->FinalizeFunction(*fn); ir_compute_fn_ = *fn; return Status::OK(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/null-literal.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/null-literal.cc b/be/src/exprs/null-literal.cc index a8f4241..6985c5b 100644 --- a/be/src/exprs/null-literal.cc +++ b/be/src/exprs/null-literal.cc @@ -101,7 +101,7 @@ Status NullLiteral::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** Value* args[2]; *fn = CreateIrFunctionPrototype(codegen, "NullLiteral", &args); BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", *fn); - LlvmCodeGen::LlvmBuilder builder(entry_block); + LlvmBuilder builder(entry_block); Value* v = CodegenAnyVal::GetNullVal(codegen, type()); builder.CreateRet(v); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/scalar-fn-call.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc index 3a7ffb1..5457ced 100644 --- a/be/src/exprs/scalar-fn-call.cc +++ b/be/src/exprs/scalar-fn-call.cc @@ -319,7 +319,7 @@ Status ScalarFnCall::GetCodegendComputeFn(LlvmCodeGen* codegen, Function** fn) { Value* expr_ctx = args[0]; Value* row = args[1]; BasicBlock* block = BasicBlock::Create(codegen->context(), "entry", *fn); - LlvmCodeGen::LlvmBuilder builder(block); + LlvmBuilder builder(block); // Populate UDF arguments vector<Value*> udf_args; @@ -482,7 +482,7 @@ Status ScalarFnCall::GetUdf(LlvmCodeGen* codegen, Function** udf) { } else if (fn_.binary_type == TFunctionBinaryType::BUILTIN) { // In this path, we're running a builtin with the UDF interface. The IR is // in the llvm module. - *udf = codegen->GetFunction(fn_.scalar_fn.symbol); + *udf = codegen->GetFunction(fn_.scalar_fn.symbol, false); if (*udf == NULL) { // Builtins symbols should exist unless there is a version mismatch. stringstream ss; @@ -502,11 +502,11 @@ Status ScalarFnCall::GetUdf(LlvmCodeGen* codegen, Function** udf) { } else { // We're running an IR UDF. DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::IR); - *udf = codegen->GetFunction(fn_.scalar_fn.symbol); + *udf = codegen->GetFunction(fn_.scalar_fn.symbol, false); if (*udf == NULL) { stringstream ss; - ss << "Unable to locate function " << fn_.scalar_fn.symbol - << " from LLVM module " << fn_.hdfs_location; + ss << "Unable to locate function " << fn_.scalar_fn.symbol << " from LLVM module " + << fn_.hdfs_location; return Status(ss.str()); } *udf = codegen->FinalizeFunction(*udf); @@ -527,7 +527,7 @@ Status ScalarFnCall::GetFunction(RuntimeState* state, const string& symbol, void DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::IR); LlvmCodeGen* codegen = state->codegen(); DCHECK(codegen != NULL); - Function* ir_fn = codegen->GetFunction(symbol); + Function* ir_fn = codegen->GetFunction(symbol, false); if (ir_fn == NULL) { stringstream ss; ss << "Unable to locate function " << symbol << " from LLVM module " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/exprs/slot-ref.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/slot-ref.cc b/be/src/exprs/slot-ref.cc index eeba42f..3a70b6c 100644 --- a/be/src/exprs/slot-ref.cc +++ b/be/src/exprs/slot-ref.cc @@ -134,11 +134,11 @@ string SlotRef::DebugString() const { // br label %check_slot_null // // check_slot_null: ; preds = %entry -// %null_ptr = getelementptr i8* %tuple_ptr, i32 0 +// %null_byte_ptr = getelementptr i8* %tuple_ptr, i32 0 // %null_byte = load i8* %null_ptr // %null_byte_set = and i8 %null_byte, 2 -// %slot_is_null = icmp ne i8 %null_byte_set, 0 -// br i1 %slot_is_null, label %ret, label %get_slot +// %is_null = icmp ne i8 %null_byte_set, 0 +// br i1 %is_null, label %ret, label %get_slot // // get_slot: ; preds = %check_slot_null // %slot_addr = getelementptr i8* %tuple_ptr, i32 8 @@ -189,28 +189,24 @@ Status SlotRef::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) Value* row_ptr = args[1]; Value* tuple_offset = ConstantInt::get(codegen->int_type(), tuple_idx_); - Value* null_byte_offset = - ConstantInt::get(codegen->int_type(), null_indicator_offset_.byte_offset); Value* slot_offset = ConstantInt::get(codegen->int_type(), slot_offset_); - Value* null_mask = - ConstantInt::get(codegen->tinyint_type(), null_indicator_offset_.bit_mask); Value* zero = ConstantInt::get(codegen->GetType(TYPE_TINYINT), 0); Value* one = ConstantInt::get(codegen->GetType(TYPE_TINYINT), 1); BasicBlock* entry_block = BasicBlock::Create(context, "entry", *fn); + bool slot_is_nullable = null_indicator_offset_.bit_mask != 0; BasicBlock* check_slot_null_indicator_block = NULL; - if (null_indicator_offset_.bit_mask != 0) { - check_slot_null_indicator_block = - BasicBlock::Create(context, "check_slot_null", *fn); + if (slot_is_nullable) { + check_slot_null_indicator_block = BasicBlock::Create(context, "check_slot_null", *fn); } BasicBlock* get_slot_block = BasicBlock::Create(context, "get_slot", *fn); BasicBlock* ret_block = BasicBlock::Create(context, "ret", *fn); - LlvmCodeGen::LlvmBuilder builder(entry_block); + LlvmBuilder builder(entry_block); // Get the tuple offset addr from the row Value* cast_row_ptr = builder.CreateBitCast( row_ptr, PointerType::get(codegen->ptr_type(), 0), "cast_row_ptr"); - Value* tuple_addr = builder.CreateGEP(cast_row_ptr, tuple_offset, "tuple_addr"); + Value* tuple_addr = builder.CreateInBoundsGEP(cast_row_ptr, tuple_offset, "tuple_addr"); // Load the tuple* Value* tuple_ptr = builder.CreateLoad(tuple_addr, "tuple_ptr"); @@ -218,32 +214,30 @@ Status SlotRef::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) if (tuple_is_nullable_) { Value* tuple_is_null = builder.CreateIsNull(tuple_ptr, "tuple_is_null"); // Check slot is null only if the null indicator bit is set - if (null_indicator_offset_.bit_mask == 0) { - builder.CreateCondBr(tuple_is_null, ret_block, get_slot_block); - } else { + if (slot_is_nullable) { builder.CreateCondBr(tuple_is_null, ret_block, check_slot_null_indicator_block); + } else { + builder.CreateCondBr(tuple_is_null, ret_block, get_slot_block); } } else { - if (null_indicator_offset_.bit_mask == 0) { - builder.CreateBr(get_slot_block); - } else { + if (slot_is_nullable) { builder.CreateBr(check_slot_null_indicator_block); + } else { + builder.CreateBr(get_slot_block); } } // Branch for tuple* != NULL. Need to check if null-indicator is set - if (check_slot_null_indicator_block != NULL) { + if (slot_is_nullable) { builder.SetInsertPoint(check_slot_null_indicator_block); - Value* null_addr = builder.CreateGEP(tuple_ptr, null_byte_offset, "null_ptr"); - Value* null_val = builder.CreateLoad(null_addr, "null_byte"); - Value* slot_null_mask = builder.CreateAnd(null_val, null_mask, "null_byte_set"); - Value* is_slot_null = builder.CreateICmpNE(slot_null_mask, zero, "slot_is_null"); + Value* is_slot_null = SlotDescriptor::CodegenIsNull( + codegen, &builder, null_indicator_offset_, tuple_ptr); builder.CreateCondBr(is_slot_null, ret_block, get_slot_block); } // Branch for slot != NULL builder.SetInsertPoint(get_slot_block); - Value* slot_ptr = builder.CreateGEP(tuple_ptr, slot_offset, "slot_addr"); + Value* slot_ptr = builder.CreateInBoundsGEP(tuple_ptr, slot_offset, "slot_addr"); Value* val_ptr = builder.CreateBitCast(slot_ptr, codegen->GetPtrType(type_), "val_ptr"); // Depending on the type, load the values we need Value* val = NULL; @@ -313,7 +307,7 @@ Status SlotRef::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) result.SetIsNull(is_null_phi); result.SetPtr(ptr_phi); result.SetLen(len_phi); - builder.CreateRet(result.value()); + builder.CreateRet(result.GetLoweredValue()); } else if (type() == TYPE_TIMESTAMP) { DCHECK(time_of_day != NULL); DCHECK(date != NULL); @@ -343,7 +337,7 @@ Status SlotRef::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) result.SetIsNull(is_null_phi); result.SetTimeOfDay(time_of_day_phi); result.SetDate(date_phi); - builder.CreateRet(result.value()); + builder.CreateRet(result.GetLoweredValue()); } else { DCHECK(val != NULL); PHINode* val_phi = builder.CreatePHI(val->getType(), 2, "val_phi"); @@ -360,7 +354,7 @@ Status SlotRef::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) CodegenAnyVal::GetNonNullVal(codegen, &builder, type(), "result"); result.SetIsNull(is_null_phi); result.SetVal(val_phi); - builder.CreateRet(result.value()); + builder.CreateRet(result.GetLoweredValue()); } *fn = codegen->FinalizeFunction(*fn); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/runtime/descriptors.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc index 41485f9..f8ec79c 100644 --- a/be/src/runtime/descriptors.cc +++ b/be/src/runtime/descriptors.cc @@ -77,9 +77,8 @@ ostream& operator<<(ostream& os, const NullIndicatorOffset& null_indicator) { return os; } -SlotDescriptor::SlotDescriptor( - const TSlotDescriptor& tdesc, const TupleDescriptor* parent, - const TupleDescriptor* collection_item_descriptor) +SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc, + const TupleDescriptor* parent, const TupleDescriptor* collection_item_descriptor) : id_(tdesc.id), type_(ColumnType::FromThrift(tdesc.slotType)), parent_(parent), @@ -89,9 +88,7 @@ SlotDescriptor::SlotDescriptor( null_indicator_offset_(tdesc.nullIndicatorByte, tdesc.nullIndicatorBit), slot_idx_(tdesc.slotIdx), slot_size_(type_.GetSlotSize()), - llvm_field_idx_(-1), - set_not_null_fn_(NULL), - set_null_fn_(NULL) { + llvm_field_idx_(-1) { DCHECK_NE(type_.type, TYPE_STRUCT); DCHECK(parent_ != NULL) << tdesc.parent; if (type_.IsCollectionType()) { @@ -569,60 +566,79 @@ void DescriptorTbl::GetTupleDescs(vector<TupleDescriptor*>* descs) const { } } -// Generate function to set a slot to be null or not-null. The resulting IR -// for SetNotNull looks like: -// (in this case the tuple contains only a nullable double) -// define void @SetNotNull({ i8, double }* %tuple) { -// entry: -// %null_byte_ptr = getelementptr inbounds { i8, double }* %tuple, i32 0, i32 0 -// %null_byte = load i8* %null_byte_ptr -// %0 = and i8 %null_byte, -2 -// store i8 %0, i8* %null_byte_ptr -// ret void -// } -Function* SlotDescriptor::GetUpdateNullFn(LlvmCodeGen* codegen, bool set_null) const { - if (set_null && set_null_fn_ != NULL) return set_null_fn_; - if (!set_null && set_not_null_fn_ != NULL) return set_not_null_fn_; - - StructType* tuple_type = parent()->GetLlvmStruct(codegen); - PointerType* tuple_ptr_type = tuple_type->getPointerTo(); - LlvmCodeGen::FnPrototype prototype(codegen, (set_null) ? "SetNull" :"SetNotNull", - codegen->void_type()); - prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* tuple_arg; - Function* fn = prototype.GeneratePrototype(&builder, &tuple_arg); - - Value* tuple_int8_ptr = - builder.CreateBitCast(tuple_arg, codegen->ptr_type(), "tuple_int8_ptr"); - Value* null_byte_offset = - ConstantInt::get(codegen->int_type(), null_indicator_offset_.byte_offset); - Value* null_byte_ptr = - builder.CreateInBoundsGEP(tuple_int8_ptr, null_byte_offset, "null_byte_ptr"); - Value* null_byte = builder.CreateLoad(null_byte_ptr, "null_byte"); - +Value* SlotDescriptor::CodegenIsNull( + LlvmCodeGen* codegen, LlvmBuilder* builder, Value* tuple) const { + return CodegenIsNull(codegen, builder, null_indicator_offset_, tuple); +} + +// Example IR for getting the first null bit: +// %0 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* +// %null_byte_ptr = getelementptr i8, i8* %0, i32 0 +// %null_byte = load i8, i8* %null_byte_ptr +// %null_mask = and i8 %null_byte, 1 +// %is_null = icmp ne i8 %null_mask, 0 +Value* SlotDescriptor::CodegenIsNull(LlvmCodeGen* codegen, LlvmBuilder* builder, + const NullIndicatorOffset& null_indicator_offset, Value* tuple) { + Value* null_byte = + CodegenGetNullByte(codegen, builder, null_indicator_offset, tuple, NULL); + Constant* mask = + ConstantInt::get(codegen->tinyint_type(), null_indicator_offset.bit_mask); + Value* null_mask = builder->CreateAnd(null_byte, mask, "null_mask"); + Constant* zero = ConstantInt::get(codegen->tinyint_type(), 0); + return builder->CreateICmpNE(null_mask, zero, "is_null"); +} + +// Example IR for setting the first null bit to a non-constant 'is_null' value: +// %14 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* +// %null_byte_ptr3 = getelementptr i8, i8* %14, i32 0 +// %null_byte4 = load i8, i8* %null_byte_ptr3 +// %null_bit_cleared = and i8 %null_byte4, -2 +// %15 = sext i1 %result_is_null to i8 +// %null_bit = and i8 %15, 1 +// %null_bit_set = or i8 %null_bit_cleared, %null_bit +// store i8 %null_bit_set, i8* %null_byte_ptr3 +void SlotDescriptor::CodegenSetNullIndicator( + LlvmCodeGen* codegen, LlvmBuilder* builder, Value* tuple, Value* is_null) const { + DCHECK_EQ(is_null->getType(), codegen->boolean_type()); + Value* null_byte_ptr; + Value* null_byte = + CodegenGetNullByte(codegen, builder, null_indicator_offset_, tuple, &null_byte_ptr); + Constant* mask = + ConstantInt::get(codegen->tinyint_type(), null_indicator_offset_.bit_mask); + Constant* not_mask = + ConstantInt::get(codegen->tinyint_type(), ~null_indicator_offset_.bit_mask); + + ConstantInt* constant_is_null = dyn_cast<ConstantInt>(is_null); Value* result = NULL; - if (set_null) { - Value* null_set = codegen->GetIntConstant( - TYPE_TINYINT, null_indicator_offset_.bit_mask); - result = builder.CreateOr(null_byte, null_set); - } else { - Value* null_clear_val = - codegen->GetIntConstant(TYPE_TINYINT, ~null_indicator_offset_.bit_mask); - result = builder.CreateAnd(null_byte, null_clear_val); - } - - builder.CreateStore(result, null_byte_ptr); - builder.CreateRetVoid(); - - fn = codegen->FinalizeFunction(fn); - if (set_null) { - set_null_fn_ = fn; + if (constant_is_null != NULL) { + if (constant_is_null->isOne()) { + result = builder->CreateOr(null_byte, mask, "null_bit_set"); + } else { + DCHECK(constant_is_null->isZero()); + result = builder->CreateAnd(null_byte, not_mask, "null_bit_cleared"); + } } else { - set_not_null_fn_ = fn; - } - return fn; + // Avoid branching by computing the new byte as: + // (null_byte & ~mask) | (-null & mask); + Value* byte_with_cleared_bit = + builder->CreateAnd(null_byte, not_mask, "null_bit_cleared"); + Value* sign_extended_null = builder->CreateSExt(is_null, codegen->tinyint_type()); + Value* bit_only = builder->CreateAnd(sign_extended_null, mask, "null_bit"); + result = builder->CreateOr(byte_with_cleared_bit, bit_only, "null_bit_set"); + } + + builder->CreateStore(result, null_byte_ptr); +} + +Value* SlotDescriptor::CodegenGetNullByte(LlvmCodeGen* codegen, LlvmBuilder* builder, + const NullIndicatorOffset& null_indicator_offset, Value* tuple, + Value** null_byte_ptr) { + Constant* byte_offset = + ConstantInt::get(codegen->int_type(), null_indicator_offset.byte_offset); + Value* tuple_bytes = builder->CreateBitCast(tuple, codegen->ptr_type()); + Value* byte_ptr = builder->CreateInBoundsGEP(tuple_bytes, byte_offset, "null_byte_ptr"); + if (null_byte_ptr != NULL) *null_byte_ptr = byte_ptr; + return builder->CreateLoad(byte_ptr, "null_byte"); } StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/runtime/descriptors.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index 8122653..4a50800 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -35,19 +35,21 @@ namespace llvm { class Function; class PointerType; class StructType; + class Value; }; namespace impala { +class Expr; +class ExprContext; +class LlvmBuilder; class LlvmCodeGen; class ObjectPool; +class RuntimeState; class TDescriptorTable; class TSlotDescriptor; class TTable; class TTupleDescriptor; -class Expr; -class ExprContext; -class RuntimeState; /// A path into a table schema (e.g. a vector of ColumnTypes) pointing to a particular /// column/field. The i-th element of the path is the ordinal position of the column/field @@ -133,9 +135,22 @@ class SlotDescriptor { std::string DebugString() const; - /// Codegen for: void SetNull(Tuple* tuple) / void SetNotNull(Tuple* tuple) - /// The codegen'd IR function is cached. - llvm::Function* GetUpdateNullFn(LlvmCodeGen*, bool set_null) const; + /// Generate LLVM code at the insert position of 'builder' that returns a boolean value + /// represented as a LLVM i1 indicating whether this slot is null in 'tuple'. + llvm::Value* CodegenIsNull( + LlvmCodeGen* codegen, LlvmBuilder* builder, llvm::Value* tuple) const; + + /// Generate LLVM code at the insert position of 'builder' that returns a boolean value + /// represented as a LLVM i1 with value of the NULL bit at 'null_indicator_offset' in + /// 'tuple'. + static llvm::Value* CodegenIsNull(LlvmCodeGen* codegen, LlvmBuilder* builder, + const NullIndicatorOffset& null_indicator_offset, llvm::Value* tuple); + + /// Generate LLVM code at the insert position of 'builder' to set this slot's + /// NULL bit in the given 'tuple' to the value 'is_null'. 'tuple' is a pointer + /// to the tuple, and 'is_null' is an boolean value represented as a LLVM i1. + void CodegenSetNullIndicator(LlvmCodeGen* codegen, LlvmBuilder* builder, + llvm::Value* tuple, llvm::Value* is_null) const; private: friend class DescriptorTbl; @@ -163,13 +178,16 @@ class SlotDescriptor { /// any padding bytes. int llvm_field_idx_; - /// Cached codegen'd functions - mutable llvm::Function* set_not_null_fn_; - mutable llvm::Function* set_null_fn_; - /// collection_item_descriptor should be non-NULL iff this is a collection slot SlotDescriptor(const TSlotDescriptor& tdesc, const TupleDescriptor* parent, - const TupleDescriptor* collection_item_descriptor); + const TupleDescriptor* collection_item_descriptor); + + /// Generate LLVM code at the insert position of 'builder' to get the i8 value of the + /// the byte containing 'null_indicator_offset' in 'tuple'. If 'null_byte_ptr' is + /// non-NULL, sets that to a pointer to the null byte. + static llvm::Value* CodegenGetNullByte(LlvmCodeGen* codegen, LlvmBuilder* builder, + const NullIndicatorOffset& null_indicator_offset, llvm::Value* tuple, + llvm::Value** null_byte_ptr); }; class ColumnDescriptor { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/runtime/tuple.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc index 7fa8b2b..dc83922 100644 --- a/be/src/runtime/tuple.cc +++ b/be/src/runtime/tuple.cc @@ -349,7 +349,7 @@ Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_ prototype.AddArgument("total_string_lengths", int_ptr_type); prototype.AddArgument("num_non_null_string_values", int_ptr_type); - LlvmCodeGen::LlvmBuilder builder(context); + LlvmBuilder builder(context); Value* args[8]; *fn = prototype.GeneratePrototype(&builder, args); Value* opaque_tuple_arg = args[0]; @@ -363,6 +363,9 @@ Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_ // Cast the opaque Tuple* argument to the generated struct type Type* tuple_struct_type = desc.GetLlvmStruct(codegen); + if (tuple_struct_type == NULL) { + return Status("CodegenMaterializeExprs(): failed to generate tuple desc"); + } PointerType* tuple_type = codegen->GetPtrType(tuple_struct_type); Value* tuple = builder.CreateBitCast(opaque_tuple_arg, tuple_type, "tuple"); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/runtime/types.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index 9558792..b0835bf 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -163,13 +163,28 @@ struct ColumnType { return thrift_type; } + inline bool IsBooleanType() const { return type == TYPE_BOOLEAN; } + + inline bool IsIntegerType() const { + return type == TYPE_TINYINT || type == TYPE_SMALLINT || type == TYPE_INT + || type == TYPE_BIGINT; + } + + inline bool IsFloatingPointType() const { + return type == TYPE_FLOAT || type == TYPE_DOUBLE; + } + + inline bool IsDecimalType() const { return type == TYPE_DECIMAL; } + inline bool IsStringType() const { return type == TYPE_STRING || type == TYPE_VARCHAR || type == TYPE_CHAR; } + inline bool IsTimestampType() const { return type == TYPE_TIMESTAMP; } + inline bool IsVarLenStringType() const { - return type == TYPE_STRING || type == TYPE_VARCHAR || - (type == TYPE_CHAR && len > MAX_CHAR_INLINE_LENGTH); + return type == TYPE_STRING || type == TYPE_VARCHAR + || (type == TYPE_CHAR && len > MAX_CHAR_INLINE_LENGTH); } inline bool IsComplexType() const { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/testutil/test-udas.cc ---------------------------------------------------------------------- diff --git a/be/src/testutil/test-udas.cc b/be/src/testutil/test-udas.cc index 569853e..0097500 100644 --- a/be/src/testutil/test-udas.cc +++ b/be/src/testutil/test-udas.cc @@ -130,3 +130,39 @@ void ArgIsConstUpdate(FunctionContext* context, const IntVal& val, void ArgIsConstMerge(FunctionContext* context, const BooleanVal& src, BooleanVal* dst) { dst->val |= src.val; } + +// Defines aggregate function for testing NULL handling. Returns NULL if an even number +// of non-NULL inputs are consumed or 1 if an odd number of non-NULL inputs are consumed. +void ToggleNullInit(FunctionContext* context, IntVal* total) { + *total = IntVal::null(); +} + +void ToggleNullUpdate(FunctionContext* context, const IntVal& val, IntVal* total) { + if (total->is_null) { + *total = IntVal(1); + } else { + *total = IntVal::null(); + } +} + +void ToggleNullMerge(FunctionContext* context, const IntVal& src, IntVal* dst) { + if (src.is_null != dst->is_null) { + *dst = IntVal(1); + } else { + *dst = IntVal::null(); + } +} + +// Defines aggregate function for testing input NULL handling. Returns the number of NULL +// input values. +void CountNullsInit(FunctionContext* context, BigIntVal* total) { + *total = BigIntVal(0); +} + +void CountNullsUpdate(FunctionContext* context, const BigIntVal& val, BigIntVal* total) { + if (val.is_null) ++total->val; +} + +void CountNullsMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) { + dst->val += src.val; +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/testutil/test-udfs.cc ---------------------------------------------------------------------- diff --git a/be/src/testutil/test-udfs.cc b/be/src/testutil/test-udfs.cc index efdfa0a..56d4524 100644 --- a/be/src/testutil/test-udfs.cc +++ b/be/src/testutil/test-udfs.cc @@ -56,11 +56,7 @@ IntVal AllTypes( StringVal NoArgs(FunctionContext* context) { const char* result = "string"; StringVal ret(context, strlen(result)); - // TODO: llvm 3.3 seems to have a bug if we use memcpy here making - // the IR udf crash. This is fixed in 3.3.1. Fix this when we upgrade. - //memcpy(ret.ptr, result, strlen(result)); - // IMPALA-775 - for (int i = 0; i < strlen(result); ++i) ret.ptr[i] = result[i]; + memcpy(ret.ptr, result, strlen(result)); return ret; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/be/src/util/tuple-row-compare.cc ---------------------------------------------------------------------- diff --git a/be/src/util/tuple-row-compare.cc b/be/src/util/tuple-row-compare.cc index 69892ae..c159853 100644 --- a/be/src/util/tuple-row-compare.cc +++ b/be/src/util/tuple-row-compare.cc @@ -205,7 +205,7 @@ Status TupleRowComparator::CodegenCompare(LlvmCodeGen* codegen, Function** fn) { prototype.AddArgument("lhs", tuple_row_type); prototype.AddArgument("rhs", tuple_row_type); - LlvmCodeGen::LlvmBuilder builder(context); + LlvmBuilder builder(context); Value* args[4]; *fn = prototype.GeneratePrototype(&builder, args); Value* lhs_ctxs_arg = args[0];
