http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/expr.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr.cc b/be/src/exprs/expr.cc index c7fe2ab..68f1c42 100644 --- a/be/src/exprs/expr.cc +++ b/be/src/exprs/expr.cc @@ -15,664 +15,88 @@ // specific language governing permissions and limitations // under the License. -#include <sstream> +#include "exprs/expr.h" -#include <llvm/ExecutionEngine/ExecutionEngine.h> -#include <llvm/IR/InstIterator.h> -#include <llvm/IR/LegacyPassManager.h> -#include <llvm/Transforms/Scalar.h> -#include <llvm/Transforms/Utils/BasicBlockUtils.h> -#include <llvm/Transforms/Utils/UnrollLoop.h> +#include <sstream> #include <thrift/protocol/TDebugProtocol.h> -#include "codegen/codegen-anyval.h" -#include "codegen/llvm-codegen.h" #include "common/object-pool.h" #include "common/status.h" -#include "exprs/aggregate-functions.h" -#include "exprs/anyval-util.h" -#include "exprs/bit-byte-functions.h" -#include "exprs/case-expr.h" -#include "exprs/cast-functions.h" -#include "exprs/compound-predicates.h" -#include "exprs/conditional-functions.h" -#include "exprs/decimal-functions.h" -#include "exprs/decimal-operators.h" -#include "exprs/expr-context.h" -#include "exprs/expr.h" -#include "exprs/hive-udf-call.h" -#include "exprs/in-predicate.h" -#include "exprs/is-not-empty-predicate.h" -#include "exprs/is-null-predicate.h" -#include "exprs/kudu-partition-expr.h" -#include "exprs/like-predicate.h" -#include "exprs/literal.h" -#include "exprs/math-functions.h" -#include "exprs/null-literal.h" -#include "exprs/operators.h" -#include "exprs/scalar-fn-call.h" -#include "exprs/slot-ref.h" -#include "exprs/string-functions.h" -#include "exprs/timestamp-functions.h" -#include "exprs/tuple-is-null-predicate.h" -#include "exprs/udf-builtins.h" -#include "exprs/utility-functions.h" -#include "gen-cpp/Data_types.h" -#include "gen-cpp/Exprs_types.h" +#include 
"exprs/scalar-expr.h" #include "runtime/lib-cache.h" -#include "runtime/mem-tracker.h" -#include "runtime/raw-value.h" -#include "runtime/runtime-state.h" -#include "runtime/tuple-row.h" -#include "runtime/tuple.h" -#include "udf/udf-internal.h" -#include "udf/udf.h" - +#include "gen-cpp/Data_types.h" #include "gen-cpp/Exprs_types.h" #include "gen-cpp/ImpalaService_types.h" #include "common/names.h" -using namespace impala_udf; -using namespace llvm; - namespace impala { const char* Expr::LLVM_CLASS_NAME = "class.impala::Expr"; -template<class T> -bool ParseString(const string& str, T* val) { - istringstream stream(str); - stream >> *val; - return !stream.fail(); -} - -FunctionContext* Expr::RegisterFunctionContext(ExprContext* ctx, RuntimeState* state, - int varargs_buffer_size) { - FunctionContext::TypeDesc return_type = AnyValUtil::ColumnTypeToTypeDesc(type_); - vector<FunctionContext::TypeDesc> arg_types; - for (int i = 0; i < children_.size(); ++i) { - arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(children_[i]->type_)); - } - fn_context_index_ = ctx->Register(state, return_type, arg_types, varargs_buffer_size); - return ctx->fn_context(fn_context_index_); -} - -Expr::Expr(const ColumnType& type, bool is_constant, bool is_slotref) - : cache_entry_(NULL), - is_constant_(is_constant), - is_slotref_(is_slotref), - type_(type), - output_scale_(-1), - fn_context_index_(-1), - ir_compute_fn_(NULL) { +Expr::Expr(const ColumnType& type) + : type_(type) { } -Expr::Expr(const TExprNode& node, bool is_slotref) - : cache_entry_(NULL), - is_constant_(node.is_constant), - is_slotref_(is_slotref), - type_(ColumnType::FromThrift(node.type)), - output_scale_(-1), - fn_context_index_(-1), - ir_compute_fn_(NULL) { +Expr::Expr(const TExprNode& node) + : type_(ColumnType::FromThrift(node.type)) { if (node.__isset.fn) fn_ = node.fn; } Expr::~Expr() { - DCHECK(cache_entry_ == NULL); -} - -void Expr::Close(RuntimeState* state, ExprContext* context, - 
FunctionContext::FunctionStateScope scope) { - for (int i = 0; i < children_.size(); ++i) { - children_[i]->Close(state, context, scope); - } - - if (scope == FunctionContext::FRAGMENT_LOCAL) { - // This is the final, non-cloned context to close. Clean up the whole Expr. - if (cache_entry_ != NULL) { - LibCache::instance()->DecrementUseCount(cache_entry_); - cache_entry_ = NULL; + DCHECK(cache_entry_ == nullptr); +} + +Status Expr::CreateTree(const TExpr& texpr, ObjectPool* pool, Expr* root) { + DCHECK(!texpr.nodes.empty()); + DCHECK(root != nullptr); + // The root of the tree at nodes[0] is already created and stored in 'root'. + int child_node_idx = 0; + int num_children = texpr.nodes[0].num_children; + for (int i = 0; i < num_children; ++i) { + ++child_node_idx; + Status status = CreateTreeInternal(texpr.nodes, pool, root, &child_node_idx); + if (UNLIKELY(!status.ok())) { + LOG(ERROR) << "Could not construct expr tree.\n" << status.GetDetail() << "\n" + << apache::thrift::ThriftDebugString(texpr); + return status; } } -} - -Status Expr::CreateExprTree(ObjectPool* pool, const TExpr& texpr, ExprContext** ctx) { - // input is empty - if (texpr.nodes.size() == 0) { - *ctx = NULL; - return Status::OK(); - } - int node_idx = 0; - Expr* e; - Status status = CreateTreeFromThrift(pool, texpr.nodes, NULL, &node_idx, &e, ctx); - if (status.ok() && node_idx + 1 != texpr.nodes.size()) { - status = Status( - "Expression tree only partially reconstructed. 
Not all thrift nodes were used."); - } - if (!status.ok()) { - LOG(ERROR) << "Could not construct expr tree.\n" << status.GetDetail() << "\n" - << apache::thrift::ThriftDebugString(texpr); - } - return status; -} - -Status Expr::CreateExprTrees(ObjectPool* pool, const vector<TExpr>& texprs, - vector<ExprContext*>* ctxs) { - ctxs->clear(); - for (int i = 0; i < texprs.size(); ++i) { - ExprContext* ctx; - RETURN_IF_ERROR(CreateExprTree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); + if (UNLIKELY(child_node_idx + 1 != texpr.nodes.size())) { + return Status("Expression tree only partially reconstructed. Not all thrift " \ + "nodes were used."); } return Status::OK(); } -Status Expr::CreateTreeFromThrift(ObjectPool* pool, const vector<TExprNode>& nodes, - Expr* parent, int* node_idx, Expr** root_expr, ExprContext** ctx) { +Status Expr::CreateTreeInternal(const vector<TExprNode>& nodes, ObjectPool* pool, + Expr* root, int* child_node_idx) { // propagate error case - if (*node_idx >= nodes.size()) { + if (*child_node_idx >= nodes.size()) { return Status("Failed to reconstruct expression tree from thrift."); } - int num_children = nodes[*node_idx].num_children; - Expr* expr = NULL; - RETURN_IF_ERROR(CreateExpr(pool, nodes[*node_idx], &expr)); - DCHECK(expr != NULL); - if (parent != NULL) { - parent->AddChild(expr); - } else { - DCHECK(root_expr != NULL); - DCHECK(ctx != NULL); - *root_expr = expr; - *ctx = pool->Add(new ExprContext(expr)); - } - for (int i = 0; i < num_children; i++) { - *node_idx += 1; - RETURN_IF_ERROR(CreateTreeFromThrift(pool, nodes, expr, node_idx, NULL, NULL)); - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail - if (*node_idx >= nodes.size()) { - return Status("Failed to reconstruct expression tree from thrift."); - } - } - return Status::OK(); -} - -Status Expr::CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** expr) { - switch (texpr_node.node_type) { - case 
TExprNodeType::BOOL_LITERAL: - case TExprNodeType::FLOAT_LITERAL: - case TExprNodeType::INT_LITERAL: - case TExprNodeType::STRING_LITERAL: - case TExprNodeType::DECIMAL_LITERAL: - case TExprNodeType::TIMESTAMP_LITERAL: - *expr = pool->Add(new Literal(texpr_node)); - return Status::OK(); - case TExprNodeType::CASE_EXPR: - if (!texpr_node.__isset.case_expr) { - return Status("Case expression not set in thrift node"); - } - *expr = pool->Add(new CaseExpr(texpr_node)); - return Status::OK(); - case TExprNodeType::COMPOUND_PRED: - if (texpr_node.fn.name.function_name == "and") { - *expr = pool->Add(new AndPredicate(texpr_node)); - } else if (texpr_node.fn.name.function_name == "or") { - *expr = pool->Add(new OrPredicate(texpr_node)); - } else { - DCHECK_EQ(texpr_node.fn.name.function_name, "not"); - *expr = pool->Add(new ScalarFnCall(texpr_node)); - } - return Status::OK(); - case TExprNodeType::NULL_LITERAL: - *expr = pool->Add(new NullLiteral(texpr_node)); - return Status::OK(); - case TExprNodeType::SLOT_REF: - if (!texpr_node.__isset.slot_ref) { - return Status("Slot reference not set in thrift node"); - } - *expr = pool->Add(new SlotRef(texpr_node)); - return Status::OK(); - case TExprNodeType::TUPLE_IS_NULL_PRED: - *expr = pool->Add(new TupleIsNullPredicate(texpr_node)); - return Status::OK(); - case TExprNodeType::FUNCTION_CALL: - if (!texpr_node.__isset.fn) { - return Status("Function not set in thrift node"); - } - // Special-case functions that have their own Expr classes - // TODO: is there a better way to do this? 
- if (texpr_node.fn.name.function_name == "if") { - *expr = pool->Add(new IfExpr(texpr_node)); - } else if (texpr_node.fn.name.function_name == "nullif") { - *expr = pool->Add(new NullIfExpr(texpr_node)); - } else if (texpr_node.fn.name.function_name == "isnull" || - texpr_node.fn.name.function_name == "ifnull" || - texpr_node.fn.name.function_name == "nvl") { - *expr = pool->Add(new IsNullExpr(texpr_node)); - } else if (texpr_node.fn.name.function_name == "coalesce") { - *expr = pool->Add(new CoalesceExpr(texpr_node)); - - } else if (texpr_node.fn.binary_type == TFunctionBinaryType::JAVA) { - *expr = pool->Add(new HiveUdfCall(texpr_node)); - } else { - *expr = pool->Add(new ScalarFnCall(texpr_node)); - } - return Status::OK(); - case TExprNodeType::IS_NOT_EMPTY_PRED: - *expr = pool->Add(new IsNotEmptyPredicate(texpr_node)); - return Status::OK(); - case TExprNodeType::KUDU_PARTITION_EXPR: - *expr = pool->Add(new KuduPartitionExpr(texpr_node)); - return Status::OK(); - default: - stringstream os; - os << "Unknown expr node type: " << texpr_node.node_type; - return Status(os.str()); - } -} - -bool Expr::NeedCodegen(const TExpr& texpr) { - for (const TExprNode& texpr_node : texpr.nodes) { - if (texpr_node.node_type == TExprNodeType::FUNCTION_CALL && texpr_node.__isset.fn && - texpr_node.fn.binary_type == TFunctionBinaryType::IR) { - return true; - } - } - return false; -} - -struct MemLayoutData { - int expr_idx; - int byte_size; - bool variable_length; - int alignment; - - // TODO: sort by type as well? Any reason to do this? - // TODO: would sorting in reverse order of size be faster due to better packing? - // TODO: why put var-len at end? 
- bool operator<(const MemLayoutData& rhs) const { - // variable_len go at end - if (this->variable_length && !rhs.variable_length) return false; - if (!this->variable_length && rhs.variable_length) return true; - return this->byte_size < rhs.byte_size; - } -}; - -int Expr::ComputeResultsLayout(const vector<Expr*>& exprs, vector<int>* offsets, - int* var_result_begin) { - if (exprs.size() == 0) { - *var_result_begin = -1; - return 0; - } - - // Don't align more than word (8-byte) size. There's no performance gain beyond 8-byte - // alignment, and there is a performance gain to keeping the results buffer small. This - // is consistent with what compilers do. - int MAX_ALIGNMENT = sizeof(int64_t); - - vector<MemLayoutData> data; - data.resize(exprs.size()); - - // Collect all the byte sizes and sort them - for (int i = 0; i < exprs.size(); ++i) { - DCHECK(!exprs[i]->type().IsComplexType()) << "NYI"; - data[i].expr_idx = i; - data[i].byte_size = exprs[i]->type().GetSlotSize(); - DCHECK_GT(data[i].byte_size, 0); - data[i].variable_length = exprs[i]->type().IsVarLenStringType(); - - bool fixed_len_char = exprs[i]->type().type == TYPE_CHAR && !data[i].variable_length; - - // Compute the alignment of this value. Values should be self-aligned for optimal - // memory access speed, up to the max alignment (e.g., if this value is an int32_t, - // its offset in the buffer should be divisible by sizeof(int32_t)). - // TODO: is self-alignment really necessary for perf? - if (!fixed_len_char) { - data[i].alignment = min(data[i].byte_size, MAX_ALIGNMENT); - } else { - // Fixed-len chars are aligned to a one-byte boundary, as if they were char[], - // leaving no padding between them and the previous value. 
- data[i].alignment = 1; - } - } - - sort(data.begin(), data.end()); - - // Walk the types and store in a packed aligned layout - int byte_offset = 0; - - offsets->resize(exprs.size()); - *var_result_begin = -1; - - for (int i = 0; i < data.size(); ++i) { - - // Increase byte_offset so data[i] is at the right alignment (i.e. add padding between - // this value and the previous). - byte_offset = BitUtil::RoundUp(byte_offset, data[i].alignment); - - (*offsets)[data[i].expr_idx] = byte_offset; - if (data[i].variable_length && *var_result_begin == -1) { - *var_result_begin = byte_offset; - } - DCHECK(!(i == 0 && byte_offset > 0)) << "first value should be at start of layout"; - byte_offset += data[i].byte_size; - } - - return byte_offset; -} -int Expr::ComputeResultsLayout(const vector<ExprContext*>& ctxs, vector<int>* offsets, - int* var_result_begin) { - vector<Expr*> exprs; - for (int i = 0; i < ctxs.size(); ++i) exprs.push_back(ctxs[i]->root()); - return ComputeResultsLayout(exprs, offsets, var_result_begin); -} - -void Expr::Close(const vector<ExprContext*>& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - ctxs[i]->Close(state); - } -} + const TExprNode& texpr_node = nodes[*child_node_idx]; + DCHECK_NE(texpr_node.node_type, TExprNodeType::AGGREGATE_EXPR); + ScalarExpr* child_expr; + RETURN_IF_ERROR(ScalarExpr::CreateNode(texpr_node, pool, &child_expr)); + root->children_.push_back(child_expr); -Status Expr::Prepare(const vector<ExprContext*>& ctxs, RuntimeState* state, - const RowDescriptor& row_desc, MemTracker* tracker) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->Prepare(state, row_desc, tracker)); + int num_children = nodes[*child_node_idx].num_children; + for (int i = 0; i < num_children; ++i) { + *child_node_idx += 1; + RETURN_IF_ERROR(CreateTreeInternal(nodes, pool, child_expr, child_node_idx)); + DCHECK(child_expr->GetChild(i) != nullptr); } return Status::OK(); } -Status Expr::Prepare(RuntimeState* state, 
const RowDescriptor& row_desc, - ExprContext* context) { - DCHECK(type_.type != INVALID_TYPE); - for (int i = 0; i < children_.size(); ++i) { - RETURN_IF_ERROR(children_[i]->Prepare(state, row_desc, context)); +void Expr::Close() { + for (ScalarExpr* child : children_) child->Close(); + if (cache_entry_ != nullptr) { + LibCache::instance()->DecrementUseCount(cache_entry_); + cache_entry_ = nullptr; } - return Status::OK(); -} - -Status Expr::Open(const vector<ExprContext*>& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->Open(state)); - } - return Status::OK(); -} - -Status Expr::Open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - for (int i = 0; i < children_.size(); ++i) { - RETURN_IF_ERROR(children_[i]->Open(state, context, scope)); - } - return Status::OK(); -} - -Status Expr::CloneIfNotExists(const vector<ExprContext*>& ctxs, RuntimeState* state, - vector<ExprContext*>* new_ctxs) { - DCHECK(new_ctxs != NULL); - if (!new_ctxs->empty()) { - // 'ctxs' was already cloned into '*new_ctxs', nothing to do. - DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) DCHECK((*new_ctxs)[i]->is_clone_); - return Status::OK(); - } - new_ctxs->resize(ctxs.size()); - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->Clone(state, &(*new_ctxs)[i])); - } - return Status::OK(); -} - -string Expr::DebugString() const { - // TODO: implement partial debug string for member vars - stringstream out; - out << " type=" << type_.DebugString(); - if (!children_.empty()) { - out << " children=" << DebugString(children_); - } - return out.str(); -} - -string Expr::DebugString(const vector<Expr*>& exprs) { - stringstream out; - out << "["; - for (int i = 0; i < exprs.size(); ++i) { - out << (i == 0 ? 
"" : " ") << exprs[i]->DebugString(); - } - out << "]"; - return out.str(); -} - -string Expr::DebugString(const vector<ExprContext*>& ctxs) { - vector<Expr*> exprs; - for (int i = 0; i < ctxs.size(); ++i) exprs.push_back(ctxs[i]->root()); - return DebugString(exprs); -} - -bool Expr::IsLiteral() const { - return false; -} - -int Expr::GetSlotIds(vector<SlotId>* slot_ids) const { - int n = 0; - for (int i = 0; i < children_.size(); ++i) { - n += children_[i]->GetSlotIds(slot_ids); - } - return n; -} - -Function* Expr::GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* codegen) { - switch (type.type) { - case TYPE_BOOLEAN: - return codegen->GetFunction(IRFunction::EXPR_GET_BOOLEAN_VAL, false); - case TYPE_TINYINT: - return codegen->GetFunction(IRFunction::EXPR_GET_TINYINT_VAL, false); - case TYPE_SMALLINT: - return codegen->GetFunction(IRFunction::EXPR_GET_SMALLINT_VAL, false); - case TYPE_INT: - return codegen->GetFunction(IRFunction::EXPR_GET_INT_VAL, false); - case TYPE_BIGINT: - return codegen->GetFunction(IRFunction::EXPR_GET_BIGINT_VAL, false); - case TYPE_FLOAT: - return codegen->GetFunction(IRFunction::EXPR_GET_FLOAT_VAL, false); - case TYPE_DOUBLE: - return codegen->GetFunction(IRFunction::EXPR_GET_DOUBLE_VAL, false); - case TYPE_STRING: - case TYPE_CHAR: - case TYPE_VARCHAR: - return codegen->GetFunction(IRFunction::EXPR_GET_STRING_VAL, false); - case TYPE_TIMESTAMP: - return codegen->GetFunction(IRFunction::EXPR_GET_TIMESTAMP_VAL, false); - case TYPE_DECIMAL: - return codegen->GetFunction(IRFunction::EXPR_GET_DECIMAL_VAL, false); - default: - DCHECK(false) << "Invalid type: " << type.DebugString(); - return NULL; - } -} - -Function* Expr::CreateIrFunctionPrototype(LlvmCodeGen* codegen, const string& name, - Value* (*args)[2]) { - Type* return_type = CodegenAnyVal::GetLoweredType(codegen, type()); - LlvmCodeGen::FnPrototype prototype(codegen, name, return_type); - prototype.AddArgument( - LlvmCodeGen::NamedVariable( - "context", 
codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME))); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("row", codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME))); - Function* function = prototype.GeneratePrototype(NULL, args[0]); - DCHECK(function != NULL); - return function; -} - -void Expr::InitBuiltinsDummy() { - // Call one function from each of the classes to pull all the symbols - // from that class in. - // TODO: is there a better way to do this? - AggregateFunctions::InitNull(NULL, NULL); - BitByteFunctions::CountSet(NULL, TinyIntVal::null()); - CastFunctions::CastToBooleanVal(NULL, TinyIntVal::null()); - CompoundPredicate::Not(NULL, BooleanVal::null()); - ConditionalFunctions::NullIfZero(NULL, TinyIntVal::null()); - DecimalFunctions::Precision(NULL, DecimalVal::null()); - DecimalOperators::CastToDecimalVal(NULL, DecimalVal::null()); - InPredicate::InIterate(NULL, BigIntVal::null(), 0, NULL); - IsNullPredicate::IsNull(NULL, BooleanVal::null()); - LikePredicate::Like(NULL, StringVal::null(), StringVal::null()); - Operators::Add_IntVal_IntVal(NULL, IntVal::null(), IntVal::null()); - MathFunctions::Pi(NULL); - StringFunctions::Length(NULL, StringVal::null()); - TimestampFunctions::Year(NULL, TimestampVal::null()); - TimestampFunctions::UnixAndFromUnixPrepare(NULL, FunctionContext::FRAGMENT_LOCAL); - UdfBuiltins::Pi(NULL); - UtilityFunctions::Pid(NULL); -} - -Status Expr::GetConstVal( - RuntimeState* state, ExprContext* context, AnyVal** const_val) { - DCHECK(context->opened_); - if (!is_constant()) { - *const_val = NULL; - return Status::OK(); - } - - RETURN_IF_ERROR(AllocateAnyVal(state, context->pool_.get(), type_, - "Could not allocate constant expression value", const_val)); - switch (type_.type) { - case TYPE_BOOLEAN: - *reinterpret_cast<BooleanVal*>(*const_val) = GetBooleanVal(context, NULL); - break; - case TYPE_TINYINT: - *reinterpret_cast<TinyIntVal*>(*const_val) = GetTinyIntVal(context, NULL); - break; - case TYPE_SMALLINT: - 
*reinterpret_cast<SmallIntVal*>(*const_val) = GetSmallIntVal(context, NULL); - break; - case TYPE_INT: - *reinterpret_cast<IntVal*>(*const_val) = GetIntVal(context, NULL); - break; - case TYPE_BIGINT: - *reinterpret_cast<BigIntVal*>(*const_val) = GetBigIntVal(context, NULL); - break; - case TYPE_FLOAT: - *reinterpret_cast<FloatVal*>(*const_val) = GetFloatVal(context, NULL); - break; - case TYPE_DOUBLE: - *reinterpret_cast<DoubleVal*>(*const_val) = GetDoubleVal(context, NULL); - break; - case TYPE_STRING: - case TYPE_CHAR: - case TYPE_VARCHAR: { - StringVal* sv = reinterpret_cast<StringVal*>(*const_val); - *sv = GetStringVal(context, NULL); - if (sv->len > 0) { - // Make sure the memory is owned by 'context'. - uint8_t* ptr_copy = context->pool_->TryAllocate(sv->len); - if (ptr_copy == NULL) { - return context->pool_->mem_tracker()->MemLimitExceeded( - state, "Could not allocate constant string value", sv->len); - } - memcpy(ptr_copy, sv->ptr, sv->len); - sv->ptr = ptr_copy; - } - break; - } - case TYPE_TIMESTAMP: - *reinterpret_cast<TimestampVal*>(*const_val) = GetTimestampVal(context, NULL); - break; - case TYPE_DECIMAL: - *reinterpret_cast<DecimalVal*>(*const_val) = GetDecimalVal(context, NULL); - break; - default: - DCHECK(false) << "Type not implemented: " << type(); - } - // Errors may have been set during expr evaluation. - return GetFnContextError(context); -} - -Status Expr::GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, Function** fn) { - if (ir_compute_fn_ != NULL) { - *fn = ir_compute_fn_; - return Status::OK(); - } - Function* static_getval_fn = GetStaticGetValWrapper(type(), codegen); - - // Call it passing this as the additional first argument. 
- Value* args[2]; - ir_compute_fn_ = CreateIrFunctionPrototype(codegen, "CodegenComputeFnWrapper", &args); - BasicBlock* entry_block = - BasicBlock::Create(codegen->context(), "entry", ir_compute_fn_); - LlvmBuilder builder(entry_block); - Value* this_ptr = - codegen->CastPtrToLlvmPtr(codegen->GetPtrType(Expr::LLVM_CLASS_NAME), this); - Value* compute_fn_args[] = {this_ptr, args[0], args[1]}; - Value* ret = CodegenAnyVal::CreateCall( - codegen, &builder, static_getval_fn, compute_fn_args, "ret"); - builder.CreateRet(ret); - ir_compute_fn_ = codegen->FinalizeFunction(ir_compute_fn_); - *fn = ir_compute_fn_; - return Status::OK(); -} - -// At least one of these should always be subclassed. -BooleanVal Expr::GetBooleanVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return BooleanVal::null(); -} -TinyIntVal Expr::GetTinyIntVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return TinyIntVal::null(); -} -SmallIntVal Expr::GetSmallIntVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return SmallIntVal::null(); -} -IntVal Expr::GetIntVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return IntVal::null(); -} -BigIntVal Expr::GetBigIntVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return BigIntVal::null(); -} -FloatVal Expr::GetFloatVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return FloatVal::null(); -} -DoubleVal Expr::GetDoubleVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return DoubleVal::null(); -} -StringVal Expr::GetStringVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return StringVal::null(); -} -CollectionVal Expr::GetCollectionVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return CollectionVal::null(); -} -TimestampVal 
Expr::GetTimestampVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return TimestampVal::null(); -} -DecimalVal Expr::GetDecimalVal(ExprContext* context, const TupleRow* row) { - DCHECK(false) << DebugString(); - return DecimalVal::null(); -} - -Status Expr::GetFnContextError(ExprContext* ctx) { - if (fn_context_index_ != -1) { - FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); - if (fn_ctx->has_error()) return Status(fn_ctx->error_msg()); - } - return Status::OK(); -} - -string Expr::DebugString(const string& expr_name) const { - stringstream out; - out << expr_name << "(" << Expr::DebugString() << ")"; - return out.str(); } }
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/expr.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index 8919f59..1a47db6 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -16,75 +16,6 @@ // under the License. -/// --- Terminology: -// -/// Compute function: The function that, given a row, performs the computation of an expr -/// and produces a scalar result. This function evaluates the necessary child arguments by -/// calling their compute functions, then performs whatever computation is necessary on -/// the arguments (e.g. calling a UDF with the child arguments). All compute functions -/// take arguments (ExprContext*, const TupleRow*). The return type is a *Val (i.e. a subclass -/// of AnyVal). Thus, a single expression will implement a compute function for every -/// return type it supports. -/// -/// UDX: user-defined X. E.g., user-defined function, user-defined aggregate. Something -/// that is written by an external user. -/// -/// Scalar function call: An expr that returns a single scalar value and can be -/// implemented using the UDF interface. Note that this includes builtins, which although -/// not being user-defined still use the same interface as UDFs (i.e., they are -/// implemented as functions with signature "*Val (FunctionContext*, *Val, *Val...)"). -/// -/// Aggregate function call: a UDA or builtin aggregate function. -/// -/// --- Expr overview: -/// -/// The Expr superclass defines a virtual Get*Val() compute function for each possible -/// return type (GetBooleanVal(), GetStringVal(), etc). Expr subclasses implement the -/// Get*Val() functions associated with their possible return types; for many Exprs this -/// will be a single function. These functions are generally cross-compiled to both native -/// and IR libraries. In the interpreted path, the native compute functions are run as-is. 
-/// -/// For the codegen path, Expr defines a virtual method GetCodegendComputeFn() that -/// returns the Function* of the expr's compute function. Note that we do not need a -/// separate GetCodegendComputeFn() for each type. -/// -/// Only short-circuited operators (e.g. &&, ||) and other special functions like literals -/// must implement custom Get*Val() compute functions. Scalar function calls use the -/// generic compute functions implemented by ScalarFnCall(). For cross-compiled compute -/// functions, GetCodegendComputeFn() can use ReplaceChildCallsComputeFn(), which takes a -/// cross-compiled IR Get*Val() function, pulls out any calls to the children's Get*Val() -/// functions (which we identify via the Get*Val() static wrappers), and replaces them -/// with the codegen'd version of that function. This allows us to write a single function -/// for both the interpreted and codegen paths. -/// -/// Only short-circuited operators (e.g. &&, ||) and other special functions like -/// literals must implement custom Get*Val() compute functions. Scalar function calls -/// use the generic compute functions implemented by ScalarFnCall(). For cross-compiled -/// compute functions, GetCodegendComputeFn() can use ReplaceChildCallsComputeFn(), which -/// takes a cross-compiled IR Get*Val() function, pulls out any calls to the children's -/// Get*Val() functions (which we identify via the Get*Val() static wrappers), and -/// replaces them with the codegen'd version of that function. This allows us to write a -/// single function for both the interpreted and codegen paths. -/// -/// --- Expr users (e.g. exec nodes): -/// -/// A typical usage pattern will look something like: -/// 1. Expr::CreateExprTrees() -/// 2. Expr::Prepare() -/// 3. Expr::Open() -/// 4. Expr::CloneIfNotExists() [for multi-threaded execution] -/// 5. Evaluate exprs via Get*Val() calls -/// 6. 
Expr::Close() [called once per ExprContext, including clones] -/// -/// Expr users should use the static Get*Val() wrapper functions to evaluate exprs, -/// cross-compile the resulting function, and use ReplaceGetValCalls() to create the -/// codegen'd function. See the comments on these functions for more details. This is a -/// similar pattern to that used by the cross-compiled compute functions. -/// -/// TODO: -/// - Fix codegen compile time -/// - Fix perf regressions via extra optimization passes + patching LLVM - #ifndef IMPALA_EXPRS_EXPR_H #define IMPALA_EXPRS_EXPR_H @@ -97,21 +28,9 @@ #include "common/status.h" #include "impala-ir/impala-ir-functions.h" #include "runtime/types.h" -#include "udf/udf-internal.h" // for CollectionVal -#include "udf/udf.h" - -using namespace impala_udf; - -namespace llvm { - class BasicBlock; - class Function; - class Type; - class Value; -}; namespace impala { -class ExprContext; class IsNullExpr; class LibCacheEntry; class LlvmCodeGen; @@ -119,279 +38,102 @@ class MemTracker; class ObjectPool; class RowDescriptor; class RuntimeState; +class ScalarExpr; +class SlotDescriptor; class TColumnValue; class TExpr; class TExprNode; class Tuple; class TupleRow; -/// This is the superclass of all expr evaluation nodes. +/// --- Expr overview +/// +/// Expr class represents expression embedded in various operators in a query plan +/// fragment in the backend. For example, it can be the join expressions in a PHJ +/// node, predicates in a scan node or the aggregate functions in a PAGG node. +/// +/// There are two subclasses of Expr: ScalarExpr for scalar expressions and AggFn for +/// aggregate functions; A scalar expression computes a value over a single row while +/// an aggregate function computes a value over a set of rows. ScalarExpr is further +/// subclassed into various expressions such as Literal and ScalarFnCall to name two +/// examples. +/// +/// Expr is internally represented as a tree of nodes. 
The root node can be either a +/// ScalarExpr or an AggFn node and all descendants are ScalarExpr nodes. Exprs and their +/// subclasses contain query compile-time information and the code to evaluate the exprs +/// (represented by the specific class). ScalarExprEvaluator and AggFnEvaluator are the +/// evaluators for ScalarExpr and AggFn respectively. They contain the general runtime +/// state needed for the actual evaluation. They don't need to be subclassed because the +/// expr-specific code sits in the expr subclasses. An Expr can be shared by multiple +/// evaluators. +/// +/// Please see the headers of ScalarExpr and AggFn for further details. +/// class Expr { public: - virtual ~Expr(); - - /// Virtual compute functions for each *Val type. Each Expr subclass should implement - /// the functions for the return type(s) it supports. For example, a boolean function - /// will only implement GetBooleanVal(). Some Exprs, like Literal, have many possible - /// return types and will implement multiple Get*Val() functions. 
- virtual BooleanVal GetBooleanVal(ExprContext* context, const TupleRow*); - virtual TinyIntVal GetTinyIntVal(ExprContext* context, const TupleRow*); - virtual SmallIntVal GetSmallIntVal(ExprContext* context, const TupleRow*); - virtual IntVal GetIntVal(ExprContext* context, const TupleRow*); - virtual BigIntVal GetBigIntVal(ExprContext* context, const TupleRow*); - virtual FloatVal GetFloatVal(ExprContext* context, const TupleRow*); - virtual DoubleVal GetDoubleVal(ExprContext* context, const TupleRow*); - virtual StringVal GetStringVal(ExprContext* context, const TupleRow*); - virtual CollectionVal GetCollectionVal(ExprContext* context, const TupleRow*); - virtual TimestampVal GetTimestampVal(ExprContext* context, const TupleRow*); - virtual DecimalVal GetDecimalVal(ExprContext* context, const TupleRow*); - const std::string& function_name() const { return fn_.name.function_name; } - /// Get the number of digits after the decimal that should be displayed for this value. - /// Returns -1 if no scale has been specified (currently the scale is only set for - /// doubles set by RoundUpTo). GetValue() must have already been called. - /// TODO: is this still necessary? - int output_scale() const { return output_scale_; } + virtual ~Expr(); - void AddChild(Expr* expr) { children_.push_back(expr); } - Expr* GetChild(int i) const { return children_[i]; } + /// Returns true if the given Expr is an AggFn. Overridden by AggFn. + virtual bool IsAggFn() const { return false; } + + ScalarExpr* GetChild(int i) const { return children_[i]; } int GetNumChildren() const { return children_.size(); } const ColumnType& type() const { return type_; } - bool is_slotref() const { return is_slotref_; } - bool is_constant() const { return is_constant_; } - - const std::vector<Expr*>& children() const { return children_; } - - /// Returns an error status if the function context associated with the - /// expr has an error set. 
- Status GetFnContextError(ExprContext* ctx); + const std::vector<ScalarExpr*>& children() const { return children_; } - /// Returns true if this is a literal expression. - virtual bool IsLiteral() const; + /// Releases cache entries to LibCache in all nodes of the Expr tree. + virtual void Close(); - /// Returns the number of SlotRef nodes in the expr tree. If this returns 0, it means it - /// is valid to call GetValue(nullptr) on the expr tree. - /// If 'slot_ids' is non-null, add the slot ids to it. - virtual int GetSlotIds(std::vector<SlotId>* slot_ids = nullptr) const; - - /// Returns true iff the expression 'texpr' contains UDF available only as LLVM IR. In - /// which case, it's impossible to interpret this expression and codegen must be used. - static bool NeedCodegen(const TExpr& texpr); - - /// Create expression tree from the list of nodes contained in texpr within 'pool'. - /// Returns the root of expression tree in 'expr' and the corresponding ExprContext in - /// 'ctx'. - static Status CreateExprTree(ObjectPool* pool, const TExpr& texpr, ExprContext** ctx); - - /// Creates vector of ExprContexts containing exprs from the given vector of - /// TExprs within 'pool'. Returns an error if any of the individual conversions caused - /// an error, otherwise OK. - static Status CreateExprTrees(ObjectPool* pool, const std::vector<TExpr>& texprs, - std::vector<ExprContext*>* ctxs); - - /// Convenience function for preparing multiple expr trees. - /// Allocations from 'ctxs' will be counted against 'tracker'. - static Status Prepare(const std::vector<ExprContext*>& ctxs, RuntimeState* state, - const RowDescriptor& row_desc, MemTracker* tracker); - - /// Convenience function for opening multiple expr trees. - static Status Open(const std::vector<ExprContext*>& ctxs, RuntimeState* state); - - /// Clones each ExprContext for multiple expr trees. 'new_ctxs' must be non-NULL. 
- /// Idempotent: if '*new_ctxs' is empty, a clone of each context in 'ctxs' will be added - /// to it, and if non-empty, it is assumed CloneIfNotExists() was already called and the - /// call is a no-op. The new ExprContexts are created in state->obj_pool(). - static Status CloneIfNotExists(const std::vector<ExprContext*>& ctxs, - RuntimeState* state, std::vector<ExprContext*>* new_ctxs); - - /// Convenience function for closing multiple expr trees. - static void Close(const std::vector<ExprContext*>& ctxs, RuntimeState* state); - - /// Computes a memory efficient layout for storing the results of evaluating - /// 'exprs'. The results are assumed to be void* slot types (vs AnyVal types). Varlen - /// data is not included (e.g. there will be space for a StringValue, but not the data - /// referenced by it). - /// - /// Returns the number of bytes necessary to store all the results and offsets - /// where the result for each expr should be stored. - /// - /// Variable length types are guaranteed to be at the end and 'var_result_begin' - /// will be set the beginning byte offset where variable length results begin. - /// 'var_result_begin' will be set to -1 if there are no variable len types. - static int ComputeResultsLayout(const std::vector<Expr*>& exprs, - std::vector<int>* offsets, int* var_result_begin); - static int ComputeResultsLayout(const std::vector<ExprContext*>& ctxs, - std::vector<int>* offsets, int* var_result_begin); - - /// Returns an llvm::Function* with signature: - /// <subclass of AnyVal> ComputeFn(ExprContext* context, const TupleRow* row) - // - /// The function should evaluate this expr over 'row' and return the result as the - /// appropriate type of AnyVal. - virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) = 0; - - /// If this expr is constant according to is_constant(), evaluates the expr with no - /// input row argument and returns the result in 'const_val'. Otherwise sets - /// 'const_val' to nullptr. 
The returned AnyVal and associated varlen data is owned by - /// 'context'. This should only be called after Open() has been called on this expr. - /// Returns an error if there was an error evaluating the expression or if memory could - /// not be allocated for the expression result. - virtual Status GetConstVal( - RuntimeState* state, ExprContext* context, AnyVal** const_val); - - virtual std::string DebugString() const; - static std::string DebugString(const std::vector<Expr*>& exprs); - static std::string DebugString(const std::vector<ExprContext*>& ctxs); - - /// The builtin functions are not called from anywhere in the code and the - /// symbols are therefore not included in the binary. We call these functions - /// by using dlsym. The compiler must think this function is callable to - /// not strip these symbols. - static void InitBuiltinsDummy(); + /// Implemented by subclasses to provide debug string information about the expr. + virtual std::string DebugString() const = 0; static const char* LLVM_CLASS_NAME; protected: - friend class AggFnEvaluator; - friend class DecimalFunctions; - friend class DecimalOperators; - friend class MathFunctions; - friend class StringFunctions; - friend class TimestampFunctions; - friend class ConditionalFunctions; - friend class UtilityFunctions; - friend class CaseExpr; - friend class InPredicate; - friend class ScalarFnCall; + /// Constructs an Expr tree from the thrift Expr 'texpr'. 'root' is the root of the + /// Expr tree created from texpr.nodes[0] by the caller (either ScalarExpr or AggFn). + /// The newly created Expr nodes are added to 'pool'. Returns error status on failure. + static Status CreateTree(const TExpr& texpr, ObjectPool* pool, Expr* root); - Expr(const ColumnType& type, bool is_constant, bool is_slotref); - Expr(const TExprNode& node, bool is_slotref = false); + Expr(const ColumnType& type); + Expr(const TExprNode& node); - /// Initializes this expr instance for execution.
This does not include initializing - /// state in the ExprContext; 'context' should only be used to register a - /// FunctionContext via RegisterFunctionContext(). Any IR functions must be generated - /// here. - /// - /// Subclasses overriding this function should call Expr::Prepare() to recursively call - /// Prepare() on the expr tree. - virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context); + /// Cache entry for the UDF or UDAF loaded from the library. Used by AggFn and + /// some ScalarExpr such as ScalarFnCall. NULL if it's not used. + LibCacheEntry* cache_entry_ = nullptr; - /// Initializes 'context' for execution. If scope if FRAGMENT_LOCAL, both fragment- and - /// thread-local state should be initialized. Otherwise, if scope is THREAD_LOCAL, only - /// thread-local state should be initialized. - // - /// Subclasses overriding this function should call Expr::Open() to recursively call - /// Open() on the expr tree. - virtual Status Open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL); - - /// Subclasses overriding this function should call Expr::Close(). - // - /// If scope if FRAGMENT_LOCAL, both fragment- and thread-local state should be torn - /// down. Otherwise, if scope is THREAD_LOCAL, only thread-local state should be torn - /// down. - virtual void Close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL); - - /// Cache entry for the library implementing this function. - LibCacheEntry* cache_entry_; - - /// Function description. + /// The thrift function. Set only for AggFn and some ScalarExpr such as ScalarFnCall. TFunction fn_; - /// True if this expr should be treated as a constant expression. True if either: - /// * This expr was sent from the frontend and Expr.isConstant() was true. - /// * This expr is a constant literal created in the backend. 
- const bool is_constant_; - - /// recognize if this node is a slotref in order to speed up GetValue() - const bool is_slotref_; - - /// analysis is done, types are fixed at this point + /// Return type of the expression. const ColumnType type_; - std::vector<Expr*> children_; - int output_scale_; - - /// Index to pass to ExprContext::fn_context() to retrieve this expr's FunctionContext. - /// Set in RegisterFunctionContext(). -1 if this expr does not need a FunctionContext and - /// doesn't call RegisterFunctionContext(). - int fn_context_index_; - - /// Cached codegened compute function. Exprs should set this in GetCodegendComputeFn(). - llvm::Function* ir_compute_fn_; - /// Helper function that calls ctx->Register(), sets fn_context_index_, and returns the - /// registered FunctionContext. - FunctionContext* RegisterFunctionContext( - ExprContext* ctx, RuntimeState* state, int varargs_buffer_size = 0); - - /// Helper function to create an empty Function* with the appropriate signature to be - /// returned by GetCodegendComputeFn(). 'name' is the name of the returned Function*. - /// The arguments to the function are returned in 'args'. - llvm::Function* CreateIrFunctionPrototype(LlvmCodeGen* codegen, const std::string& name, - llvm::Value* (*args)[2]); - - /// Generates an IR compute function that calls the appropriate interpreted Get*Val() - /// compute function. - // - /// This is useful for builtins that can't be implemented with the UDF interface - /// (e.g. functions that need short-circuiting) and that don't have custom codegen - /// functions that use the IRBuilder. It doesn't provide any performance benefit over - /// the interpreted path. - /// TODO: this should be replaced with fancier xcompiling infrastructure - Status GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, llvm::Function** fn); - - /// Returns the IR version of the static Get*Val() wrapper function corresponding to - /// 'type'. 
This is used for calling interpreted Get*Val() functions from codegen'd - /// functions (e.g. in ScalarFnCall() when codegen is disabled). - llvm::Function* GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* codegen); - - /// Simple debug string that provides no expr subclass-specific information - std::string DebugString(const std::string& expr_name) const; + /// Sub-expressions of this expression tree. + std::vector<ScalarExpr*> children_; private: - friend class ExprContext; friend class ExprTest; friend class ExprCodegenTest; - /// Create a new Expr based on texpr_node.node_type within 'pool'. - static Status CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** expr); - - /// Creates an expr tree for the node rooted at 'node_idx' via depth-first traversal. - /// parameters - /// nodes: vector of thrift expression nodes to be translated - /// parent: parent of node at node_idx (or NULL for node_idx == 0) - /// node_idx: - /// in: root of TExprNode tree - /// out: next node in 'nodes' that isn't part of tree - /// root_expr: out: root of constructed expr tree - /// ctx: out: context of constructed expr tree + /// Creates an expression tree rooted at 'root' via depth-first traversal. + /// Called recursively to create children expr trees for sub-expressions. + /// + /// parameters: + /// nodes: vector of thrift expression nodes to be unpacked. + /// It is essentially an Expr tree encoded in a depth-first manner. + /// pool: Object pool in which Expr created from nodes are stored. + /// root: root of the new tree. Created and initialized by the caller. + /// child_node_idx: index into 'nodes' to be unpacked. It's the root of the next child + /// Expr tree to be added to 'root'. Updated as 'nodes' are + /// consumed to construct the tree.
/// return /// status.ok() if successful /// !status.ok() if tree is inconsistent or corrupt - static Status CreateTreeFromThrift(ObjectPool* pool, - const std::vector<TExprNode>& nodes, Expr* parent, int* node_idx, - Expr** root_expr, ExprContext** ctx); - - /// Static wrappers around the virtual Get*Val() functions. Calls the appropriate - /// Get*Val() function on expr, passing it the context and row arguments. - // - /// These are used to call Get*Val() functions from generated functions, since I don't - /// know how to call virtual functions directly. GetStaticGetValWrapper() returns the - /// IR function of the appropriate wrapper function. - static BooleanVal GetBooleanVal(Expr* expr, ExprContext* context, const TupleRow* row); - static TinyIntVal GetTinyIntVal(Expr* expr, ExprContext* context, const TupleRow* row); - static SmallIntVal GetSmallIntVal(Expr* expr, ExprContext* context, const TupleRow* row); - static IntVal GetIntVal(Expr* expr, ExprContext* context, const TupleRow* row); - static BigIntVal GetBigIntVal(Expr* expr, ExprContext* context, const TupleRow* row); - static FloatVal GetFloatVal(Expr* expr, ExprContext* context, const TupleRow* row); - static DoubleVal GetDoubleVal(Expr* expr, ExprContext* context, const TupleRow* row); - static StringVal GetStringVal(Expr* expr, ExprContext* context, const TupleRow* row); - static TimestampVal GetTimestampVal(Expr* expr, ExprContext* context, const TupleRow* row); - static DecimalVal GetDecimalVal(Expr* expr, ExprContext* context, const TupleRow* row); + static Status CreateTreeInternal(const std::vector<TExprNode>& nodes, + ObjectPool* pool, Expr* parent, int* child_node_idx); }; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/hive-udf-call.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc index d39e1bd..ae0a407 100644 --- a/be/src/exprs/hive-udf-call.cc +++ 
b/be/src/exprs/hive-udf-call.cc @@ -23,7 +23,7 @@ #include "codegen/llvm-codegen.h" #include "exprs/anyval-util.h" -#include "exprs/expr-context.h" +#include "exprs/scalar-expr-evaluator.h" #include "rpc/jni-thrift-util.h" #include "runtime/lib-cache.h" #include "runtime/runtime-state.h" @@ -67,14 +67,15 @@ struct JniContext { output_anyval(NULL) {} }; -HiveUdfCall::HiveUdfCall(const TExprNode& node) : Expr(node), input_buffer_size_(0) { +HiveUdfCall::HiveUdfCall(const TExprNode& node) + : ScalarExpr(node), input_buffer_size_(0) { DCHECK_EQ(node.node_type, TExprNodeType::FUNCTION_CALL); DCHECK_EQ(node.fn.binary_type, TFunctionBinaryType::JAVA); DCHECK(executor_cl_ != NULL) << "Init() was not called!"; } -AnyVal* HiveUdfCall::Evaluate(ExprContext* ctx, const TupleRow* row) { - FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); +AnyVal* HiveUdfCall::Evaluate(ScalarExprEvaluator* eval, const TupleRow* row) const { + FunctionContext* fn_ctx = eval->fn_context(fn_ctx_idx_); JniContext* jni_ctx = reinterpret_cast<JniContext*>( fn_ctx->GetFunctionState(FunctionContext::THREAD_LOCAL)); DCHECK(jni_ctx != NULL); @@ -91,7 +92,7 @@ AnyVal* HiveUdfCall::Evaluate(ExprContext* ctx, const TupleRow* row) { // Evaluate all the children values and put the results in input_values_buffer for (int i = 0; i < GetNumChildren(); ++i) { - void* v = ctx->GetValue(GetChild(i), row); + void* v = eval->GetValue(*GetChild(i), row); if (v == NULL) { jni_ctx->input_nulls_buffer[i] = 1; @@ -152,7 +153,7 @@ AnyVal* HiveUdfCall::Evaluate(ExprContext* ctx, const TupleRow* row) { return jni_ctx->output_anyval; } -Status HiveUdfCall::Init() { +Status HiveUdfCall::InitEnv() { DCHECK(executor_cl_ == NULL) << "Init() already called!"; JNIEnv* env = getJNIEnv(); if (env == NULL) return Status("Failed to get/create JVM"); @@ -169,9 +170,9 @@ Status HiveUdfCall::Init() { return Status::OK(); } -Status HiveUdfCall::Prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* ctx) { - 
RETURN_IF_ERROR(Expr::Prepare(state, row_desc, ctx)); +Status HiveUdfCall::Init(const RowDescriptor& row_desc, RuntimeState* state) { + // Initialize children first. + RETURN_IF_ERROR(ScalarExpr::Init(row_desc, state)); // Copy the Hive Jar from hdfs to local file system. RETURN_IF_ERROR(LibCache::instance()->GetLocalLibPath( @@ -185,19 +186,16 @@ Status HiveUdfCall::Prepare(RuntimeState* state, const RowDescriptor& row_desc, // one buffer for all rows and we never copy the entire buffer. input_buffer_size_ = BitUtil::RoundUpNumBytes(input_buffer_size_) * 8; } - - // Register FunctionContext in ExprContext - RegisterFunctionContext(ctx, state); - return Status::OK(); } -Status HiveUdfCall::Open(RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { - RETURN_IF_ERROR(Expr::Open(state, ctx, scope)); +Status HiveUdfCall::OpenEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const { + RETURN_IF_ERROR(ScalarExpr::OpenEvaluator(scope, state, eval)); - // Create a JniContext in this thread's FunctionContext - FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); + // Create a JniContext in this thread's FunctionContext. 
+ DCHECK_GE(fn_ctx_idx_, 0); + FunctionContext* fn_ctx = eval->fn_context(fn_ctx_idx_); JniContext* jni_ctx = new JniContext; fn_ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, jni_ctx); @@ -231,15 +229,15 @@ Status HiveUdfCall::Open(RuntimeState* state, ExprContext* ctx, RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, jni_ctx->executor, &jni_ctx->executor)); - RETURN_IF_ERROR(AllocateAnyVal(state, ctx->pool_.get(), type_, + RETURN_IF_ERROR(AllocateAnyVal(state, eval->mem_pool(), type_, "Could not allocate JNI output value", &jni_ctx->output_anyval)); return Status::OK(); } -void HiveUdfCall::Close(RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (fn_context_index_ != -1) { - FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); +void HiveUdfCall::CloseEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const { + if (eval->opened()) { + FunctionContext* fn_ctx = eval->fn_context(fn_ctx_idx_); JniContext* jni_ctx = reinterpret_cast<JniContext*>( fn_ctx->GetFunctionState(FunctionContext::THREAD_LOCAL)); @@ -266,12 +264,9 @@ void HiveUdfCall::Close(RuntimeState* state, ExprContext* ctx, jni_ctx->output_anyval = NULL; delete jni_ctx; fn_ctx->SetFunctionState(FunctionContext::THREAD_LOCAL, nullptr); - } else { - DCHECK(!ctx->opened_); } } - - Expr::Close(state, ctx, scope); + ScalarExpr::CloseEvaluator(scope, state, eval); } Status HiveUdfCall::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) { @@ -282,63 +277,73 @@ string HiveUdfCall::DebugString() const { stringstream out; out << "HiveUdfCall(hdfs_location=" << fn_.hdfs_location << " classname=" << fn_.scalar_fn.symbol << " " - << Expr::DebugString() << ")"; + << ScalarExpr::DebugString() << ")"; return out.str(); } -BooleanVal HiveUdfCall::GetBooleanVal(ExprContext* ctx, const TupleRow* row) { +BooleanVal HiveUdfCall::GetBooleanVal( + ScalarExprEvaluator* eval, const 
TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_BOOLEAN); - return *reinterpret_cast<BooleanVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<BooleanVal*>(Evaluate(eval, row)); } -TinyIntVal HiveUdfCall::GetTinyIntVal(ExprContext* ctx, const TupleRow* row) { +TinyIntVal HiveUdfCall::GetTinyIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_TINYINT); - return *reinterpret_cast<TinyIntVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<TinyIntVal*>(Evaluate(eval, row)); } -SmallIntVal HiveUdfCall::GetSmallIntVal(ExprContext* ctx, const TupleRow* row) { +SmallIntVal HiveUdfCall::GetSmallIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_SMALLINT); - return * reinterpret_cast<SmallIntVal*>(Evaluate(ctx, row)); + return * reinterpret_cast<SmallIntVal*>(Evaluate(eval, row)); } -IntVal HiveUdfCall::GetIntVal(ExprContext* ctx, const TupleRow* row) { +IntVal HiveUdfCall::GetIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_INT); - return *reinterpret_cast<IntVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<IntVal*>(Evaluate(eval, row)); } -BigIntVal HiveUdfCall::GetBigIntVal(ExprContext* ctx, const TupleRow* row) { +BigIntVal HiveUdfCall::GetBigIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_BIGINT); - return *reinterpret_cast<BigIntVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<BigIntVal*>(Evaluate(eval, row)); } -FloatVal HiveUdfCall::GetFloatVal(ExprContext* ctx, const TupleRow* row) { +FloatVal HiveUdfCall::GetFloatVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_FLOAT); - return *reinterpret_cast<FloatVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<FloatVal*>(Evaluate(eval, row)); } -DoubleVal HiveUdfCall::GetDoubleVal(ExprContext* ctx, const TupleRow* row) { +DoubleVal HiveUdfCall::GetDoubleVal( + ScalarExprEvaluator* eval, const 
TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_DOUBLE); - return *reinterpret_cast<DoubleVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<DoubleVal*>(Evaluate(eval, row)); } -StringVal HiveUdfCall::GetStringVal(ExprContext* ctx, const TupleRow* row) { +StringVal HiveUdfCall::GetStringVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_STRING); - StringVal result = *reinterpret_cast<StringVal*>(Evaluate(ctx, row)); + StringVal result = *reinterpret_cast<StringVal*>(Evaluate(eval, row)); // Copy the string into a local allocation with the usual lifetime for expr results. // Needed because the UDF output buffer is owned by the Java UDF executor and may be // freed or reused by the next call into the Java UDF executor. - FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); + FunctionContext* fn_ctx = eval->fn_context(fn_ctx_idx_); return StringVal::CopyFrom(fn_ctx, result.ptr, result.len); } -TimestampVal HiveUdfCall::GetTimestampVal(ExprContext* ctx, const TupleRow* row) { +TimestampVal HiveUdfCall::GetTimestampVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_TIMESTAMP); - return *reinterpret_cast<TimestampVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<TimestampVal*>(Evaluate(eval, row)); } -DecimalVal HiveUdfCall::GetDecimalVal(ExprContext* ctx, const TupleRow* row) { +DecimalVal HiveUdfCall::GetDecimalVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_DECIMAL); - return *reinterpret_cast<DecimalVal*>(Evaluate(ctx, row)); + return *reinterpret_cast<DecimalVal*>(Evaluate(eval, row)); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/hive-udf-call.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/hive-udf-call.h b/be/src/exprs/hive-udf-call.h index 74302eb..7ce5eb0 100644 --- a/be/src/exprs/hive-udf-call.h +++ b/be/src/exprs/hive-udf-call.h @@ 
-23,14 +23,26 @@ #include <string> #include <boost/scoped_ptr.hpp> -#include "exprs/expr.h" - -using namespace impala_udf; +#include "exprs/scalar-expr.h" namespace impala { -class TExprNode; +using impala_udf::FunctionContext; +using impala_udf::AnyVal; +using impala_udf::BooleanVal; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; + class RuntimeState; +class ScalarExprEvaluator; +class TExprNode; /// Executor for hive udfs using JNI. This works with the UdfExecutor on the /// java side which calls into the actual UDF. @@ -58,44 +70,51 @@ class RuntimeState; /// The BE reads the StringValue as normal. // /// If the UDF ran into an error, the FE throws an exception. -class HiveUdfCall : public Expr { +class HiveUdfCall : public ScalarExpr { public: /// Must be called before creating any HiveUdfCall instances. This is called at impalad /// startup time. 
- static Status Init(); - - virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* ctx); - virtual Status Open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL); - virtual void Close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL); - - virtual BooleanVal GetBooleanVal(ExprContext* ctx, const TupleRow*); - virtual TinyIntVal GetTinyIntVal(ExprContext* ctx, const TupleRow*); - virtual SmallIntVal GetSmallIntVal(ExprContext* ctx, const TupleRow*); - virtual IntVal GetIntVal(ExprContext* ctx, const TupleRow*); - virtual BigIntVal GetBigIntVal(ExprContext* ctx, const TupleRow*); - virtual FloatVal GetFloatVal(ExprContext* ctx, const TupleRow*); - virtual DoubleVal GetDoubleVal(ExprContext* ctx, const TupleRow*); - virtual StringVal GetStringVal(ExprContext* ctx, const TupleRow*); - virtual TimestampVal GetTimestampVal(ExprContext* ctx, const TupleRow*); - virtual DecimalVal GetDecimalVal(ExprContext* ctx, const TupleRow*); - - virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn); + static Status InitEnv() WARN_UNUSED_RESULT; + + virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) + override WARN_UNUSED_RESULT; + virtual std::string DebugString() const override; protected: - friend class Expr; + friend class ScalarExpr; + friend class ScalarExprEvaluator; friend class StringFunctions; + virtual bool HasFnCtx() const override { return true; } + HiveUdfCall(const TExprNode& node); - virtual std::string DebugString() const; + + virtual Status Init(const RowDescriptor& row_desc, RuntimeState* state) + override WARN_UNUSED_RESULT; + virtual Status OpenEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const override + WARN_UNUSED_RESULT; + virtual void 
CloseEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const override; + + virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual SmallIntVal GetSmallIntVal( + ScalarExprEvaluator*, const TupleRow*) const override; + virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual TimestampVal GetTimestampVal( + ScalarExprEvaluator*, const TupleRow*) const override; + virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) const override; private: /// Evalutes the UDF over row. Returns the result as an AnyVal. This function /// never returns NULL but rather an AnyVal object with is_null set to true on /// error. 
- AnyVal* Evaluate(ExprContext* ctx, const TupleRow* row); + AnyVal* Evaluate(ScalarExprEvaluator* eval, const TupleRow* row) const; /// The path on the local FS to the UDF's jar std::string local_location_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/is-not-empty-predicate.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/is-not-empty-predicate.cc b/be/src/exprs/is-not-empty-predicate.cc index 521ebf4..e0ac41b 100644 --- a/be/src/exprs/is-not-empty-predicate.cc +++ b/be/src/exprs/is-not-empty-predicate.cc @@ -29,15 +29,15 @@ IsNotEmptyPredicate::IsNotEmptyPredicate(const TExprNode& node) : Predicate(node) { } -BooleanVal IsNotEmptyPredicate::GetBooleanVal(ExprContext* ctx, const TupleRow* row) { - CollectionVal coll = children_[0]->GetCollectionVal(ctx, row); +BooleanVal IsNotEmptyPredicate::GetBooleanVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + CollectionVal coll = children_[0]->GetCollectionVal(eval, row); if (coll.is_null) return BooleanVal::null(); return BooleanVal(coll.num_tuples != 0); } -Status IsNotEmptyPredicate::Prepare(RuntimeState* state, - const RowDescriptor& row_desc, ExprContext* ctx) { - RETURN_IF_ERROR(Expr::Prepare(state, row_desc, ctx)); +Status IsNotEmptyPredicate::Init(const RowDescriptor& row_desc, RuntimeState* state) { + RETURN_IF_ERROR(ScalarExpr::Init(row_desc, state)); DCHECK_EQ(children_.size(), 1); return Status::OK(); } @@ -48,7 +48,7 @@ Status IsNotEmptyPredicate::GetCodegendComputeFn(LlvmCodeGen* codegen, } string IsNotEmptyPredicate::DebugString() const { - return Expr::DebugString("IsNotEmptyPredicate"); + return ScalarExpr::DebugString("IsNotEmptyPredicate"); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/is-not-empty-predicate.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/is-not-empty-predicate.h 
b/be/src/exprs/is-not-empty-predicate.h index 2454a6d..0747a16 100644 --- a/be/src/exprs/is-not-empty-predicate.h +++ b/be/src/exprs/is-not-empty-predicate.h @@ -22,6 +22,7 @@ namespace impala { +class ScalarExprEvaluator; class TExprNode; /// Predicate that checks whether a collection is empty or not. @@ -29,16 +30,16 @@ class TExprNode; /// interface supports CollectionVals. class IsNotEmptyPredicate: public Predicate { public: - virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* ctx); - virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn); - virtual BooleanVal GetBooleanVal(ExprContext* context, const TupleRow* row); - virtual std::string DebugString() const; + virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) override; + virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual std::string DebugString() const override; protected: - friend class Expr; + friend class ScalarExpr; + virtual Status Init(const RowDescriptor& row_desc, RuntimeState* state) override; IsNotEmptyPredicate(const TExprNode& node); + }; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/kudu-partition-expr.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/kudu-partition-expr.cc b/be/src/exprs/kudu-partition-expr.cc index 46da82d..6f35268 100644 --- a/be/src/exprs/kudu-partition-expr.cc +++ b/be/src/exprs/kudu-partition-expr.cc @@ -20,7 +20,7 @@ #include <gutil/strings/substitute.h> #include "exec/kudu-util.h" -#include "exprs/expr-context.h" +#include "exprs/scalar-expr-evaluator.h" #include "runtime/exec-env.h" #include "runtime/query-state.h" #include "runtime/runtime-state.h" @@ -31,11 +31,10 @@ namespace impala { KuduPartitionExpr::KuduPartitionExpr(const TExprNode& node) - : Expr(node), tkudu_partition_expr_(node.kudu_partition_expr) {} + : ScalarExpr(node), 
tkudu_partition_expr_(node.kudu_partition_expr) {} -Status KuduPartitionExpr::Prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx) { - RETURN_IF_ERROR(Expr::Prepare(state, row_desc, ctx)); +Status KuduPartitionExpr::Init(const RowDescriptor& row_desc, RuntimeState* state) { + RETURN_IF_ERROR(ScalarExpr::Init(row_desc, state)); DCHECK_EQ(tkudu_partition_expr_.referenced_columns.size(), children_.size()); // Create the KuduPartitioner we'll use to get the partition index for each row. @@ -60,9 +59,10 @@ Status KuduPartitionExpr::Prepare( return Status::OK(); } -IntVal KuduPartitionExpr::GetIntVal(ExprContext* ctx, const TupleRow* row) { +IntVal KuduPartitionExpr::GetIntVal(ScalarExprEvaluator* eval, + const TupleRow* row) const { for (int i = 0; i < children_.size(); ++i) { - void* val = ctx->GetValue(GetChild(i), row); + void* val = eval->GetValue(*GetChild(i), row); if (val == NULL) { // We don't currently support nullable partition columns, but pass it along and let // the KuduTableSink generate the error message. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/kudu-partition-expr.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/kudu-partition-expr.h b/be/src/exprs/kudu-partition-expr.h index 6620338..015d6dd 100644 --- a/be/src/exprs/kudu-partition-expr.h +++ b/be/src/exprs/kudu-partition-expr.h @@ -20,29 +20,33 @@ #include <kudu/client/client.h> -#include "exprs/expr.h" +#include "exprs/scalar-expr.h" namespace impala { class KuduTableDescriptor; +class ScalarExprEvaluator; class TExprNode; class TKuduPartitionExpr; /// Expr that calls into the Kudu client to determine the partition index for rows. /// Returns -1 if the row doesn't have a partition or if an error is encountered. /// The children of this Expr produce the values for the partition columns. 
-class KuduPartitionExpr : public Expr { +class KuduPartitionExpr : public ScalarExpr { protected: - friend class Expr; + friend class ScalarExpr; + friend class ScalarExprEvaluator; KuduPartitionExpr(const TExprNode& node); - virtual Status Prepare( - RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx); + virtual Status Init(const RowDescriptor& row_desc, RuntimeState* state) + override WARN_UNUSED_RESULT; - virtual IntVal GetIntVal(ExprContext* ctx, const TupleRow* row); + virtual IntVal GetIntVal(ScalarExprEvaluator* eval, const TupleRow* row) + const override; - virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn); + virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) + override WARN_UNUSED_RESULT; private: TKuduPartitionExpr tkudu_partition_expr_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/like-predicate.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h index 9108eec..2a7cb71 100644 --- a/be/src/exprs/like-predicate.h +++ b/be/src/exprs/like-predicate.h @@ -40,7 +40,7 @@ class LikePredicate: public Predicate { ~LikePredicate() { } protected: - friend class Expr; + friend class ScalarExprEvaluator; LikePredicate(const TExprNode& node) : Predicate(node) { } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/literal.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/literal.cc b/be/src/exprs/literal.cc index 4fcf9fc..091eca8 100644 --- a/be/src/exprs/literal.cc +++ b/be/src/exprs/literal.cc @@ -22,11 +22,11 @@ #include "codegen/codegen-anyval.h" #include "codegen/llvm-codegen.h" +#include "exprs/scalar-expr-evaluator.h" #include "gen-cpp/Exprs_types.h" #include "runtime/decimal-value.inline.h" #include "runtime/runtime-state.h" #include "runtime/timestamp-parse-util.h" -#include 
"gen-cpp/Exprs_types.h" #include "common/names.h" @@ -36,7 +36,7 @@ using namespace impala_udf; namespace impala { Literal::Literal(const TExprNode& node) - : Expr(node) { + : ScalarExpr(node) { switch (type_.type) { case TYPE_BOOLEAN: DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL); @@ -134,46 +134,47 @@ Literal::Literal(const TExprNode& node) default: DCHECK(false) << "Invalid type: " << TypeToString(type_.type); } + DCHECK(cache_entry_ == nullptr); } Literal::Literal(ColumnType type, bool v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_BOOLEAN) << type; value_.bool_val = v; } Literal::Literal(ColumnType type, int8_t v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_TINYINT) << type; value_.tinyint_val = v; } Literal::Literal(ColumnType type, int16_t v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_SMALLINT) << type; value_.smallint_val = v; } Literal::Literal(ColumnType type, int32_t v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_INT) << type; value_.int_val = v; } Literal::Literal(ColumnType type, int64_t v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_BIGINT) << type; value_.bigint_val = v; } Literal::Literal(ColumnType type, float v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_FLOAT) << type; value_.float_val = v; } Literal::Literal(ColumnType type, double v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { if (type.type == TYPE_DOUBLE) { value_.double_val = v; } else if (type.type == TYPE_TIMESTAMP) { @@ -197,27 +198,25 @@ Literal::Literal(ColumnType type, double v) } } -Literal::Literal(ColumnType type, const string& v) : Expr(type, true, false) { +Literal::Literal(ColumnType type, const string& v) + : ScalarExpr(type, true) { value_.Init(v); DCHECK(type.type == TYPE_STRING || type.type == TYPE_CHAR || 
type.type == TYPE_VARCHAR) << type; } -Literal::Literal(ColumnType type, const StringValue& v) : Expr(type, true, false) { +Literal::Literal(ColumnType type, const StringValue& v) + : ScalarExpr(type, true) { value_.Init(v.DebugString()); DCHECK(type.type == TYPE_STRING || type.type == TYPE_CHAR) << type; } Literal::Literal(ColumnType type, const TimestampValue& v) - : Expr(type, true, false) { + : ScalarExpr(type, true) { DCHECK_EQ(type.type, TYPE_TIMESTAMP) << type; value_.timestamp_val = v; } -bool Literal::IsLiteral() const { - return true; -} - template<class T> bool ParseString(const string& str, T* val) { istringstream stream(str); @@ -288,53 +287,62 @@ Literal* Literal::CreateLiteral(const ColumnType& type, const string& str) { } default: DCHECK(false) << "Invalid type: " << type.DebugString(); - return NULL; + return nullptr; } } -BooleanVal Literal::GetBooleanVal(ExprContext* context, const TupleRow* row) { +BooleanVal Literal::GetBooleanVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_BOOLEAN) << type_; return BooleanVal(value_.bool_val); } -TinyIntVal Literal::GetTinyIntVal(ExprContext* context, const TupleRow* row) { +TinyIntVal Literal::GetTinyIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_TINYINT) << type_; return TinyIntVal(value_.tinyint_val); } -SmallIntVal Literal::GetSmallIntVal(ExprContext* context, const TupleRow* row) { +SmallIntVal Literal::GetSmallIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_SMALLINT) << type_; return SmallIntVal(value_.smallint_val); } -IntVal Literal::GetIntVal(ExprContext* context, const TupleRow* row) { +IntVal Literal::GetIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_INT) << type_; return IntVal(value_.int_val); } -BigIntVal Literal::GetBigIntVal(ExprContext* context, const TupleRow* row) { +BigIntVal Literal::GetBigIntVal( + 
ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_BIGINT) << type_; return BigIntVal(value_.bigint_val); } -FloatVal Literal::GetFloatVal(ExprContext* context, const TupleRow* row) { +FloatVal Literal::GetFloatVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_FLOAT) << type_; return FloatVal(value_.float_val); } -DoubleVal Literal::GetDoubleVal(ExprContext* context, const TupleRow* row) { +DoubleVal Literal::GetDoubleVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_DOUBLE) << type_; return DoubleVal(value_.double_val); } -StringVal Literal::GetStringVal(ExprContext* context, const TupleRow* row) { +StringVal Literal::GetStringVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK(type_.IsStringType()) << type_; StringVal result; value_.string_val.ToStringVal(&result); return result; } -DecimalVal Literal::GetDecimalVal(ExprContext* context, const TupleRow* row) { +DecimalVal Literal::GetDecimalVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_DECIMAL) << type_; switch (type().GetByteSize()) { case 4: @@ -350,7 +358,8 @@ DecimalVal Literal::GetDecimalVal(ExprContext* context, const TupleRow* row) { return DecimalVal(); } -TimestampVal Literal::GetTimestampVal(ExprContext* context, const TupleRow* row) { +TimestampVal Literal::GetTimestampVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_TIMESTAMP) << type_; TimestampVal result; value_.timestamp_val.ToTimestampVal(&result); @@ -406,7 +415,7 @@ string Literal::DebugString() const { default: out << "[bad type! 
" << type_ << "]"; } - out << Expr::DebugString() << ")"; + out << ScalarExpr::DebugString() << ")"; return out.str(); } @@ -417,14 +426,14 @@ string Literal::DebugString() const { // ret { i8, i64 } { i8 0, i64 10 } // } Status Literal::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) { - if (ir_compute_fn_ != NULL) { + if (ir_compute_fn_ != nullptr) { *fn = ir_compute_fn_; return Status::OK(); } DCHECK_EQ(GetNumChildren(), 0); Value* args[2]; - *fn = CreateIrFunctionPrototype(codegen, "Literal", &args); + *fn = CreateIrFunctionPrototype("Literal", codegen, &args); BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", *fn); LlvmBuilder builder(entry_block); @@ -488,6 +497,7 @@ Status Literal::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) builder.CreateRet(v.GetLoweredValue()); *fn = codegen->FinalizeFunction(*fn); + if (UNLIKELY(*fn == nullptr)) return Status(TErrorCode::IR_VERIFY_FAILED, "Literal"); ir_compute_fn_ = *fn; return Status::OK(); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/literal.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/literal.h b/be/src/exprs/literal.h index 644e3b0..cf98e98 100644 --- a/be/src/exprs/literal.h +++ b/be/src/exprs/literal.h @@ -20,16 +20,46 @@ #define IMPALA_EXPRS_LITERAL_H_ #include <string> -#include "exprs/expr.h" +#include "exprs/scalar-expr.h" #include "exprs/expr-value.h" #include "runtime/string-value.h" +#include "udf/udf.h" namespace impala { +using impala_udf::FunctionContext; +using impala_udf::AnyVal; +using impala_udf::BooleanVal; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; + +class ScalarExprEvaluator; class TExprNode; -class Literal: public Expr 
{ +class Literal: public ScalarExpr { public: + virtual bool IsLiteral() const override { return true; } + virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) + override WARN_UNUSED_RESULT; + virtual std::string DebugString() const override; + + /// Test function that parses 'str' according to 'type'. The caller owns the returned + /// Literal. + static Literal* CreateLiteral(const ColumnType& type, const std::string& str); + + protected: + friend class ScalarExpr; + friend class ScalarExprEvaluator; + + Literal(const TExprNode& node); + /// Test ctors Literal(ColumnType type, bool v); Literal(ColumnType type, int8_t v); @@ -42,31 +72,18 @@ class Literal: public Expr { Literal(ColumnType type, const StringValue& v); Literal(ColumnType type, const TimestampValue& v); - /// Test function that parses 'str' according to 'type'. The caller owns the returned - /// Literal. - static Literal* CreateLiteral(const ColumnType& type, const std::string& str); - - virtual bool IsLiteral() const; - - virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn); - - virtual impala_udf::BooleanVal GetBooleanVal(ExprContext*, const TupleRow*); - virtual impala_udf::TinyIntVal GetTinyIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::SmallIntVal GetSmallIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::IntVal GetIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::BigIntVal GetBigIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::FloatVal GetFloatVal(ExprContext*, const TupleRow*); - virtual impala_udf::DoubleVal GetDoubleVal(ExprContext*, const TupleRow*); - virtual impala_udf::StringVal GetStringVal(ExprContext*, const TupleRow*); - virtual impala_udf::DecimalVal GetDecimalVal(ExprContext*, const TupleRow*); - virtual impala_udf::TimestampVal GetTimestampVal(ExprContext*, const TupleRow*); - - protected: - friend class Expr; - - Literal(const TExprNode& node); - - virtual std::string DebugString() 
const; + virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual SmallIntVal GetSmallIntVal( + ScalarExprEvaluator*, const TupleRow*) const override; + virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual TimestampVal GetTimestampVal( + ScalarExprEvaluator*, const TupleRow*) const override; + virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) const override; private: ExprValue value_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/math-functions-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/math-functions-ir.cc b/be/src/exprs/math-functions-ir.cc index 0c76c4a..48b98a9 100644 --- a/be/src/exprs/math-functions-ir.cc +++ b/be/src/exprs/math-functions-ir.cc @@ -22,7 +22,7 @@ #include <math.h> #include "exprs/anyval-util.h" -#include "exprs/expr.h" +#include "exprs/scalar-expr.h" #include "exprs/operators.h" #include "util/string-parser.h" #include "runtime/runtime-state.h" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/math-functions.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/math-functions.h b/be/src/exprs/math-functions.h index ad5da6e..867ca2b 100644 --- a/be/src/exprs/math-functions.h +++ b/be/src/exprs/math-functions.h @@ -24,10 +24,19 @@ #include "util/string-parser.h" #include "udf/udf.h" -using namespace impala_udf; - namespace impala { +using 
impala_udf::FunctionContext; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; + class Expr; struct ExprValue; class TupleRow; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/null-literal.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/null-literal.cc b/be/src/exprs/null-literal.cc index 7b0e77c..65064e6 100644 --- a/be/src/exprs/null-literal.cc +++ b/be/src/exprs/null-literal.cc @@ -30,93 +30,104 @@ using namespace llvm; namespace impala { -BooleanVal NullLiteral::GetBooleanVal(ExprContext* context, const TupleRow* row) { +BooleanVal NullLiteral::GetBooleanVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_BOOLEAN) << type_; return BooleanVal::null(); } -TinyIntVal NullLiteral::GetTinyIntVal(ExprContext* context, const TupleRow* row) { +TinyIntVal NullLiteral::GetTinyIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_TINYINT) << type_; return TinyIntVal::null(); } -SmallIntVal NullLiteral::GetSmallIntVal(ExprContext* context, const TupleRow* row) { +SmallIntVal NullLiteral::GetSmallIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_SMALLINT) << type_; return SmallIntVal::null(); } -IntVal NullLiteral::GetIntVal(ExprContext* context, const TupleRow* row) { +IntVal NullLiteral::GetIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_INT) << type_; return IntVal::null(); } -BigIntVal NullLiteral::GetBigIntVal(ExprContext* context, const TupleRow* row) { +BigIntVal NullLiteral::GetBigIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_BIGINT) << type_; return 
BigIntVal::null(); } -FloatVal NullLiteral::GetFloatVal(ExprContext* context, const TupleRow* row) { +FloatVal NullLiteral::GetFloatVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_FLOAT) << type_; return FloatVal::null(); } -DoubleVal NullLiteral::GetDoubleVal(ExprContext* context, const TupleRow* row) { +DoubleVal NullLiteral::GetDoubleVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_DOUBLE) << type_; return DoubleVal::null(); } -StringVal NullLiteral::GetStringVal(ExprContext* context, const TupleRow* row) { +StringVal NullLiteral::GetStringVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK(type_.IsStringType()) << type_; return StringVal::null(); } -TimestampVal NullLiteral::GetTimestampVal(ExprContext* context, const TupleRow* row) { +TimestampVal NullLiteral::GetTimestampVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_TIMESTAMP) << type_; return TimestampVal::null(); } -DecimalVal NullLiteral::GetDecimalVal(ExprContext* context, const TupleRow* row) { +DecimalVal NullLiteral::GetDecimalVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK_EQ(type_.type, TYPE_DECIMAL) << type_; return DecimalVal::null(); } -CollectionVal NullLiteral::GetCollectionVal(ExprContext* context, const TupleRow* row) { +CollectionVal NullLiteral::GetCollectionVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { DCHECK(type_.IsCollectionType()); return CollectionVal::null(); } -bool NullLiteral::IsLiteral() const { - return true; -} - // Generated IR for a bigint NULL literal: // -// define { i8, i64 } @NullLiteral(i8* %context, %"class.impala::TupleRow"* %row) { +// define { i8, i64 } @NullLiteral( +// %"class.impala::ScalarExprEvaluator"* %eval, %"class.impala::TupleRow"* %row) { // entry: // ret { i8, i64 } { i8 1, i64 0 } // } Status NullLiteral::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) { - if 
(ir_compute_fn_ != NULL) { + if (ir_compute_fn_ != nullptr) { *fn = ir_compute_fn_; return Status::OK(); } DCHECK_EQ(GetNumChildren(), 0); Value* args[2]; - *fn = CreateIrFunctionPrototype(codegen, "NullLiteral", &args); + *fn = CreateIrFunctionPrototype("NullLiteral", codegen, &args); BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", *fn); LlvmBuilder builder(entry_block); Value* v = CodegenAnyVal::GetNullVal(codegen, type()); builder.CreateRet(v); *fn = codegen->FinalizeFunction(*fn); + if (UNLIKELY(*fn == nullptr)) { + return Status(TErrorCode::IR_VERIFY_FAILED, "NullLiteral"); + } ir_compute_fn_ = *fn; return Status::OK(); } string NullLiteral::DebugString() const { stringstream out; - out << "NullLiteral(" << Expr::DebugString() << ")"; + out << "NullLiteral(" << ScalarExpr::DebugString() << ")"; return out.str(); }
