http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/null-literal.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/null-literal.h b/be/src/exprs/null-literal.h index d7a495a..91f4ca4 100644 --- a/be/src/exprs/null-literal.h +++ b/be/src/exprs/null-literal.h @@ -19,38 +19,54 @@ #ifndef IMPALA_EXPRS_NULL_LITERAL_H_ #define IMPALA_EXPRS_NULL_LITERAL_H_ -#include "exprs/expr.h" +#include "exprs/scalar-expr.h" namespace impala { +using impala_udf::FunctionContext; +using impala_udf::BooleanVal; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; + class TExprNode; -class NullLiteral: public Expr { +class NullLiteral: public ScalarExpr { public: - NullLiteral(PrimitiveType type) : Expr(type, true, false) { } - - virtual bool IsLiteral() const; - - virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn); + virtual bool IsLiteral() const override { return true; } + virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) + override WARN_UNUSED_RESULT; + virtual std::string DebugString() const override; - virtual impala_udf::BooleanVal GetBooleanVal(ExprContext*, const TupleRow*); - virtual impala_udf::TinyIntVal GetTinyIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::SmallIntVal GetSmallIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::IntVal GetIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::BigIntVal GetBigIntVal(ExprContext*, const TupleRow*); - virtual impala_udf::FloatVal GetFloatVal(ExprContext*, const TupleRow*); - virtual impala_udf::DoubleVal GetDoubleVal(ExprContext*, const TupleRow*); - virtual impala_udf::StringVal GetStringVal(ExprContext*, const TupleRow*); - virtual 
impala_udf::TimestampVal GetTimestampVal(ExprContext*, const TupleRow*); - virtual impala_udf::DecimalVal GetDecimalVal(ExprContext*, const TupleRow*); - virtual impala_udf::CollectionVal GetCollectionVal(ExprContext*, const TupleRow*); - - virtual std::string DebugString() const; + /// Constructor for test. + NullLiteral(PrimitiveType type) : ScalarExpr(type, true) { } protected: - friend class Expr; + friend class ScalarExpr; + friend class ScalarExprEvaluator; + + NullLiteral(const TExprNode& node) : ScalarExpr(node) { } - NullLiteral(const TExprNode& node) : Expr(node) { } + virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual SmallIntVal GetSmallIntVal( + ScalarExprEvaluator*, const TupleRow*) const override; + virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual TimestampVal GetTimestampVal( + ScalarExprEvaluator*, const TupleRow*) const override; + virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) const override; + virtual CollectionVal GetCollectionVal( + ScalarExprEvaluator*, const TupleRow*) const override; }; }
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/operators.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/operators.h b/be/src/exprs/operators.h index 9b386c8..d3920e8 100644 --- a/be/src/exprs/operators.h +++ b/be/src/exprs/operators.h @@ -20,10 +20,21 @@ #include "udf/udf.h" -using namespace impala_udf; - namespace impala { +using impala_udf::FunctionContext; +using impala_udf::AnyVal; +using impala_udf::BooleanVal; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; + /// Operators written against the UDF interface. class Operators { public: http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/predicate.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/predicate.h b/be/src/exprs/predicate.h index 28b8b73..d5ef4b8 100644 --- a/be/src/exprs/predicate.h +++ b/be/src/exprs/predicate.h @@ -19,17 +19,17 @@ #ifndef IMPALA_EXPRS_PREDICATE_H_ #define IMPALA_EXPRS_PREDICATE_H_ -#include "exprs/expr.h" +#include "exprs/scalar-expr.h" namespace impala { class TExprNode; -class Predicate: public Expr { +class Predicate: public ScalarExpr { protected: - friend class Expr; + friend class ScalarExpr; - Predicate(const TExprNode& node) : Expr(node) {} + Predicate(const TExprNode& node) : ScalarExpr(node) {} }; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr-evaluator.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-expr-evaluator.cc b/be/src/exprs/scalar-expr-evaluator.cc new file mode 100644 index 0000000..ae0aa15 --- /dev/null +++ b/be/src/exprs/scalar-expr-evaluator.cc @@ -0,0 +1,452 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/scalar-expr-evaluator.h" + +#include <sstream> + +#include "common/object-pool.h" +#include "common/status.h" +#include "exprs/anyval-util.h" +#include "exprs/scalar-expr.h" +#include "exprs/aggregate-functions.h" +#include "exprs/bit-byte-functions.h" +#include "exprs/case-expr.h" +#include "exprs/cast-functions.h" +#include "exprs/compound-predicates.h" +#include "exprs/conditional-functions.h" +#include "exprs/decimal-functions.h" +#include "exprs/decimal-operators.h" +#include "exprs/hive-udf-call.h" +#include "exprs/in-predicate.h" +#include "exprs/is-not-empty-predicate.h" +#include "exprs/is-null-predicate.h" +#include "exprs/like-predicate.h" +#include "exprs/literal.h" +#include "exprs/math-functions.h" +#include "exprs/null-literal.h" +#include "exprs/operators.h" +#include "exprs/scalar-expr-evaluator.h" +#include "exprs/scalar-fn-call.h" +#include "exprs/slot-ref.h" +#include "exprs/string-functions.h" +#include "exprs/timestamp-functions.h" +#include "exprs/tuple-is-null-predicate.h" +#include "exprs/udf-builtins.h" +#include "exprs/utility-functions.h" +#include "runtime/decimal-value.inline.h" +#include "runtime/mem-pool.h" +#include 
"runtime/mem-tracker.h" +#include "runtime/raw-value.inline.h" +#include "runtime/runtime-state.h" +#include "udf/udf-internal.h" + +#include "common/names.h" + +using namespace impala; +using namespace impala_udf; + +const char* ScalarExprEvaluator::LLVM_CLASS_NAME = "class.impala::ScalarExprEvaluator"; + +ScalarExprEvaluator::ScalarExprEvaluator(const ScalarExpr& root, MemPool* mem_pool) + : mem_pool_(mem_pool), + root_(root) { +} + +ScalarExprEvaluator::~ScalarExprEvaluator() { + DCHECK(!initialized_ || closed_); +} + +Status ScalarExprEvaluator::Create(const ScalarExpr& root, RuntimeState* state, + ObjectPool* pool, MemPool* mem_pool, ScalarExprEvaluator** eval) { + *eval = pool->Add(new ScalarExprEvaluator(root, mem_pool)); + if (root.fn_ctx_idx_end_ > 0) { + (*eval)->fn_ctxs_.resize(root.fn_ctx_idx_end_, nullptr); + (*eval)->CreateFnCtxs(state, root); + DCHECK_EQ((*eval)->fn_ctxs_.size(), root.fn_ctx_idx_end_); + for (FunctionContext* fn_ctx : (*eval)->fn_ctxs_) DCHECK(fn_ctx != nullptr); + (*eval)->fn_ctxs_ptr_ = (*eval)->fn_ctxs_.data(); + } else { + DCHECK_EQ((*eval)->fn_ctxs_.size(), 0); + DCHECK_EQ(root.fn_ctx_idx_end_, 0); + DCHECK_EQ(root.fn_ctx_idx_, -1); + DCHECK((*eval)->fn_ctxs_ptr_ == nullptr); + } + (*eval)->initialized_ = true; + return Status::OK(); +} + +Status ScalarExprEvaluator::Create(const vector<ScalarExpr*>& exprs, RuntimeState* state, + ObjectPool* pool, MemPool* mem_pool, vector<ScalarExprEvaluator*>* evals) { + for (const ScalarExpr* expr : exprs) { + ScalarExprEvaluator* eval; + Status status = Create(*expr, state, pool, mem_pool, &eval); + // Always add the evaluator to the vector so it can be cleaned up. 
+ evals->push_back(eval); + RETURN_IF_ERROR(status); + } + return Status::OK(); +} + +void ScalarExprEvaluator::CreateFnCtxs(RuntimeState* state, const ScalarExpr& expr) { + const int fn_ctx_idx = expr.fn_ctx_idx(); + const bool has_fn_ctx = fn_ctx_idx != -1; + vector<FunctionContext::TypeDesc> arg_types; + for (const ScalarExpr* child : expr.children()) { + CreateFnCtxs(state, *child); + if (has_fn_ctx) arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(child->type())); + } + if (has_fn_ctx) { + FunctionContext::TypeDesc return_type = + AnyValUtil::ColumnTypeToTypeDesc(expr.type()); + const int varargs_buffer_size = expr.ComputeVarArgsBufferSize(); + DCHECK_GE(fn_ctx_idx, 0); + DCHECK_LT(fn_ctx_idx, fn_ctxs_.size()); + DCHECK(fn_ctxs_[fn_ctx_idx] == nullptr); + fn_ctxs_[fn_ctx_idx] = FunctionContextImpl::CreateContext( + state, mem_pool_, return_type, arg_types, varargs_buffer_size); + } +} + +Status ScalarExprEvaluator::Open(RuntimeState* state) { + DCHECK(initialized_); + if (opened_) return Status::OK(); + opened_ = true; + // Fragment-local state is only initialized for original contexts. Clones inherit the + // original's fragment state and only need to have thread-local state initialized. + // TODO: Move FRAGMENT_LOCAL state to ScalarExpr. ScalarExprEvaluator should only + // have THREAD_LOCAL state. + FunctionContext::FunctionStateScope scope = + is_clone_ ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; + return root_.OpenEvaluator(scope, state, this); +} + +Status ScalarExprEvaluator::Open( + const vector<ScalarExprEvaluator*>& evals, RuntimeState* state) { + for (int i = 0; i < evals.size(); ++i) RETURN_IF_ERROR(evals[i]->Open(state)); + return Status::OK(); +} + +void ScalarExprEvaluator::Close(RuntimeState* state) { + if (closed_) return; + FunctionContext::FunctionStateScope scope = + is_clone_ ? 
FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; + root_.CloseEvaluator(scope, state, this); + for (int i = 0; i < fn_ctxs_.size(); ++i) { + fn_ctxs_[i]->impl()->Close(); + delete fn_ctxs_[i]; + } + fn_ctxs_.clear(); + // Memory allocated by 'fn_ctxs_' is still in 'mem_pool_'. It's the responsibility of + // the owner of 'mem_pool_' to free it. + closed_ = true; +} + +void ScalarExprEvaluator::Close( + const vector<ScalarExprEvaluator*>& evals, RuntimeState* state) { + for (ScalarExprEvaluator* eval : evals) eval->Close(state); +} + +Status ScalarExprEvaluator::Clone(ObjectPool* pool, RuntimeState* state, + MemPool* mem_pool, ScalarExprEvaluator** cloned_eval) const { + DCHECK(initialized_); + DCHECK(opened_); + *cloned_eval = pool->Add(new ScalarExprEvaluator(root_, mem_pool)); + for (int i = 0; i < fn_ctxs_.size(); ++i) { + (*cloned_eval)->fn_ctxs_.push_back(fn_ctxs_[i]->impl()->Clone(mem_pool)); + } + (*cloned_eval)->fn_ctxs_ptr_ = (*cloned_eval)->fn_ctxs_.data(); + (*cloned_eval)->is_clone_ = true; + (*cloned_eval)->initialized_ = true; + (*cloned_eval)->opened_ = true; + (*cloned_eval)->output_scale_ = output_scale_; + return root_.OpenEvaluator(FunctionContext::THREAD_LOCAL, state, *cloned_eval); +} + +Status ScalarExprEvaluator::Clone(ObjectPool* pool, RuntimeState* state, + MemPool* mem_pool, const vector<ScalarExprEvaluator*>& evals, + vector<ScalarExprEvaluator*>* cloned_evals) { + DCHECK(cloned_evals != nullptr); + DCHECK(cloned_evals->empty()); + for (int i = 0; i < evals.size(); ++i) { + ScalarExprEvaluator* cloned_eval; + RETURN_IF_ERROR(evals[i]->Clone(pool, state, mem_pool, &cloned_eval)); + cloned_evals->push_back(cloned_eval); + } + return Status::OK(); +} + +bool ScalarExprEvaluator::HasLocalAllocations() const { + for (int i = 0; i < fn_ctxs_.size(); ++i) { + if (fn_ctxs_[i]->impl()->closed()) continue; + if (fn_ctxs_[i]->impl()->HasLocalAllocations()) return true; + } + return false; +} + +bool 
ScalarExprEvaluator::HasLocalAllocations( + const vector<ScalarExprEvaluator*>& evals) { + for (int i = 0; i < evals.size(); ++i) { + if (evals[i]->HasLocalAllocations()) return true; + } + return false; +} + +void ScalarExprEvaluator::FreeLocalAllocations() { + for (int i = 0; i < fn_ctxs_.size(); ++i) { + if (fn_ctxs_[i]->impl()->closed()) continue; + fn_ctxs_[i]->impl()->FreeLocalAllocations(); + } +} + +void ScalarExprEvaluator::FreeLocalAllocations( + const vector<ScalarExprEvaluator*>& evals) { + for (int i = 0; i < evals.size(); ++i) evals[i]->FreeLocalAllocations(); +} + +Status ScalarExprEvaluator::GetError(int start_idx, int end_idx) const { + DCHECK(opened_); + end_idx = end_idx == -1 ? fn_ctxs_.size() : end_idx; + DCHECK_GE(start_idx, 0); + DCHECK_LE(end_idx, fn_ctxs_.size()); + for (int idx = start_idx; idx < end_idx; ++idx) { + DCHECK_LT(idx, fn_ctxs_.size()); + FunctionContext* fn_ctx = fn_ctxs_[idx]; + if (fn_ctx->has_error()) return Status(fn_ctx->error_msg()); + } + return Status::OK(); +} + +Status ScalarExprEvaluator::GetConstValue(RuntimeState* state, const ScalarExpr& expr, + AnyVal** const_val) { + DCHECK(opened_); + if (!expr.is_constant()) { + *const_val = nullptr; + return Status::OK(); + } + + // A constant expression shouldn't have any SlotRefs expr in it. + DCHECK_EQ(expr.GetSlotIds(), 0); + DCHECK(mem_pool_ != nullptr); + const ColumnType& result_type = expr.type(); + RETURN_IF_ERROR(AllocateAnyVal(state, mem_pool_, result_type, + "Could not allocate constant expression value", const_val)); + + void* result = ScalarExprEvaluator::GetValue(expr, nullptr); + AnyValUtil::SetAnyVal(result, result_type, *const_val); + if (result_type.IsStringType()) { + StringVal* sv = reinterpret_cast<StringVal*>(*const_val); + if (!sv->is_null && sv->len > 0) { + // Make sure the memory is owned by this evaluator. 
+ char* ptr_copy = reinterpret_cast<char*>(mem_pool_->TryAllocate(sv->len)); + if (ptr_copy == nullptr) { + return mem_pool_->mem_tracker()->MemLimitExceeded( + state, "Could not allocate constant string value", sv->len); + } + memcpy(ptr_copy, sv->ptr, sv->len); + sv->ptr = reinterpret_cast<uint8_t*>(ptr_copy); + } + } + return GetError(expr.fn_ctx_idx_start_, expr.fn_ctx_idx_end_); +} + +void* ScalarExprEvaluator::GetValue(const TupleRow* row) { + return GetValue(root_, row); +} + +void* ScalarExprEvaluator::GetValue(const ScalarExpr& expr, const TupleRow* row) { + switch (expr.type_.type) { + case TYPE_BOOLEAN: { + impala_udf::BooleanVal v = expr.GetBooleanVal(this, row); + if (v.is_null) return nullptr; + result_.bool_val = v.val; + return &result_.bool_val; + } + case TYPE_TINYINT: { + impala_udf::TinyIntVal v = expr.GetTinyIntVal(this, row); + if (v.is_null) return nullptr; + result_.tinyint_val = v.val; + return &result_.tinyint_val; + } + case TYPE_SMALLINT: { + impala_udf::SmallIntVal v = expr.GetSmallIntVal(this, row); + if (v.is_null) return nullptr; + result_.smallint_val = v.val; + return &result_.smallint_val; + } + case TYPE_INT: { + impala_udf::IntVal v = expr.GetIntVal(this, row); + if (v.is_null) return nullptr; + result_.int_val = v.val; + return &result_.int_val; + } + case TYPE_BIGINT: { + impala_udf::BigIntVal v = expr.GetBigIntVal(this, row); + if (v.is_null) return nullptr; + result_.bigint_val = v.val; + return &result_.bigint_val; + } + case TYPE_FLOAT: { + impala_udf::FloatVal v = expr.GetFloatVal(this, row); + if (v.is_null) return nullptr; + result_.float_val = v.val; + return &result_.float_val; + } + case TYPE_DOUBLE: { + impala_udf::DoubleVal v = expr.GetDoubleVal(this, row); + if (v.is_null) return nullptr; + result_.double_val = v.val; + return &result_.double_val; + } + case TYPE_STRING: + case TYPE_VARCHAR: { + impala_udf::StringVal v = expr.GetStringVal(this, row); + if (v.is_null) return nullptr; + result_.string_val.ptr = 
reinterpret_cast<char*>(v.ptr); + result_.string_val.len = v.len; + return &result_.string_val; + } + case TYPE_CHAR: { + impala_udf::StringVal v = expr.GetStringVal(this, row); + if (v.is_null) return nullptr; + result_.string_val.ptr = reinterpret_cast<char*>(v.ptr); + result_.string_val.len = v.len; + if (expr.type_.IsVarLenStringType()) { + return &result_.string_val; + } else { + return result_.string_val.ptr; + } + } + case TYPE_TIMESTAMP: { + impala_udf::TimestampVal v = expr.GetTimestampVal(this, row); + if (v.is_null) return nullptr; + result_.timestamp_val = TimestampValue::FromTimestampVal(v); + return &result_.timestamp_val; + } + case TYPE_DECIMAL: { + DecimalVal v = expr.GetDecimalVal(this, row); + if (v.is_null) return nullptr; + switch (expr.type_.GetByteSize()) { + case 4: + result_.decimal4_val = v.val4; + return &result_.decimal4_val; + case 8: + result_.decimal8_val = v.val8; + return &result_.decimal8_val; + case 16: + result_.decimal16_val = v.val16; + return &result_.decimal16_val; + default: + DCHECK(false) << expr.type_.GetByteSize(); + return nullptr; + } + } + case TYPE_ARRAY: + case TYPE_MAP: { + impala_udf::CollectionVal v = expr.GetCollectionVal(this, row); + if (v.is_null) return nullptr; + result_.collection_val.ptr = v.ptr; + result_.collection_val.num_tuples = v.num_tuples; + return &result_.collection_val; + } + default: + DCHECK(false) << "Type not implemented: " << expr.type_.DebugString(); + return nullptr; + } +} + +void ScalarExprEvaluator::PrintValue(const TupleRow* row, string* str) { + RawValue::PrintValue(GetValue(row), root_.type(), output_scale_, str); +} + +void ScalarExprEvaluator::PrintValue(void* value, string* str) { + RawValue::PrintValue(value, root_.type(), output_scale_, str); +} + +void ScalarExprEvaluator::PrintValue(void* value, stringstream* stream) { + RawValue::PrintValue(value, root_.type(), output_scale_, stream); +} + +void ScalarExprEvaluator::PrintValue(const TupleRow* row, stringstream* stream) { + 
RawValue::PrintValue(GetValue(row), root_.type(), output_scale_, stream); +} + +BooleanVal ScalarExprEvaluator::GetBooleanVal(TupleRow* row) { + return root_.GetBooleanVal(this, row); +} + +TinyIntVal ScalarExprEvaluator::GetTinyIntVal(TupleRow* row) { + return root_.GetTinyIntVal(this, row); +} + +SmallIntVal ScalarExprEvaluator::GetSmallIntVal(TupleRow* row) { + return root_.GetSmallIntVal(this, row); +} + +IntVal ScalarExprEvaluator::GetIntVal(TupleRow* row) { + return root_.GetIntVal(this, row); +} + +BigIntVal ScalarExprEvaluator::GetBigIntVal(TupleRow* row) { + return root_.GetBigIntVal(this, row); +} + +FloatVal ScalarExprEvaluator::GetFloatVal(TupleRow* row) { + return root_.GetFloatVal(this, row); +} + +DoubleVal ScalarExprEvaluator::GetDoubleVal(TupleRow* row) { + return root_.GetDoubleVal(this, row); +} + +StringVal ScalarExprEvaluator::GetStringVal(TupleRow* row) { + return root_.GetStringVal(this, row); +} + +CollectionVal ScalarExprEvaluator::GetCollectionVal(TupleRow* row) { + return root_.GetCollectionVal(this, row); +} + +TimestampVal ScalarExprEvaluator::GetTimestampVal(TupleRow* row) { + return root_.GetTimestampVal(this, row); +} + +DecimalVal ScalarExprEvaluator::GetDecimalVal(TupleRow* row) { + return root_.GetDecimalVal(this, row); +} + +void ScalarExprEvaluator::InitBuiltinsDummy() { + // Call one function from each of the classes to pull all the symbols + // from that class in. 
+ AggregateFunctions::InitNull(nullptr, nullptr); + BitByteFunctions::CountSet(nullptr, TinyIntVal::null()); + CastFunctions::CastToBooleanVal(nullptr, TinyIntVal::null()); + CompoundPredicate::Not(nullptr, BooleanVal::null()); + ConditionalFunctions::NullIfZero(nullptr, TinyIntVal::null()); + DecimalFunctions::Precision(nullptr, DecimalVal::null()); + DecimalOperators::CastToDecimalVal(nullptr, DecimalVal::null()); + InPredicate::InIterate(nullptr, BigIntVal::null(), 0, nullptr); + IsNullPredicate::IsNull(nullptr, BooleanVal::null()); + LikePredicate::Like(nullptr, StringVal::null(), StringVal::null()); + Operators::Add_IntVal_IntVal(nullptr, IntVal::null(), IntVal::null()); + MathFunctions::Pi(nullptr); + StringFunctions::Length(nullptr, StringVal::null()); + TimestampFunctions::Year(nullptr, TimestampVal::null()); + TimestampFunctions::UnixAndFromUnixPrepare(nullptr, FunctionContext::FRAGMENT_LOCAL); + UdfBuiltins::Pi(nullptr); + UtilityFunctions::Pid(nullptr); +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr-evaluator.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-expr-evaluator.h b/be/src/exprs/scalar-expr-evaluator.h new file mode 100644 index 0000000..5792d14 --- /dev/null +++ b/be/src/exprs/scalar-expr-evaluator.h @@ -0,0 +1,258 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef IMPALA_EXPRS_SCALAR_EXPR_EVALUATOR_H +#define IMPALA_EXPRS_SCALAR_EXPR_EVALUATOR_H + +#include <boost/scoped_ptr.hpp> + +#include "common/object-pool.h" +#include "common/status.h" +#include "exprs/expr-value.h" +#include "udf/udf-internal.h" // for CollectionVal +#include "udf/udf.h" + +namespace impala { + +using impala_udf::FunctionContext; +using impala_udf::AnyVal; +using impala_udf::BooleanVal; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; +using impala_udf::CollectionVal; + +class MemPool; +class RuntimeState; +class ScalarExpr; +class Status; +class TupleRow; + +/// ScalarExprEvaluator is the interface for evaluating a scalar expression. It holds a +/// reference to the root of a ScalarExpr tree, runtime state (e.g. FunctionContexts) +/// needed during evaluation and also a buffer for the expression evaluation result. +/// A single evaluator is not thread-safe. It implements Get*Val() interfaces for every +/// possible return type and drives the execution by calling the Get*Val() function of +/// the root ScalarExpr with the input tuple row. +/// +/// A ScalarExprEvaluator is created using the Create() interface. It must be initialized +/// by calling Open() before use and Close() must also be called to free up resources +/// owned by the evaluator. 
+/// +/// FunctionContext is the interface for Impala to communicate with built-in functions, +/// UDF and UDAF. It is passed to UDF/UDAF to store its thread-private states, propagate +/// errors and allocate memory. An evaluator contains a vector of FunctionContext for +/// the ScalarExpr nodes in the Expr tree. The index of each node's entry is defined in +/// its 'fn_ctx_idx_' field. The range in the vector for the sub-expression tree +/// rooted at a node is defined by [fn_ctx_idx_start_, fn_ctx_idx_end_). +/// +class ScalarExprEvaluator { + public: + ~ScalarExprEvaluator(); + + /// Creates an evaluator for the scalar expression tree rooted at 'expr' and all + /// FunctionContexts needed during evaluation. Allocations from this evaluator will + /// be from 'mem_pool'. The newly created evaluator will be stored in 'pool' and + /// returned in 'eval'. Returns error status on failure. Note that it's the caller's + /// responsibility to call Close() on all created evaluators even if this function + /// returns error on initialization failure. + static Status Create(const ScalarExpr& expr, RuntimeState* state, ObjectPool* pool, + MemPool* mem_pool, ScalarExprEvaluator** eval) WARN_UNUSED_RESULT; + + /// Convenience function for creating multiple ScalarExprEvaluators. The evaluators + /// are appended to 'evals'. + static Status Create(const std::vector<ScalarExpr*>& exprs, RuntimeState* state, + ObjectPool* pool, MemPool* mem_pool, std::vector<ScalarExprEvaluator*>* evals) + WARN_UNUSED_RESULT; + + /// Initializes the ScalarExprEvaluator on all nodes in the ScalarExpr tree. This is + /// also the location in which constant arguments to functions are computed. Does not + /// need to be called on clones. Idempotent (this allows exprs to be opened multiple + /// times in subplans without reinitializing function states). + Status Open(RuntimeState* state) WARN_UNUSED_RESULT; + + /// Convenience function for opening multiple ScalarExprEvaluators. 
+ static Status Open(const std::vector<ScalarExprEvaluator*>& evals, + RuntimeState* state) WARN_UNUSED_RESULT; + + /// Free resources held by this evaluator. Must be called on every ScalarExprEvaluator, + /// including clones. Has no effect if already closed. + void Close(RuntimeState* state); + + /// Convenience function for closing multiple ScalarExprEvaluators. + static void Close(const std::vector<ScalarExprEvaluator*>& evals, + RuntimeState* state); + + /// Creates a copy of this ScalarExprEvaluator. Open() must be called first. The copy + /// contains clones of each FunctionContext, which share the fragment-local state of the + /// original one but have their own FreePool and thread-local state. This should be used + /// to create a ScalarExprEvaluator for each execution thread that needs to evaluate + /// 'root_'. All allocations will be from 'mem_pool' so callers should use different + /// MemPool for evaluators in different threads. Note that clones are considered opened. + /// The cloned ScalarExprEvaluator cannot be used after the original ScalarExprEvaluator + /// is destroyed because it may reference fragment-local state from the original. + /// TODO: IMPALA-4743: Evaluate input arguments in ScalarExpr::Init() and store them + /// in ScalarExpr. + Status Clone(ObjectPool* pool, RuntimeState* state, MemPool* mem_pool, + ScalarExprEvaluator** new_eval) const WARN_UNUSED_RESULT; + + /// Convenience function for cloning multiple ScalarExprEvaluators. The newly + /// created evaluators are appended to 'new_evals', which must be empty on entry. + static Status Clone(ObjectPool* pool, RuntimeState* state, MemPool* mem_pool, + const std::vector<ScalarExprEvaluator*>& evals, + std::vector<ScalarExprEvaluator*>* new_evals) WARN_UNUSED_RESULT; + + /// If 'expr' is constant, evaluates it with no input row argument and returns the + /// result in 'const_val'. Sets 'const_val' to NULL if the argument is not constant. 
+ /// The returned AnyVal and associated varlen data is owned by this evaluator. This + /// should only be called after Open() has been called on this expr. Returns an error + /// if there was an error evaluating the expression or if memory could not be allocated + /// for the expression result. + Status GetConstValue( + RuntimeState* state, const ScalarExpr& expr, AnyVal** const_val) WARN_UNUSED_RESULT; + + /// Calls the appropriate Get*Val() function on 'e' and stores the result in result_. + /// This is used by ScalarExpr to call GetValue() on sub-expression, rather than root_. + void* GetValue(const ScalarExpr& e, const TupleRow* row); + + /// Calls the appropriate Get*Val() function on this evaluator's root_ expr tree, stores + /// the result in 'result_' and returns a pointer to it. + void* GetValue(const TupleRow* row); + + /// Evaluates the expression of this evaluator on tuple row 'row' and returns + /// the results. One function for each data type implemented. + BooleanVal GetBooleanVal(TupleRow* row); + TinyIntVal GetTinyIntVal(TupleRow* row); + SmallIntVal GetSmallIntVal(TupleRow* row); + IntVal GetIntVal(TupleRow* row); + BigIntVal GetBigIntVal(TupleRow* row); + FloatVal GetFloatVal(TupleRow* row); + DoubleVal GetDoubleVal(TupleRow* row); + StringVal GetStringVal(TupleRow* row); + CollectionVal GetCollectionVal(TupleRow* row); + TimestampVal GetTimestampVal(TupleRow* row); + DecimalVal GetDecimalVal(TupleRow* row); + + /// Returns an error status if there was any error in evaluating the expression + /// or its sub-expressions. 'start_idx' and 'end_idx' correspond to the range + /// within the vector of FunctionContext for the sub-expressions of interest. + /// The default parameters correspond to the entire expr 'root_'. + Status GetError(int start_idx = 0, int end_idx = -1) const WARN_UNUSED_RESULT; + + /// Convenience functions: print value into 'str' or 'stream'. NULL turns into "NULL". 
+ /// The first two variants will evaluate the tuple row against 'root_'. + void PrintValue(const TupleRow* row, std::string* str); + void PrintValue(const TupleRow* row, std::stringstream* stream); + void PrintValue(void* value, std::string* str); + void PrintValue(void* value, std::stringstream* stream); + + /// Returns true if any of the evaluators in 'evals' has local allocations. + static bool HasLocalAllocations(const std::vector<ScalarExprEvaluator*>& evals); + bool HasLocalAllocations() const; + + /// Frees all local allocations made by fn_ctxs_. This can be called when result + /// data from this evaluator is no longer needed. + void FreeLocalAllocations(); + static void FreeLocalAllocations(const std::vector<ScalarExprEvaluator*>& evals); + + /// Get the number of digits after the decimal that should be displayed for this value. + /// Returns -1 if no scale has been specified (currently the scale is only set for + /// doubles set by RoundUpTo). GetValue() must have already been called. + /// TODO: remove this (IMPALA-4720). + int output_scale() const { return output_scale_; } + const ScalarExpr& root() const { return root_; } + bool opened() const { return opened_; } + bool closed() const { return closed_; } + bool is_clone() const { return is_clone_; } + MemPool* mem_pool() const { return mem_pool_; } + + /// The builtin functions are not called from anywhere in the code and the + /// symbols are therefore not included in the binary. We call these functions + /// by using dlsym. The compiler must think this function is callable to + /// not strip these symbols. + static void InitBuiltinsDummy(); + + static const char* LLVM_CLASS_NAME; + + protected: + /// Users of fn_context(). + friend class CaseExpr; + friend class HiveUdfCall; + friend class ScalarFnCall; + + /// Retrieves a registered FunctionContext. 'i' is the 'fn_ctx_idx_' of the + /// corresponding sub-expression in the Expr tree. 
+ FunctionContext* fn_context(int i) { + DCHECK_GE(i, 0); + DCHECK_LT(i, fn_ctxs_.size()); + return fn_ctxs_[i]; + } + + private: + friend class ScalarExpr; + + /// FunctionContexts for nodes in this Expr tree. Created by this ScalarExprEvaluator + /// in Create() or Clone(); closed and deleted in Close(), which also empties this + /// vector. + std::vector<FunctionContext*> fn_ctxs_; + + /// Array access to fn_ctxs_. Used by ScalarFnCall's codegen'd compute function + /// to access the correct FunctionContext. + FunctionContext** fn_ctxs_ptr_ = nullptr; + + /// Pointer to the MemPool which all allocations (including those of 'fn_ctxs_') come from. + /// Owned by the exec node which owns this evaluator. + MemPool* mem_pool_; + + /// The expr tree which this evaluator is for. + const ScalarExpr& root_; + + /// Stores the result of evaluation for this expr tree (or any sub-expression). + /// This is used in interpreted path when we need to return a void*. + ExprValue result_; + + /// True if this evaluator came from a Clone() call. Used to manage FunctionStateScope. + bool is_clone_ = false; + + /// Variables keeping track of current state. + bool initialized_ = false; + bool opened_ = false; + bool closed_ = false; + + /// The number of digits after the decimal that should be displayed for this value. + /// -1 if no scale has been specified (currently the scale is only set for doubles + /// set by RoundUpTo). This value relies on FunctionContext to be allocated first + /// before it's derived so it lives in the evaluator instead of Expr. + /// TODO: move this to Expr initialization after IMPALA-4743 is fixed. + int output_scale_ = -1; + + ScalarExprEvaluator(const ScalarExpr& root, MemPool* mem_pool); + + /// Walks the expression tree 'expr' and fills in 'fn_ctxs_' for all Expr nodes + /// which need FunctionContext. 
+ void CreateFnCtxs(RuntimeState* state, const ScalarExpr& expr); +}; + +} + +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-expr-ir.cc b/be/src/exprs/scalar-expr-ir.cc new file mode 100644 index 0000000..051cc4e --- /dev/null +++ b/be/src/exprs/scalar-expr-ir.cc @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/scalar-expr.h" +#include "udf/udf.h" + +#ifdef IR_COMPILE + +/// Compile ScalarExprEvaluator declaration to IR so we can use it in codegen'd functions +#include "exprs/scalar-expr-evaluator.h" + +/// Dummy function to force compilation of UDF types. +/// The arguments are pointers to prevent Clang from lowering the struct types +/// (e.g. IntVal={bool, i32} can be coerced to i64). 
+void dummy(impala_udf::FunctionContext*, impala_udf::BooleanVal*, impala_udf::TinyIntVal*, + impala_udf::SmallIntVal*, impala_udf::IntVal*, impala_udf::BigIntVal*, + impala_udf::FloatVal*, impala_udf::DoubleVal*, impala_udf::StringVal*, + impala_udf::TimestampVal*, impala_udf::DecimalVal*, impala::ScalarExprEvaluator*) { } +#endif + +/// The following are compute functions that are cross-compiled to both native and IR +/// libraries. In the interpreted path, these functions are executed as-is from the native +/// code. In the codegen'd path, we load the IR functions and replace the Get*Val() calls +/// with the appropriate child's codegen'd compute function. + +using namespace impala; +using namespace impala_udf; + +/// Static wrappers around Get*Val() functions. We'd like to be able to call these +/// directly from native code as well as from generated IR functions. +BooleanVal ScalarExpr::GetBooleanVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetBooleanVal(eval, row); +} + +TinyIntVal ScalarExpr::GetTinyIntVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetTinyIntVal(eval, row); +} + +SmallIntVal ScalarExpr::GetSmallIntVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetSmallIntVal(eval, row); +} + +IntVal ScalarExpr::GetIntVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetIntVal(eval, row); +} + +BigIntVal ScalarExpr::GetBigIntVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetBigIntVal(eval, row); +} + +FloatVal ScalarExpr::GetFloatVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetFloatVal(eval, row); +} + +DoubleVal ScalarExpr::GetDoubleVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetDoubleVal(eval, row); +} + +StringVal ScalarExpr::GetStringVal( +
ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetStringVal(eval, row); +} + +TimestampVal ScalarExpr::GetTimestampVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetTimestampVal(eval, row); +} + +DecimalVal ScalarExpr::GetDecimalVal( + ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) { + return expr->GetDecimalVal(eval, row); +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-expr.cc b/be/src/exprs/scalar-expr.cc new file mode 100644 index 0000000..956e188 --- /dev/null +++ b/be/src/exprs/scalar-expr.cc @@ -0,0 +1,480 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exprs/scalar-expr.h" + +#include <sstream> +#include <thrift/protocol/TDebugProtocol.h> + +#include "codegen/codegen-anyval.h" +#include "codegen/llvm-codegen.h" +#include "common/object-pool.h" +#include "common/status.h" +#include "exprs/case-expr.h" +#include "exprs/compound-predicates.h" +#include "exprs/conditional-functions.h" +#include "exprs/hive-udf-call.h" +#include "exprs/in-predicate.h" +#include "exprs/is-not-empty-predicate.h" +#include "exprs/is-null-predicate.h" +#include "exprs/kudu-partition-expr.h" +#include "exprs/like-predicate.h" +#include "exprs/literal.h" +#include "exprs/null-literal.h" +#include "exprs/operators.h" +#include "exprs/scalar-expr-evaluator.h" +#include "exprs/scalar-fn-call.h" +#include "exprs/slot-ref.h" +#include "exprs/string-functions.h" +#include "exprs/timestamp-functions.h" +#include "exprs/tuple-is-null-predicate.h" +#include "exprs/udf-builtins.h" +#include "exprs/utility-functions.h" +#include "runtime/runtime-state.h" +#include "runtime/tuple-row.h" +#include "runtime/tuple.h" +#include "udf/udf-internal.h" +#include "udf/udf.h" + +#include "gen-cpp/Exprs_types.h" +#include "gen-cpp/ImpalaService_types.h" + +#include "common/names.h" + +using namespace impala_udf; +using namespace llvm; + +namespace impala { + +const char* ScalarExpr::LLVM_CLASS_NAME = "class.impala::ScalarExpr"; + +ScalarExpr::ScalarExpr(const ColumnType& type, bool is_constant) + : Expr(type), + is_constant_(is_constant) { +} + +ScalarExpr::ScalarExpr(const TExprNode& node) + : Expr(node), + is_constant_(node.is_constant) { + if (node.__isset.fn) fn_ = node.fn; +} + +Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc, + RuntimeState* state, ObjectPool* pool, ScalarExpr** scalar_expr) { + *scalar_expr = nullptr; + ScalarExpr* root; + RETURN_IF_ERROR(CreateNode(texpr.nodes[0], pool, &root)); + RETURN_IF_ERROR(Expr::CreateTree(texpr, pool, root)); + Status status = root->Init(row_desc, state); + if 
(UNLIKELY(!status.ok())) { + root->Close(); + return status; + } + int fn_ctx_idx = 0; + root->AssignFnCtxIdx(&fn_ctx_idx); + *scalar_expr = root; + return Status::OK(); +} + +Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc, + RuntimeState* state, ObjectPool* pool, vector<ScalarExpr*>* exprs) { + exprs->clear(); + for (const TExpr& texpr: texprs) { + ScalarExpr* expr; + RETURN_IF_ERROR(Create(texpr, row_desc, state, pool, &expr)); + DCHECK(expr != nullptr); + exprs->push_back(expr); + } + return Status::OK(); +} + +Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc, + RuntimeState* state, ScalarExpr** scalar_expr) { + return ScalarExpr::Create(texpr, row_desc, state, state->obj_pool(), scalar_expr); +} + +Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc, + RuntimeState* state, vector<ScalarExpr*>* exprs) { + return ScalarExpr::Create(texprs, row_desc, state, state->obj_pool(), exprs); +} + +void ScalarExpr::AssignFnCtxIdx(int* next_fn_ctx_idx) { + fn_ctx_idx_start_ = *next_fn_ctx_idx; + if (HasFnCtx()) { + fn_ctx_idx_ = *next_fn_ctx_idx; + ++(*next_fn_ctx_idx); + } + for (ScalarExpr* child : children()) child->AssignFnCtxIdx(next_fn_ctx_idx); + fn_ctx_idx_end_ = *next_fn_ctx_idx; +} + +Status ScalarExpr::CreateNode( + const TExprNode& texpr_node, ObjectPool* pool, ScalarExpr** expr) { + switch (texpr_node.node_type) { + case TExprNodeType::BOOL_LITERAL: + case TExprNodeType::FLOAT_LITERAL: + case TExprNodeType::INT_LITERAL: + case TExprNodeType::STRING_LITERAL: + case TExprNodeType::DECIMAL_LITERAL: + case TExprNodeType::TIMESTAMP_LITERAL: + *expr = pool->Add(new Literal(texpr_node)); + return Status::OK(); + case TExprNodeType::CASE_EXPR: + if (!texpr_node.__isset.case_expr) { + return Status("Case expression not set in thrift node"); + } + *expr = pool->Add(new CaseExpr(texpr_node)); + return Status::OK(); + case TExprNodeType::COMPOUND_PRED: + if 
(texpr_node.fn.name.function_name == "and") { + *expr = pool->Add(new AndPredicate(texpr_node)); + } else if (texpr_node.fn.name.function_name == "or") { + *expr = pool->Add(new OrPredicate(texpr_node)); + } else { + DCHECK_EQ(texpr_node.fn.name.function_name, "not"); + *expr = pool->Add(new ScalarFnCall(texpr_node)); + } + return Status::OK(); + case TExprNodeType::NULL_LITERAL: + *expr = pool->Add(new NullLiteral(texpr_node)); + return Status::OK(); + case TExprNodeType::SLOT_REF: + if (!texpr_node.__isset.slot_ref) { + return Status("Slot reference not set in thrift node"); + } + *expr = pool->Add(new SlotRef(texpr_node)); + return Status::OK(); + case TExprNodeType::TUPLE_IS_NULL_PRED: + *expr = pool->Add(new TupleIsNullPredicate(texpr_node)); + return Status::OK(); + case TExprNodeType::FUNCTION_CALL: + if (!texpr_node.__isset.fn) { + return Status("Function not set in thrift node"); + } + // Special-case functions that have their own Expr classes + // TODO: is there a better way to do this? 
+ if (texpr_node.fn.name.function_name == "if") { + *expr = pool->Add(new IfExpr(texpr_node)); + } else if (texpr_node.fn.name.function_name == "nullif") { + *expr = pool->Add(new NullIfExpr(texpr_node)); + } else if (texpr_node.fn.name.function_name == "isnull" || + texpr_node.fn.name.function_name == "ifnull" || + texpr_node.fn.name.function_name == "nvl") { + *expr = pool->Add(new IsNullExpr(texpr_node)); + } else if (texpr_node.fn.name.function_name == "coalesce") { + *expr = pool->Add(new CoalesceExpr(texpr_node)); + } else if (texpr_node.fn.binary_type == TFunctionBinaryType::JAVA) { + *expr = pool->Add(new HiveUdfCall(texpr_node)); + } else { + *expr = pool->Add(new ScalarFnCall(texpr_node)); + } + return Status::OK(); + case TExprNodeType::IS_NOT_EMPTY_PRED: + *expr = pool->Add(new IsNotEmptyPredicate(texpr_node)); + return Status::OK(); + case TExprNodeType::KUDU_PARTITION_EXPR: + *expr = pool->Add(new KuduPartitionExpr(texpr_node)); + return Status::OK(); + default: + *expr = nullptr; + stringstream os; + os << "Unknown expr node type: " << texpr_node.node_type; + return Status(os.str()); + } +} + +Status ScalarExpr::OpenEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const { + for (int i = 0; i < children_.size(); ++i) { + RETURN_IF_ERROR(children_[i]->OpenEvaluator(scope, state, eval)); + } + return Status::OK(); +} + +void ScalarExpr::CloseEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const { + for (ScalarExpr* child : children_) child->CloseEvaluator(scope, state, eval); +} + +void ScalarExpr::Close() { + Expr::Close(); +} + +void ScalarExpr::Close(const vector<ScalarExpr*>& exprs) { + for (ScalarExpr* expr : exprs) expr->Close(); +} + +struct MemLayoutData { + int expr_idx; + int byte_size; + bool variable_length; + int alignment; + + // TODO: sort by type as well? Any reason to do this? 
+ // TODO: would sorting in reverse order of size be faster due to better packing? + // TODO: why put var-len at end? + bool operator<(const MemLayoutData& rhs) const { + // variable_len go at end + if (this->variable_length && !rhs.variable_length) return false; + if (!this->variable_length && rhs.variable_length) return true; + return this->byte_size < rhs.byte_size; + } +}; + +int ScalarExpr::ComputeResultsLayout(const vector<ScalarExpr*>& exprs, + vector<int>* offsets, int* var_result_begin) { + if (exprs.size() == 0) { + *var_result_begin = -1; + return 0; + } + + // Don't align more than word (8-byte) size. There's no performance gain beyond 8-byte + // alignment, and there is a performance gain to keeping the results buffer small. This + // is consistent with what compilers do. + int MAX_ALIGNMENT = sizeof(int64_t); + + vector<MemLayoutData> data; + data.resize(exprs.size()); + + // Collect all the byte sizes and sort them + for (int i = 0; i < exprs.size(); ++i) { + DCHECK(!exprs[i]->type().IsComplexType()) << "NYI"; + data[i].expr_idx = i; + data[i].byte_size = exprs[i]->type().GetSlotSize(); + DCHECK_GT(data[i].byte_size, 0); + data[i].variable_length = exprs[i]->type().IsVarLenStringType(); + + bool fixed_len_char = exprs[i]->type().type == TYPE_CHAR && !data[i].variable_length; + + // Compute the alignment of this value. Values should be self-aligned for optimal + // memory access speed, up to the max alignment (e.g., if this value is an int32_t, + // its offset in the buffer should be divisible by sizeof(int32_t)). + // TODO: is self-alignment really necessary for perf? + if (!fixed_len_char) { + data[i].alignment = min(data[i].byte_size, MAX_ALIGNMENT); + } else { + // Fixed-len chars are aligned to a one-byte boundary, as if they were char[], + // leaving no padding between them and the previous value. 
+ data[i].alignment = 1; + } + } + + sort(data.begin(), data.end()); + + // Walk the types and store in a packed aligned layout + int byte_offset = 0; + + offsets->resize(exprs.size()); + *var_result_begin = -1; + + for (int i = 0; i < data.size(); ++i) { + // Increase byte_offset so data[i] is at the right alignment (i.e. add padding between + // this value and the previous). + byte_offset = BitUtil::RoundUp(byte_offset, data[i].alignment); + + (*offsets)[data[i].expr_idx] = byte_offset; + if (data[i].variable_length && *var_result_begin == -1) { + *var_result_begin = byte_offset; + } + DCHECK(!(i == 0 && byte_offset > 0)) << "first value should be at start of layout"; + byte_offset += data[i].byte_size; + } + + return byte_offset; +} + +Status ScalarExpr::Init(const RowDescriptor& row_desc, RuntimeState* state) { + DCHECK(type_.type != INVALID_TYPE); + for (int i = 0; i < children_.size(); ++i) { + RETURN_IF_ERROR(children_[i]->Init(row_desc, state)); + } + return Status::OK(); +} + +string ScalarExpr::DebugString() const { + // TODO: implement partial debug string for member vars + stringstream out; + out << " type=" << type_.DebugString(); + if (!children_.empty()) { + out << " children=" << DebugString(children_); + } + return out.str(); +} + +string ScalarExpr::DebugString(const vector<ScalarExpr*>& exprs) { + stringstream out; + out << "["; + for (int i = 0; i < exprs.size(); ++i) { + out << (i == 0 ? 
"" : " ") << exprs[i]->DebugString(); + } + out << "]"; + return out.str(); +} + +int ScalarExpr::GetSlotIds(vector<SlotId>* slot_ids) const { + int n = 0; + for (int i = 0; i < children_.size(); ++i) { + n += children_[i]->GetSlotIds(slot_ids); + } + return n; +} + +Function* ScalarExpr::GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* codegen) { + switch (type.type) { + case TYPE_BOOLEAN: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_BOOLEAN_VAL, false); + case TYPE_TINYINT: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_TINYINT_VAL, false); + case TYPE_SMALLINT: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_SMALLINT_VAL, false); + case TYPE_INT: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_INT_VAL, false); + case TYPE_BIGINT: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_BIGINT_VAL, false); + case TYPE_FLOAT: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_FLOAT_VAL, false); + case TYPE_DOUBLE: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_DOUBLE_VAL, false); + case TYPE_STRING: + case TYPE_CHAR: + case TYPE_VARCHAR: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_STRING_VAL, false); + case TYPE_TIMESTAMP: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_TIMESTAMP_VAL, false); + case TYPE_DECIMAL: + return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_DECIMAL_VAL, false); + default: + DCHECK(false) << "Invalid type: " << type.DebugString(); + return NULL; + } +} + +Function* ScalarExpr::CreateIrFunctionPrototype(const string& name, + LlvmCodeGen* codegen, Value* (*args)[2]) { + Type* return_type = CodegenAnyVal::GetLoweredType(codegen, type()); + LlvmCodeGen::FnPrototype prototype(codegen, name, return_type); + prototype.AddArgument( + LlvmCodeGen::NamedVariable( + "eval", codegen->GetPtrType(ScalarExprEvaluator::LLVM_CLASS_NAME))); + prototype.AddArgument( + LlvmCodeGen::NamedVariable("row", codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME))); + Function* 
function = prototype.GeneratePrototype(NULL, args[0]); + DCHECK(function != NULL); + return function; +} + +Status ScalarExpr::GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, Function** fn) { + if (ir_compute_fn_ != nullptr) { + *fn = ir_compute_fn_; + return Status::OK(); + } + Function* static_getval_fn = GetStaticGetValWrapper(type(), codegen); + + // Call it passing this as the additional first argument. + Value* args[2]; + ir_compute_fn_ = CreateIrFunctionPrototype("CodegenComputeFnWrapper", codegen, &args); + BasicBlock* entry_block = + BasicBlock::Create(codegen->context(), "entry", ir_compute_fn_); + LlvmBuilder builder(entry_block); + Value* this_ptr = + codegen->CastPtrToLlvmPtr(codegen->GetPtrType(ScalarExpr::LLVM_CLASS_NAME), this); + Value* compute_fn_args[] = {this_ptr, args[0], args[1]}; + Value* ret = CodegenAnyVal::CreateCall( + codegen, &builder, static_getval_fn, compute_fn_args, "ret"); + builder.CreateRet(ret); + *fn = codegen->FinalizeFunction(ir_compute_fn_); + if (UNLIKELY(*fn == nullptr)) { + return Status(TErrorCode::IR_VERIFY_FAILED, "CodegendComputeFnWrapper"); + } + ir_compute_fn_ = *fn; + return Status::OK(); +} + +// At least one of these should always be overridden. 
+BooleanVal ScalarExpr::GetBooleanVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return BooleanVal::null(); +} + +TinyIntVal ScalarExpr::GetTinyIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return TinyIntVal::null(); +} + +SmallIntVal ScalarExpr::GetSmallIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return SmallIntVal::null(); +} + +IntVal ScalarExpr::GetIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return IntVal::null(); +} + +BigIntVal ScalarExpr::GetBigIntVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return BigIntVal::null(); +} + +FloatVal ScalarExpr::GetFloatVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return FloatVal::null(); +} + +DoubleVal ScalarExpr::GetDoubleVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return DoubleVal::null(); +} + +StringVal ScalarExpr::GetStringVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return StringVal::null(); +} + +CollectionVal ScalarExpr::GetCollectionVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return CollectionVal::null(); +} + +TimestampVal ScalarExpr::GetTimestampVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return TimestampVal::null(); +} + +DecimalVal ScalarExpr::GetDecimalVal( + ScalarExprEvaluator* eval, const TupleRow* row) const { + DCHECK(false) << DebugString(); + return DecimalVal::null(); +} + +string ScalarExpr::DebugString(const string& expr_name) const { + stringstream out; + out << expr_name << "(" << ScalarExpr::DebugString() << ")"; + return out.str(); +} + +} 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-expr.h b/be/src/exprs/scalar-expr.h new file mode 100644 index 0000000..032ac94 --- /dev/null +++ b/be/src/exprs/scalar-expr.h @@ -0,0 +1,338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + +#ifndef IMPALA_EXPRS_SCALAR_EXPR_H +#define IMPALA_EXPRS_SCALAR_EXPR_H + +#include <memory> +#include <string> +#include <vector> +#include <boost/scoped_ptr.hpp> + +#include "common/global-types.h" +#include "common/status.h" +#include "exprs/expr.h" +#include "impala-ir/impala-ir-functions.h" +#include "runtime/types.h" +#include "udf/udf-internal.h" // for CollectionVal +#include "udf/udf.h" + +namespace llvm { + class BasicBlock; + class Function; + class Type; + class Value; +}; + +namespace impala { + +using impala_udf::FunctionContext; +using impala_udf::AnyVal; +using impala_udf::BooleanVal; +using impala_udf::TinyIntVal; +using impala_udf::SmallIntVal; +using impala_udf::IntVal; +using impala_udf::BigIntVal; +using impala_udf::FloatVal; +using impala_udf::DoubleVal; +using impala_udf::TimestampVal; +using impala_udf::StringVal; +using impala_udf::DecimalVal; +using impala_udf::CollectionVal; + +class LibCacheEntry; +class LlvmCodeGen; +class MemTracker; +class ObjectPool; +class RowDescriptor; +class RuntimeState; +class ScalarExprEvaluator; +class SlotDescriptor; +class TColumnValue; +class TExpr; +class TExprNode; +class Tuple; +class TupleRow; + +/// --- ScalarExpr overview +/// +/// ScalarExpr is an expression which returns a value for each input tuple row. +/// Examples include built-in functions such as abs(), UDF, case-expr and literal +/// such as a string "foobar". It's a subclass of Expr which represents an expression +/// as a tree. +/// +/// --- Implementation: +/// +/// ScalarExpr implements compute function, which given a row, performs the computation +/// of an expr and produces scalar result. This function evaluates the necessary child +/// arguments by calling their compute functions, then performs whatever computation is +/// necessary on the arguments to generate the result. These compute functions have +/// signature Get*Val(ScalarExprEvaluator*, const TupleRow*). 
One is implemented for each +/// possible return type it supports (e.g. GetBooleanVal(), GetStringVal(), etc). The +/// return type is a subclass of AnyVal (e.g. StringVal). One or more of these compute +/// functions must be overridden by subclasses of ScalarExpr. +/// +/// ScalarExpr contains query compile-time information about an expression (e.g. +/// sub-expressions implicitly encoded in the tree structure) and the LLVM IR compute +/// functions. ScalarExprEvaluator is the interface for evaluating a scalar expression +/// against an input TupleRow. +/// +/// ScalarExpr's compute functions are codegend to replace calls to the generic compute +/// function of child expressions with the exact compute functions based on the return +/// types of the child expressions known at runtime. Subclasses should override +/// GetCodegendComputeFn() to either generate custom IR compute functions using IRBuilder, +/// which inline calls to child expressions' compute functions, or simply call +/// GetCodegendComputeFnWrapper() to generate a wrapper function to call the interpreted +/// compute function. Note that we do not need a separate GetCodegendComputeFn() for each +/// type. +/// +/// TODO: Fix subclasses which call GetCodegendComputeFnWrapper() to not call interpreted +/// functions. +/// +class ScalarExpr : public Expr { + public: + /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr + /// is stored in ObjectPool 'pool' and returned in 'expr' on success. 'row_desc' is the + /// tuple row descriptor of the input tuple row. On failure, 'expr' is set to NULL and + /// the expr tree (if created) will be closed. Error status will be returned too. + static Status Create(const TExpr& texpr, const RowDescriptor& row_desc, + RuntimeState* state, ObjectPool* pool, ScalarExpr** expr) WARN_UNUSED_RESULT; + + /// Create a new ScalarExpr based on thrift Expr 'texpr'. 
The newly created ScalarExpr + /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 'row_desc' is + /// the tuple row descriptor of the input tuple row. Returns error status on failure. + static Status Create(const TExpr& texpr, const RowDescriptor& row_desc, + RuntimeState* state, ScalarExpr** expr) WARN_UNUSED_RESULT; + + /// Convenience functions creating multiple ScalarExpr. + static Status Create(const std::vector<TExpr>& texprs, const RowDescriptor& row_desc, + RuntimeState* state, ObjectPool* pool, std::vector<ScalarExpr*>* exprs) + WARN_UNUSED_RESULT; + + /// Convenience functions creating multiple ScalarExpr. + static Status Create(const std::vector<TExpr>& texprs, const RowDescriptor& row_desc, + RuntimeState* state, std::vector<ScalarExpr*>* exprs) WARN_UNUSED_RESULT; + + /// Returns true if this expression is a SlotRef. Overridden by SlotRef. + virtual bool IsSlotRef() const { return false; } + + /// Returns true if this is a literal expression. Overridden by Literal. + virtual bool IsLiteral() const { return false; } + + /// Returns true if this expr uses a FunctionContext to track its runtime state. + /// Overridden by exprs which use FunctionContext. + virtual bool HasFnCtx() const { return false; } + + /// Returns true if this expr should be treated as a constant expression. + bool is_constant() const { return is_constant_; } + + /// Returns the number of SlotRef nodes in the expr tree. If 'slot_ids' is non-null, + /// add the slot ids to it. Overridden by SlotRef. + virtual int GetSlotIds(std::vector<SlotId>* slot_ids = nullptr) const; + + /// Returns an llvm::Function* with signature: + /// <subclass of AnyVal> ComputeFn(ScalarExprEvaluator*, const TupleRow*) + // + /// The function should evaluate this expr over 'row' and return the result as the + /// appropriate type of AnyVal. Returns error status on failure. 
+ virtual Status GetCodegendComputeFn( + LlvmCodeGen* codegen, llvm::Function** fn) WARN_UNUSED_RESULT = 0; + + /// Simple debug string that provides no expr subclass-specific information + virtual std::string DebugString() const; + static std::string DebugString(const std::vector<ScalarExpr*>& exprs); + std::string DebugString(const std::string& expr_name) const; + + /// Computes a memory efficient layout for storing the results of evaluating 'exprs'. + /// The results are assumed to be void* slot types (vs AnyVal types). Varlen data is + /// not included (e.g. there will be space for a StringValue, but not the data + /// referenced by it). + /// + /// Returns the number of bytes necessary to store all the results and offsets + /// where the result for each expr should be stored. + /// + /// Variable length types are guaranteed to be at the end and 'var_result_begin' + /// will be set to the beginning byte offset where variable length results begin. + /// 'var_result_begin' will be set to -1 if there are no variable len types. + static int ComputeResultsLayout(const vector<ScalarExpr*>& exprs, vector<int>* offsets, + int* var_result_begin); + + /// Releases cache entries to libCache for all nodes in the ScalarExpr tree. + virtual void Close(); + + /// Convenience functions for closing a list of ScalarExpr.
+ static void Close(const std::vector<ScalarExpr*>& exprs); + + static const char* LLVM_CLASS_NAME; + + protected: + friend class Expr; + friend class AggFn; + friend class AggFnEvaluator; + friend class AndPredicate; + friend class CaseExpr; + friend class CoalesceExpr; + friend class ConditionalFunctions; + friend class CompoundPredicate; + friend class DecimalFunctions; + friend class DecimalOperators; + friend class HiveUdfCall; + friend class IfExpr; + friend class InPredicate; + friend class IsNotEmptyPredicate; + friend class IsNullExpr; + friend class KuduPartitionExpr; + friend class Literal; + friend class NullIfExpr; + friend class NullLiteral; + friend class OrPredicate; + friend class Predicate; + friend class ScalarExprEvaluator; + friend class ScalarFnCall; + + /// For BE tests + friend class ExprTest; + friend class ExprCodegenTest; + friend class HashTableTest; + friend class OldHashTableTest; + + /// Cached LLVM IR for the compute function. Set this in GetCodegendComputeFn(). + llvm::Function* ir_compute_fn_ = nullptr; + + /// Assigns indices into the FunctionContext vector 'fn_ctxs_' in an evaluator to + /// nodes which need FunctionContext in the tree. 'next_fn_ctx_idx' is the index + /// of the next available entry in the vector. It's updated as this function is + /// called recursively down the tree. + void AssignFnCtxIdx(int* next_fn_ctx_idx); + + int fn_ctx_idx() const { return fn_ctx_idx_; } + + /// Creates a single ScalarExpr node based on 'texpr_node' and returns it + /// in 'expr'. Return error status on failure. + static Status CreateNode(const TExprNode& texpr_node, ObjectPool* pool, + ScalarExpr** expr) WARN_UNUSED_RESULT; + + ScalarExpr(const ColumnType& type, bool is_constant); + ScalarExpr(const TExprNode& node); + + /// Virtual compute functions for each return type. Each subclass should override + /// the functions for the return type(s) it supports. For example, a boolean function + /// will only override GetBooleanVal(). 
Some Exprs, like Literal, have many possible + /// return types and will override multiple Get*Val() functions. These functions should + /// be called by other ScalarExpr and ScalarExprEvaluator only. + virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual SmallIntVal GetSmallIntVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual CollectionVal GetCollectionVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual TimestampVal GetTimestampVal(ScalarExprEvaluator*, const TupleRow*) const; + virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) const; + + /// Initializes all nodes in the expr tree. Subclasses overriding this function should + /// call ScalarExpr::Init() to recursively call Init() on the expr tree. + virtual Status Init(const RowDescriptor& row_desc, RuntimeState* state) + WARN_UNUSED_RESULT; + + /// Initializes 'eval' for execution. If scope is FRAGMENT_LOCAL, both + /// fragment-local and thread-local states should be initialized. If scope is + /// THREAD_LOCAL, only thread-local states should be initialized. THREAD_LOCAL + /// scope is used for cloned evaluator. + /// + /// Subclasses overriding this function should call ScalarExpr::OpenEvaluator() to + /// recursively call OpenEvaluator() on all nodes in the ScalarExpr tree.
+ virtual Status OpenEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const + WARN_UNUSED_RESULT; + + /// Free resources held by the 'eval' allocated during OpenEvaluator(). + /// If scope is FRAGMENT_LOCAL, both fragment-local and thread-local states should be + /// torn down. If scope is THREAD_LOCAL, only thread-local state should be torn down. + /// + /// Subclasses overriding this function should call ScalarExpr::CloseEvaluator() to + /// recursively call CloseEvaluator() on all nodes in the ScalarExpr tree. + virtual void CloseEvaluator(FunctionContext::FunctionStateScope scope, + RuntimeState* state, ScalarExprEvaluator* eval) const; + + /// Computes the size of the varargs buffer in bytes (0 bytes if no varargs). + /// Overridden by ScalarFnCall. + virtual int ComputeVarArgsBufferSize() const { return 0; } + + /// Helper function to create an empty llvm::Function* with the signature: + /// *Val name(ScalarExprEvaluator*, TupleRow*); + /// + /// 'name' is the name of the returned llvm::Function*. The arguments to the IR function + /// are returned in 'args'. The return type is determined by the return type of the expr + /// tree. + llvm::Function* CreateIrFunctionPrototype(const std::string& name, LlvmCodeGen* codegen, + llvm::Value* (*args)[2]); + + /// Generates an IR compute function that calls the interpreted compute function. + /// It doesn't provide any performance benefit over the interpreted path. This is + /// useful for builtins (e.g. && and || operators) and UDF which don't generate + /// custom IR code but are part of a larger expr tree. The IR compute function of + /// the larger expr tree may still benefit from custom IR and inlining of other + /// sub-expressions. + /// + /// TODO: this should be removed in the long run and replaced with cross-compilation + /// together with constant propagation and loop unrolling. 
+ Status GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, llvm::Function** fn) + WARN_UNUSED_RESULT; + + /// Helper function for GetCodegendComputeFnWrapper(). Returns the cross-compiled IR + /// function of the static Get*Val wrapper function for return type 'type'. + llvm::Function* GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* codegen); + + private: + /// 'fn_ctx_idx_' is the index into the FunctionContext vector in ScalarExprEvaluator + /// for storing FunctionContext needed to evaluate this ScalarExpr node. It's -1 if this + /// ScalarExpr doesn't need a FunctionContext. The FunctionContext is managed by the + /// evaluator and initialized by calling ScalarExpr::OpenEvaluator(). + int fn_ctx_idx_ = -1; + + /// [fn_ctx_idx_start_, fn_ctx_idx_end_) defines the range in FunctionContext vector + /// in ScalarExprEvaluator for the expression subtree rooted at this ScalarExpr node. + int fn_ctx_idx_start_ = 0; + int fn_ctx_idx_end_ = 0; + + /// True if this expr should be treated as a constant expression. True if either: + /// * This expr was sent from the frontend and Expr.isConstant() was true. + /// * This expr is a constant literal created in the backend. + const bool is_constant_; + + /// Static wrappers which call the compute function of the given ScalarExpr, passing + /// it the ScalarExprEvaluator and TupleRow. These are cross-compiled and called by + /// the IR wrapper functions generated by GetCodegendComputeFnWrapper(). 
+ static BooleanVal GetBooleanVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static TinyIntVal GetTinyIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static SmallIntVal GetSmallIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static IntVal GetIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static BigIntVal GetBigIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static FloatVal GetFloatVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static DoubleVal GetDoubleVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static StringVal GetStringVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static TimestampVal GetTimestampVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); + static DecimalVal GetDecimalVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*); +}; + +} + +#endif
