[03/14] incubator-impala git commit: IMPALA-4192: Disentangle Expr and ExprContext

kwho Sun, 18 Jun 2017 11:37:11 -0700

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/null-literal.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/null-literal.h b/be/src/exprs/null-literal.h
index d7a495a..91f4ca4 100644
--- a/be/src/exprs/null-literal.h
+++ b/be/src/exprs/null-literal.h
@@ -19,38 +19,54 @@
 #ifndef IMPALA_EXPRS_NULL_LITERAL_H_
 #define IMPALA_EXPRS_NULL_LITERAL_H_
 
-#include "exprs/expr.h"
+#include "exprs/scalar-expr.h"
 
 namespace impala {
 
+using impala_udf::FunctionContext;
+using impala_udf::BooleanVal;
+using impala_udf::TinyIntVal;
+using impala_udf::SmallIntVal;
+using impala_udf::IntVal;
+using impala_udf::BigIntVal;
+using impala_udf::FloatVal;
+using impala_udf::DoubleVal;
+using impala_udf::TimestampVal;
+using impala_udf::StringVal;
+using impala_udf::DecimalVal;
+
 class TExprNode;
 
-class NullLiteral: public Expr {
+class NullLiteral: public ScalarExpr {
  public:
-  NullLiteral(PrimitiveType type) : Expr(type, true, false) { }
-
-  virtual bool IsLiteral() const;
-
-  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** 
fn);
+  virtual bool IsLiteral() const override { return true; }
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** 
fn)
+      override WARN_UNUSED_RESULT;
+  virtual std::string DebugString() const override;
 
-  virtual impala_udf::BooleanVal GetBooleanVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::TinyIntVal GetTinyIntVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::SmallIntVal GetSmallIntVal(ExprContext*, const 
TupleRow*);
-  virtual impala_udf::IntVal GetIntVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::BigIntVal GetBigIntVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::FloatVal GetFloatVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::DoubleVal GetDoubleVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::StringVal GetStringVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::TimestampVal GetTimestampVal(ExprContext*, const 
TupleRow*);
-  virtual impala_udf::DecimalVal GetDecimalVal(ExprContext*, const TupleRow*);
-  virtual impala_udf::CollectionVal GetCollectionVal(ExprContext*, const 
TupleRow*);
-
-  virtual std::string DebugString() const;
+  /// Constructor for test.
+  NullLiteral(PrimitiveType type) : ScalarExpr(type, true) { }
 
  protected:
-  friend class Expr;
+  friend class ScalarExpr;
+  friend class ScalarExprEvaluator;
+
+  NullLiteral(const TExprNode& node) : ScalarExpr(node) { }
 
-  NullLiteral(const TExprNode& node) : Expr(node) { }
+  virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) 
const override;
+  virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) 
const override;
+  virtual SmallIntVal GetSmallIntVal(
+      ScalarExprEvaluator*, const TupleRow*) const override;
+  virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const 
override;
+  virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const 
override;
+  virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const 
override;
+  virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const 
override;
+  virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const 
override;
+  virtual TimestampVal GetTimestampVal(
+      ScalarExprEvaluator*, const TupleRow*) const override;
+  virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) 
const override;
+  virtual CollectionVal GetCollectionVal(
+      ScalarExprEvaluator*, const TupleRow*) const override;
 };
 
 }


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/operators.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/operators.h b/be/src/exprs/operators.h
index 9b386c8..d3920e8 100644
--- a/be/src/exprs/operators.h
+++ b/be/src/exprs/operators.h
@@ -20,10 +20,21 @@
 
 #include "udf/udf.h"
 
-using namespace impala_udf;
-
 namespace impala {
 
+using impala_udf::FunctionContext;
+using impala_udf::AnyVal;
+using impala_udf::BooleanVal;
+using impala_udf::TinyIntVal;
+using impala_udf::SmallIntVal;
+using impala_udf::IntVal;
+using impala_udf::BigIntVal;
+using impala_udf::FloatVal;
+using impala_udf::DoubleVal;
+using impala_udf::TimestampVal;
+using impala_udf::StringVal;
+using impala_udf::DecimalVal;
+
 /// Operators written against the UDF interface.
 class Operators {
  public:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/predicate.h b/be/src/exprs/predicate.h
index 28b8b73..d5ef4b8 100644
--- a/be/src/exprs/predicate.h
+++ b/be/src/exprs/predicate.h
@@ -19,17 +19,17 @@
 #ifndef IMPALA_EXPRS_PREDICATE_H_
 #define IMPALA_EXPRS_PREDICATE_H_
 
-#include "exprs/expr.h"
+#include "exprs/scalar-expr.h"
 
 namespace impala {
 
 class TExprNode;
 
-class Predicate: public Expr {
+class Predicate: public ScalarExpr {
  protected:
-  friend class Expr;
+  friend class ScalarExpr;
 
-  Predicate(const TExprNode& node) : Expr(node) {}
+  Predicate(const TExprNode& node) : ScalarExpr(node) {}
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr-evaluator.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr-evaluator.cc 
b/be/src/exprs/scalar-expr-evaluator.cc
new file mode 100644
index 0000000..ae0aa15
--- /dev/null
+++ b/be/src/exprs/scalar-expr-evaluator.cc
@@ -0,0 +1,452 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/scalar-expr-evaluator.h"
+
+#include <sstream>
+
+#include "common/object-pool.h"
+#include "common/status.h"
+#include "exprs/anyval-util.h"
+#include "exprs/scalar-expr.h"
+#include "exprs/aggregate-functions.h"
+#include "exprs/bit-byte-functions.h"
+#include "exprs/case-expr.h"
+#include "exprs/cast-functions.h"
+#include "exprs/compound-predicates.h"
+#include "exprs/conditional-functions.h"
+#include "exprs/decimal-functions.h"
+#include "exprs/decimal-operators.h"
+#include "exprs/hive-udf-call.h"
+#include "exprs/in-predicate.h"
+#include "exprs/is-not-empty-predicate.h"
+#include "exprs/is-null-predicate.h"
+#include "exprs/like-predicate.h"
+#include "exprs/literal.h"
+#include "exprs/math-functions.h"
+#include "exprs/null-literal.h"
+#include "exprs/operators.h"
+#include "exprs/scalar-expr-evaluator.h"
+#include "exprs/scalar-fn-call.h"
+#include "exprs/slot-ref.h"
+#include "exprs/string-functions.h"
+#include "exprs/timestamp-functions.h"
+#include "exprs/tuple-is-null-predicate.h"
+#include "exprs/udf-builtins.h"
+#include "exprs/utility-functions.h"
+#include "runtime/decimal-value.inline.h"
+#include "runtime/mem-pool.h"
+#include "runtime/mem-tracker.h"
+#include "runtime/raw-value.inline.h"
+#include "runtime/runtime-state.h"
+#include "udf/udf-internal.h"
+
+#include "common/names.h"
+
+using namespace impala;
+using namespace impala_udf;
+
+const char* ScalarExprEvaluator::LLVM_CLASS_NAME = 
"class.impala::ScalarExprEvaluator";
+
+ScalarExprEvaluator::ScalarExprEvaluator(const ScalarExpr& root, MemPool* 
mem_pool)
+  : mem_pool_(mem_pool),
+    root_(root) {
+}
+
+ScalarExprEvaluator::~ScalarExprEvaluator() {
+  DCHECK(!initialized_ || closed_);
+}
+
+// Allocates an evaluator for the expression tree rooted at 'root', registers it with
+// 'pool' and returns it through 'eval'. When the tree needs FunctionContexts
+// (root.fn_ctx_idx_end_ > 0), 'fn_ctxs_' is sized to that count and filled in by
+// CreateFnCtxs(). Always returns Status::OK().
+Status ScalarExprEvaluator::Create(const ScalarExpr& root, RuntimeState* state,
+    ObjectPool* pool, MemPool* mem_pool, ScalarExprEvaluator** eval) {
+  ScalarExprEvaluator* const new_eval =
+      pool->Add(new ScalarExprEvaluator(root, mem_pool));
+  *eval = new_eval;
+  if (root.fn_ctx_idx_end_ <= 0) {
+    // No FunctionContext is required: nothing may have been set up so far.
+    DCHECK_EQ(new_eval->fn_ctxs_.size(), 0);
+    DCHECK_EQ(root.fn_ctx_idx_end_, 0);
+    DCHECK_EQ(root.fn_ctx_idx_, -1);
+    DCHECK(new_eval->fn_ctxs_ptr_ == nullptr);
+  } else {
+    vector<FunctionContext*>& fn_ctxs = new_eval->fn_ctxs_;
+    fn_ctxs.resize(root.fn_ctx_idx_end_, nullptr);
+    new_eval->CreateFnCtxs(state, root);
+    // Every slot must have been populated by the tree walk.
+    DCHECK_EQ(fn_ctxs.size(), root.fn_ctx_idx_end_);
+    for (FunctionContext* fn_ctx : fn_ctxs) DCHECK(fn_ctx != nullptr);
+    new_eval->fn_ctxs_ptr_ = fn_ctxs.data();
+  }
+  new_eval->initialized_ = true;
+  return Status::OK();
+}
+
+Status ScalarExprEvaluator::Create(const vector<ScalarExpr*>& exprs, 
RuntimeState* state,
+    ObjectPool* pool, MemPool* mem_pool, vector<ScalarExprEvaluator*>* evals) {
+  for (const ScalarExpr* expr : exprs) {
+    ScalarExprEvaluator* eval;
+    Status status = Create(*expr, state, pool, mem_pool, &eval);
+    // Always add the evaluator to the vector so it can be cleaned up.
+    evals->push_back(eval);
+    RETURN_IF_ERROR(status);
+  }
+  return Status::OK();
+}
+
+void ScalarExprEvaluator::CreateFnCtxs(RuntimeState* state, const ScalarExpr& 
expr) {
+  const int fn_ctx_idx = expr.fn_ctx_idx();
+  const bool has_fn_ctx = fn_ctx_idx != -1;
+  vector<FunctionContext::TypeDesc> arg_types;
+  for (const ScalarExpr* child : expr.children()) {
+    CreateFnCtxs(state, *child);
+    if (has_fn_ctx) 
arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(child->type()));
+  }
+  if (has_fn_ctx) {
+    FunctionContext::TypeDesc return_type =
+        AnyValUtil::ColumnTypeToTypeDesc(expr.type());
+    const int varargs_buffer_size = expr.ComputeVarArgsBufferSize();
+    DCHECK_GE(fn_ctx_idx, 0);
+    DCHECK_LT(fn_ctx_idx, fn_ctxs_.size());
+    DCHECK(fn_ctxs_[fn_ctx_idx] == nullptr);
+    fn_ctxs_[fn_ctx_idx] = FunctionContextImpl::CreateContext(
+        state, mem_pool_, return_type, arg_types, varargs_buffer_size);
+  }
+}
+
+// Opens this evaluator by invoking the root expr's OpenEvaluator() hook. Only the first
+// call does any work; once 'opened_' is set, later calls return OK immediately.
+Status ScalarExprEvaluator::Open(RuntimeState* state) {
+  DCHECK(initialized_);
+  if (!opened_) {
+    // Mark as opened before running the hook so repeated calls stay idempotent.
+    opened_ = true;
+    // Fragment-local state is only initialized for original evaluators. Clones inherit
+    // the original's fragment state and only need to have thread-local state
+    // initialized.
+    // TODO: Move FRAGMENT_LOCAL state to ScalarExpr. ScalarExprEvaluator should only
+    // have THREAD_LOCAL state.
+    FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL;
+    if (is_clone_) scope = FunctionContext::THREAD_LOCAL;
+    return root_.OpenEvaluator(scope, state, this);
+  }
+  return Status::OK();
+}
+
+// Opens every evaluator in 'evals' in order. Stops at, and returns, the first error.
+Status ScalarExprEvaluator::Open(
+    const vector<ScalarExprEvaluator*>& evals, RuntimeState* state) {
+  for (ScalarExprEvaluator* eval : evals) {
+    RETURN_IF_ERROR(eval->Open(state));
+  }
+  return Status::OK();
+}
+
+// Runs the root expr's CloseEvaluator() hook, then closes and deletes every
+// FunctionContext held in 'fn_ctxs_'. Has no effect if already closed.
+void ScalarExprEvaluator::Close(RuntimeState* state) {
+  if (closed_) return;
+  // Same scope selection as Open(): THREAD_LOCAL for clones, FRAGMENT_LOCAL otherwise.
+  FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL;
+  if (is_clone_) scope = FunctionContext::THREAD_LOCAL;
+  root_.CloseEvaluator(scope, state, this);
+  for (FunctionContext* fn_ctx : fn_ctxs_) {
+    fn_ctx->impl()->Close();
+    delete fn_ctx;
+  }
+  fn_ctxs_.clear();
+  // Memory allocated by 'fn_ctxs_' is still in 'mem_pool_'. It's the responsibility of
+  // the owner of 'mem_pool_' to free it.
+  closed_ = true;
+}
+
+void ScalarExprEvaluator::Close(
+    const vector<ScalarExprEvaluator*>& evals, RuntimeState* state) {
+  for (ScalarExprEvaluator* eval : evals) eval->Close(state);
+}
+
+// Creates a copy of this (already opened) evaluator in 'pool' and returns it through
+// 'cloned_eval'. Each FunctionContext is cloned against 'mem_pool'. The copy is marked
+// as an opened clone and the root expr's OpenEvaluator() hook is run on it with
+// THREAD_LOCAL scope; the status of that hook is returned.
+Status ScalarExprEvaluator::Clone(ObjectPool* pool, RuntimeState* state,
+    MemPool* mem_pool, ScalarExprEvaluator** cloned_eval) const {
+  DCHECK(initialized_);
+  DCHECK(opened_);
+  ScalarExprEvaluator* const clone =
+      pool->Add(new ScalarExprEvaluator(root_, mem_pool));
+  *cloned_eval = clone;
+  for (FunctionContext* fn_ctx : fn_ctxs_) {
+    clone->fn_ctxs_.push_back(fn_ctx->impl()->Clone(mem_pool));
+  }
+  clone->fn_ctxs_ptr_ = clone->fn_ctxs_.data();
+  clone->is_clone_ = true;
+  clone->initialized_ = true;
+  clone->opened_ = true;
+  clone->output_scale_ = output_scale_;
+  return root_.OpenEvaluator(FunctionContext::THREAD_LOCAL, state, clone);
+}
+
+// Clones every evaluator in 'evals' and appends the clones to 'cloned_evals', which
+// must be non-null and empty on entry. Stops at the first failing clone and returns its
+// status. The failing clone is still appended: the single-evaluator Clone() has already
+// registered it in 'pool' and marked it initialized, so the caller must be able to
+// Close() it (the destructor DCHECKs that initialized evaluators were closed).
+Status ScalarExprEvaluator::Clone(ObjectPool* pool, RuntimeState* state,
+    MemPool* mem_pool, const vector<ScalarExprEvaluator*>& evals,
+    vector<ScalarExprEvaluator*>* cloned_evals) {
+  DCHECK(cloned_evals != nullptr);
+  DCHECK(cloned_evals->empty());
+  for (const ScalarExprEvaluator* eval : evals) {
+    ScalarExprEvaluator* cloned_eval = nullptr;
+    Status status = eval->Clone(pool, state, mem_pool, &cloned_eval);
+    // Always add the clone to the vector so it can be cleaned up.
+    cloned_evals->push_back(cloned_eval);
+    RETURN_IF_ERROR(status);
+  }
+  return Status::OK();
+}
+
+// Returns true if any FunctionContext of this evaluator that is not yet closed reports
+// local allocations.
+bool ScalarExprEvaluator::HasLocalAllocations() const {
+  for (FunctionContext* fn_ctx : fn_ctxs_) {
+    // Closed contexts are skipped without being queried.
+    if (!fn_ctx->impl()->closed() && fn_ctx->impl()->HasLocalAllocations()) return true;
+  }
+  return false;
+}
+
+// Returns true if at least one evaluator in 'evals' has local allocations.
+bool ScalarExprEvaluator::HasLocalAllocations(
+    const vector<ScalarExprEvaluator*>& evals) {
+  for (const ScalarExprEvaluator* eval : evals) {
+    if (eval->HasLocalAllocations()) return true;
+  }
+  return false;
+}
+
+// Frees the local allocations of every FunctionContext of this evaluator that is not
+// yet closed.
+void ScalarExprEvaluator::FreeLocalAllocations() {
+  for (FunctionContext* fn_ctx : fn_ctxs_) {
+    if (!fn_ctx->impl()->closed()) fn_ctx->impl()->FreeLocalAllocations();
+  }
+}
+
+// Frees the local allocations of every evaluator in 'evals'.
+void ScalarExprEvaluator::FreeLocalAllocations(
+    const vector<ScalarExprEvaluator*>& evals) {
+  for (ScalarExprEvaluator* eval : evals) {
+    eval->FreeLocalAllocations();
+  }
+}
+
+// Scans the FunctionContexts with indices in [start_idx, end_idx) and returns the error
+// of the first one that has one set, or OK if none does. An 'end_idx' of -1 means "up
+// to the end of 'fn_ctxs_'".
+Status ScalarExprEvaluator::GetError(int start_idx, int end_idx) const {
+  DCHECK(opened_);
+  if (end_idx == -1) end_idx = fn_ctxs_.size();
+  DCHECK_GE(start_idx, 0);
+  DCHECK_LE(end_idx, fn_ctxs_.size());
+  for (int i = start_idx; i < end_idx; ++i) {
+    DCHECK_LT(i, fn_ctxs_.size());
+    FunctionContext* const fn_ctx = fn_ctxs_[i];
+    if (!fn_ctx->has_error()) continue;
+    return Status(fn_ctx->error_msg());
+  }
+  return Status::OK();
+}
+
+Status ScalarExprEvaluator::GetConstValue(RuntimeState* state, const 
ScalarExpr& expr,
+    AnyVal** const_val) {
+  DCHECK(opened_);
+  if (!expr.is_constant()) {
+    *const_val = nullptr;
+    return Status::OK();
+  }
+
+  // A constant expression shouldn't have any SlotRefs expr in it.
+  DCHECK_EQ(expr.GetSlotIds(), 0);
+  DCHECK(mem_pool_ != nullptr);
+  const ColumnType& result_type = expr.type();
+  RETURN_IF_ERROR(AllocateAnyVal(state, mem_pool_, result_type,
+      "Could not allocate constant expression value", const_val));
+
+  void* result = ScalarExprEvaluator::GetValue(expr, nullptr);
+  AnyValUtil::SetAnyVal(result, result_type, *const_val);
+  if (result_type.IsStringType()) {
+    StringVal* sv = reinterpret_cast<StringVal*>(*const_val);
+    if (!sv->is_null && sv->len > 0) {
+      // Make sure the memory is owned by this evaluator.
+      char* ptr_copy = 
reinterpret_cast<char*>(mem_pool_->TryAllocate(sv->len));
+      if (ptr_copy == nullptr) {
+        return mem_pool_->mem_tracker()->MemLimitExceeded(
+            state, "Could not allocate constant string value", sv->len);
+      }
+      memcpy(ptr_copy, sv->ptr, sv->len);
+      sv->ptr = reinterpret_cast<uint8_t*>(ptr_copy);
+    }
+  }
+  return GetError(expr.fn_ctx_idx_start_, expr.fn_ctx_idx_end_);
+}
+
+void* ScalarExprEvaluator::GetValue(const TupleRow* row) {
+  return GetValue(root_, row);
+}
+
+void* ScalarExprEvaluator::GetValue(const ScalarExpr& expr, const TupleRow* 
row) {
+  switch (expr.type_.type) {
+    case TYPE_BOOLEAN: {
+      impala_udf::BooleanVal v = expr.GetBooleanVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.bool_val = v.val;
+      return &result_.bool_val;
+    }
+    case TYPE_TINYINT: {
+      impala_udf::TinyIntVal v = expr.GetTinyIntVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.tinyint_val = v.val;
+      return &result_.tinyint_val;
+    }
+    case TYPE_SMALLINT: {
+      impala_udf::SmallIntVal v = expr.GetSmallIntVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.smallint_val = v.val;
+      return &result_.smallint_val;
+    }
+    case TYPE_INT: {
+      impala_udf::IntVal v = expr.GetIntVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.int_val = v.val;
+      return &result_.int_val;
+    }
+    case TYPE_BIGINT: {
+      impala_udf::BigIntVal v = expr.GetBigIntVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.bigint_val = v.val;
+      return &result_.bigint_val;
+    }
+    case TYPE_FLOAT: {
+      impala_udf::FloatVal v = expr.GetFloatVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.float_val = v.val;
+      return &result_.float_val;
+    }
+    case TYPE_DOUBLE: {
+      impala_udf::DoubleVal v = expr.GetDoubleVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.double_val = v.val;
+      return &result_.double_val;
+    }
+    case TYPE_STRING:
+    case TYPE_VARCHAR: {
+      impala_udf::StringVal v = expr.GetStringVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.string_val.ptr = reinterpret_cast<char*>(v.ptr);
+      result_.string_val.len = v.len;
+      return &result_.string_val;
+    }
+    case TYPE_CHAR: {
+      impala_udf::StringVal v = expr.GetStringVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.string_val.ptr = reinterpret_cast<char*>(v.ptr);
+      result_.string_val.len = v.len;
+      if (expr.type_.IsVarLenStringType()) {
+        return &result_.string_val;
+      } else {
+        return result_.string_val.ptr;
+      }
+    }
+    case TYPE_TIMESTAMP: {
+      impala_udf::TimestampVal v = expr.GetTimestampVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.timestamp_val = TimestampValue::FromTimestampVal(v);
+      return &result_.timestamp_val;
+    }
+    case TYPE_DECIMAL: {
+      DecimalVal v = expr.GetDecimalVal(this, row);
+      if (v.is_null) return nullptr;
+      switch (expr.type_.GetByteSize()) {
+        case 4:
+          result_.decimal4_val = v.val4;
+          return &result_.decimal4_val;
+        case 8:
+          result_.decimal8_val = v.val8;
+          return &result_.decimal8_val;
+        case 16:
+          result_.decimal16_val = v.val16;
+          return &result_.decimal16_val;
+        default:
+          DCHECK(false) << expr.type_.GetByteSize();
+          return nullptr;
+      }
+    }
+    case TYPE_ARRAY:
+    case TYPE_MAP: {
+      impala_udf::CollectionVal v = expr.GetCollectionVal(this, row);
+      if (v.is_null) return nullptr;
+      result_.collection_val.ptr = v.ptr;
+      result_.collection_val.num_tuples = v.num_tuples;
+      return &result_.collection_val;
+    }
+    default:
+      DCHECK(false) << "Type not implemented: " << expr.type_.DebugString();
+      return nullptr;
+  }
+}
+
+void ScalarExprEvaluator::PrintValue(const TupleRow* row, string* str) {
+  RawValue::PrintValue(GetValue(row), root_.type(), output_scale_, str);
+}
+
+void ScalarExprEvaluator::PrintValue(void* value, string* str) {
+  RawValue::PrintValue(value, root_.type(), output_scale_, str);
+}
+
+void ScalarExprEvaluator::PrintValue(void* value, stringstream* stream) {
+  RawValue::PrintValue(value, root_.type(), output_scale_, stream);
+}
+
+void ScalarExprEvaluator::PrintValue(const TupleRow* row, stringstream* 
stream) {
+  RawValue::PrintValue(GetValue(row), root_.type(), output_scale_, stream);
+}
+
+BooleanVal ScalarExprEvaluator::GetBooleanVal(TupleRow* row) {
+  return root_.GetBooleanVal(this, row);
+}
+
+TinyIntVal ScalarExprEvaluator::GetTinyIntVal(TupleRow* row) {
+  return root_.GetTinyIntVal(this, row);
+}
+
+SmallIntVal ScalarExprEvaluator::GetSmallIntVal(TupleRow* row) {
+  return root_.GetSmallIntVal(this, row);
+}
+
+IntVal ScalarExprEvaluator::GetIntVal(TupleRow* row) {
+  return root_.GetIntVal(this, row);
+}
+
+BigIntVal ScalarExprEvaluator::GetBigIntVal(TupleRow* row) {
+  return root_.GetBigIntVal(this, row);
+}
+
+FloatVal ScalarExprEvaluator::GetFloatVal(TupleRow* row) {
+  return root_.GetFloatVal(this, row);
+}
+
+DoubleVal ScalarExprEvaluator::GetDoubleVal(TupleRow* row) {
+  return root_.GetDoubleVal(this, row);
+}
+
+StringVal ScalarExprEvaluator::GetStringVal(TupleRow* row) {
+  return root_.GetStringVal(this, row);
+}
+
+CollectionVal ScalarExprEvaluator::GetCollectionVal(TupleRow* row) {
+  return root_.GetCollectionVal(this, row);
+}
+
+TimestampVal ScalarExprEvaluator::GetTimestampVal(TupleRow* row) {
+  return root_.GetTimestampVal(this, row);
+}
+
+DecimalVal ScalarExprEvaluator::GetDecimalVal(TupleRow* row) {
+  return root_.GetDecimalVal(this, row);
+}
+
+void ScalarExprEvaluator::InitBuiltinsDummy() {
+  // Call one function from each of the classes to pull all the symbols
+  // from that class in.
+  AggregateFunctions::InitNull(nullptr, nullptr);
+  BitByteFunctions::CountSet(nullptr, TinyIntVal::null());
+  CastFunctions::CastToBooleanVal(nullptr, TinyIntVal::null());
+  CompoundPredicate::Not(nullptr, BooleanVal::null());
+  ConditionalFunctions::NullIfZero(nullptr, TinyIntVal::null());
+  DecimalFunctions::Precision(nullptr, DecimalVal::null());
+  DecimalOperators::CastToDecimalVal(nullptr, DecimalVal::null());
+  InPredicate::InIterate(nullptr, BigIntVal::null(), 0, nullptr);
+  IsNullPredicate::IsNull(nullptr, BooleanVal::null());
+  LikePredicate::Like(nullptr, StringVal::null(), StringVal::null());
+  Operators::Add_IntVal_IntVal(nullptr, IntVal::null(), IntVal::null());
+  MathFunctions::Pi(nullptr);
+  StringFunctions::Length(nullptr, StringVal::null());
+  TimestampFunctions::Year(nullptr, TimestampVal::null());
+  TimestampFunctions::UnixAndFromUnixPrepare(nullptr, 
FunctionContext::FRAGMENT_LOCAL);
+  UdfBuiltins::Pi(nullptr);
+  UtilityFunctions::Pid(nullptr);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr-evaluator.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr-evaluator.h 
b/be/src/exprs/scalar-expr-evaluator.h
new file mode 100644
index 0000000..5792d14
--- /dev/null
+++ b/be/src/exprs/scalar-expr-evaluator.h
@@ -0,0 +1,258 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_EXPRS_SCALAR_EXPR_EVALUATOR_H
+#define IMPALA_EXPRS_SCALAR_EXPR_EVALUATOR_H
+
+#include <boost/scoped_ptr.hpp>
+
+#include "common/object-pool.h"
+#include "common/status.h"
+#include "exprs/expr-value.h"
+#include "udf/udf-internal.h" // for CollectionVal
+#include "udf/udf.h"
+
+namespace impala {
+
+using impala_udf::FunctionContext;
+using impala_udf::AnyVal;
+using impala_udf::BooleanVal;
+using impala_udf::TinyIntVal;
+using impala_udf::SmallIntVal;
+using impala_udf::IntVal;
+using impala_udf::BigIntVal;
+using impala_udf::FloatVal;
+using impala_udf::DoubleVal;
+using impala_udf::TimestampVal;
+using impala_udf::StringVal;
+using impala_udf::DecimalVal;
+using impala_udf::CollectionVal;
+
+class MemPool;
+class RuntimeState;
+class ScalarExpr;
+class Status;
+class TupleRow;
+
+/// ScalarExprEvaluator is the interface for evaluating a scalar expression. 
It holds a
+/// reference to the root of a ScalarExpr tree, runtime state (e.g. 
FunctionContexts)
+/// needed during evaluation and also a buffer for the expression evaluation 
result.
+/// A single evaluator is not thread-safe. It implements Get*Val() interfaces 
for every
+/// possible return type and drives the execution by calling the Get*Val() 
function of
+/// the root ScalarExpr with the input tuple row.
+///
+/// A ScalarExprEvaluator is created using the Create() interface. It must be 
initialized
+/// by calling Open() before use and Close() must also be called to free up 
resources
+/// owned by the evaluator.
+///
+/// FunctionContext is the interface for Impala to communicate with built-in 
functions,
+/// UDF and UDAF. It is passed to UDF/UDAF to store its thread-private states, 
propagate
+/// errors and allocate memory. An evaluator contains a vector of 
FunctionContext for
+/// the ScalarExpr nodes in the Expr tree. The index of each node's entry is defined in
+/// its 'fn_ctx_idx_' field. The range in the vector for the sub-expression tree
+/// rooted at a node is defined by [fn_ctx_idx_start_, fn_ctx_idx_end_).
+///
+class ScalarExprEvaluator {
+ public:
+  ~ScalarExprEvaluator();
+
+  /// Creates an evaluator for the scalar expression tree rooted at 'expr' and 
all
+  /// FunctionContexts needed during evaluation. Allocations from this 
evaluator will
+  /// be from 'mem_pool'. The newly created evaluator will be stored in 'pool' and
+  /// returned in 'eval'. Returns error status on failure. Note that it's the caller's
+  /// responsibility to call Close() on all created evaluators even if this function
+  /// returns error on initialization failure.
+  static Status Create(const ScalarExpr& expr, RuntimeState* state, 
ObjectPool* pool,
+      MemPool* mem_pool, ScalarExprEvaluator** eval) WARN_UNUSED_RESULT;
+
+  /// Convenience function for creating multiple ScalarExprEvaluators. The 
evaluators
+  /// are returned in 'evals'.
+  static Status Create(const std::vector<ScalarExpr*>& exprs, RuntimeState* 
state,
+      ObjectPool* pool, MemPool* mem_pool, std::vector<ScalarExprEvaluator*>* 
evals)
+      WARN_UNUSED_RESULT;
+
+  /// Initializes the ScalarExprEvaluator on all nodes in the ScalarExpr tree. 
This is
+  /// also the location in which constant arguments to functions are computed. 
Does not
+  /// need to be called on clones. Idempotent (this allows exprs to be opened 
multiple
+  /// times in subplans without reinitializing function states).
+  Status Open(RuntimeState* state) WARN_UNUSED_RESULT;
+
+  /// Convenience function for opening multiple ScalarExprEvaluators.
+  static Status Open(const std::vector<ScalarExprEvaluator*>& evals,
+      RuntimeState* state) WARN_UNUSED_RESULT;
+
+  /// Free resources held by this evaluator. Must be called on every 
ScalarExprEvaluator,
+  /// including clones. Has no effect if already closed.
+  void Close(RuntimeState* state);
+
+  /// Convenience function for closing multiple ScalarExprEvaluators.
+  static void Close(const std::vector<ScalarExprEvaluator*>& evals,
+      RuntimeState* state);
+
+  /// Creates a copy of this ScalarExprEvaluator. Open() must be called first. 
The copy
+  /// contains clones of each FunctionContext, which share the fragment-local 
state of the
+  /// original one but have their own FreePool and thread-local state. This 
should be used
+  /// to create an ScalarExprEvaluator for each execution thread that needs to 
evaluate
+  /// 'root_'. All allocations will be from 'mem_pool' so callers should use 
different
+  /// MemPool for evaluators in different threads. Note that clones are 
considered opened.
+  /// The cloned ScalarExprEvaluator cannot be used after the original 
ScalarExprEvaluator
+  /// is destroyed because it may reference fragment-local state from the 
original.
+  /// TODO: IMPALA-4743: Evaluate input arguments in ScalarExpr::Init() and 
store them
+  /// in ScalarExpr.
+  Status Clone(ObjectPool* pool, RuntimeState* state, MemPool* mem_pool,
+      ScalarExprEvaluator** new_eval) const WARN_UNUSED_RESULT;
+
+  /// Convenience function for cloning multiple ScalarExprEvaluators. The newly
+  /// created evaluators are appended to 'new_evals'.
+  static Status Clone(ObjectPool* pool, RuntimeState* state, MemPool* mem_pool,
+      const std::vector<ScalarExprEvaluator*>& evals,
+      std::vector<ScalarExprEvaluator*>* new_evals) WARN_UNUSED_RESULT;
+
+  /// If 'expr' is constant, evaluates it with no input row argument and 
returns the
+  /// result in 'const_val'. Sets 'const_val' to NULL if the argument is not 
constant.
+  /// The returned AnyVal and associated varlen data are owned by this evaluator. This
+  /// should only be called after Open() has been called on this evaluator. Returns an
+  /// error if there was an error evaluating the expression or if memory could not be
+  /// allocated for the expression result.
+  Status GetConstValue(
+      RuntimeState* state, const ScalarExpr& expr, AnyVal** const_val) 
WARN_UNUSED_RESULT;
+
+  /// Calls the appropriate Get*Val() function on 'e' and stores the result in 
result_.
+  /// This is used by ScalarExpr to call GetValue() on sub-expression, rather 
than root_.
+  void* GetValue(const ScalarExpr& e, const TupleRow* row);
+
+  /// Calls the appropriate Get*Val() function on this evaluator's root_ expr 
tree, stores
+  /// the result in 'result_' and returns a pointer to it.
+  void* GetValue(const TupleRow* row);
+
+  /// Evaluates the expression of this evaluator on tuple row 'row' and returns
+  /// the results. One function for each data type implemented.
+  BooleanVal GetBooleanVal(TupleRow* row);
+  TinyIntVal GetTinyIntVal(TupleRow* row);
+  SmallIntVal GetSmallIntVal(TupleRow* row);
+  IntVal GetIntVal(TupleRow* row);
+  BigIntVal GetBigIntVal(TupleRow* row);
+  FloatVal GetFloatVal(TupleRow* row);
+  DoubleVal GetDoubleVal(TupleRow* row);
+  StringVal GetStringVal(TupleRow* row);
+  CollectionVal GetCollectionVal(TupleRow* row);
+  TimestampVal GetTimestampVal(TupleRow* row);
+  DecimalVal GetDecimalVal(TupleRow* row);
+
+  /// Returns an error status if there was any error in evaluating the 
expression
+  /// or its sub-expressions. 'start_idx' and 'end_idx' correspond to the range
+  /// within the vector of FunctionContext for the sub-expressions of interest.
+  /// The default parameters correspond to the entire expr 'root_'.
+  Status GetError(int start_idx = 0, int end_idx = -1) const 
WARN_UNUSED_RESULT;
+
+  /// Convenience functions: print value into 'str' or 'stream'. NULL turns 
into "NULL".
+  /// The first two variants will evaluate the tuple row against 'root_'.
+  void PrintValue(const TupleRow* row, std::string* str);
+  void PrintValue(const TupleRow* row, std::stringstream* stream);
+  void PrintValue(void* value, std::string* str);
+  void PrintValue(void* value, std::stringstream* stream);
+
+  /// Returns true if any of the evaluators in 'evals' has local allocations.
+  static bool HasLocalAllocations(const std::vector<ScalarExprEvaluator*>& 
evals);
+  bool HasLocalAllocations() const;
+
+  /// Frees all local allocations made by fn_ctxs_. This can be called when result
+  /// data from this evaluator is no longer needed.
+  void FreeLocalAllocations();
+  static void FreeLocalAllocations(const std::vector<ScalarExprEvaluator*>& 
evals);
+
+  /// Get the number of digits after the decimal that should be displayed for 
this value.
+  /// Returns -1 if no scale has been specified (currently the scale is only 
set for
+  /// doubles set by RoundUpTo). GetValue() must have already been called.
+  /// TODO: remove this (IMPALA-4720).
+  int output_scale() const { return output_scale_; }
+  const ScalarExpr& root() const { return root_; }
+  bool opened() const { return opened_; }
+  bool closed() const { return closed_; }
+  bool is_clone() const { return is_clone_; }
+  MemPool* mem_pool() const { return mem_pool_; }
+
+  /// The builtin functions are not called from anywhere in the code and the
+  /// symbols are therefore not included in the binary. We call these functions
+  /// by using dlsym. The compiler must think this function is callable to
+  /// not strip these symbols.
+  static void InitBuiltinsDummy();
+
+  static const char* LLVM_CLASS_NAME;
+
+ protected:
+  /// Users of fn_context();
+  friend class CaseExpr;
+  friend class HiveUdfCall;
+  friend class ScalarFnCall;
+
+  /// Retrieves a registered FunctionContext. 'i' is the 'fn_ctx_idx_' of the
+  /// corresponding sub-expression in the Expr tree.
+  FunctionContext* fn_context(int i) {
+    DCHECK_GE(i, 0);
+    DCHECK_LT(i, fn_ctxs_.size());
+    return fn_ctxs_[i];
+  }
+
+ private:
+  friend class ScalarExpr;
+
+  /// FunctionContexts for nodes in this Expr tree. Created by this ScalarExprEvaluator
+  /// and deleted in Close(), so they do not outlive the evaluator.
+  std::vector<FunctionContext*> fn_ctxs_;
+
+  /// Array access to fn_ctxs_. Used by ScalarFnCall's codegend compute 
function
+  /// to access the correct FunctionContext.
+  FunctionContext** fn_ctxs_ptr_ = nullptr;
+
+  /// Pointer to the MemPool which all allocations (including fn_ctxs_'s) come from.
+  /// Owned by the exec node which owns this evaluator.
+  MemPool* mem_pool_;
+
+  /// The expr tree which this evaluator is for.
+  const ScalarExpr& root_;
+
+  /// Stores the result of evaluation for this expr tree (or any 
sub-expression).
+  /// This is used in interpreted path when we need to return a void*.
+  ExprValue result_;
+
+  /// True if this evaluator came from a Clone() call. Used to manage 
FunctionStateScope.
+  bool is_clone_ = false;
+
+  /// Variables keeping track of current state.
+  bool initialized_ = false;
+  bool opened_ = false;
+  bool closed_ = false;
+
+  /// The number of digits after the decimal that should be displayed for this 
value.
+  /// -1 if no scale has been specified (currently the scale is only set for 
doubles
+  /// set by RoundUpTo). This value relies on FunctionContext to be allocated 
first
+  /// before it's derived so it lives in the evaluator instead of Expr.
+  /// TODO: move this to Expr initialization after IMPALA-4743 is fixed.
+  int output_scale_ = -1;
+
+  ScalarExprEvaluator(const ScalarExpr& root, MemPool* mem_pool);
+
+  /// Walks the expression tree 'expr' and fills in 'fn_ctxs_' for all Expr 
nodes
+  /// which need FunctionContext.
+  void CreateFnCtxs(RuntimeState* state, const ScalarExpr& expr);
+};
+
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr-ir.cc b/be/src/exprs/scalar-expr-ir.cc
new file mode 100644
index 0000000..051cc4e
--- /dev/null
+++ b/be/src/exprs/scalar-expr-ir.cc
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/scalar-expr.h"
+#include "udf/udf.h"
+
+#ifdef IR_COMPILE
+
+/// Compile ScalarExprEvaluator declaration to IR so we can use it in 
codegen'd functions
+#include "exprs/scalar-expr-evaluator.h"
+
+/// Dummy function to force compilation of UDF types.
+/// The arguments are pointers to prevent Clang from lowering the struct types
+/// (e.g. IntVal={bool, i32} can be coerced to i64).
+/// The parameters are unnamed and the body is empty: only the types that appear in
+/// the signature matter.
+void dummy(impala_udf::FunctionContext*, impala_udf::BooleanVal*, 
impala_udf::TinyIntVal*,
+    impala_udf::SmallIntVal*, impala_udf::IntVal*, impala_udf::BigIntVal*,
+    impala_udf::FloatVal*, impala_udf::DoubleVal*, impala_udf::StringVal*,
+    impala_udf::TimestampVal*, impala_udf::DecimalVal*, 
impala::ScalarExprEvaluator*) { }
+#endif
+
+/// The following are compute functions that are cross-compiled to both native 
and IR
+/// libraries. In the interpreted path, these functions are executed as-is 
from the native
+/// code. In the codegen'd path, we load the IR functions and replace the 
Get*Val() calls
+/// with the appropriate child's codegen'd compute function.
+
+using namespace impala;
+using namespace impala_udf;
+
+/// Static wrappers around Get*Val() functions. We'd like to be able to call these
+/// directly from native code as well as from generated IR functions.
+// Each wrapper below takes the expr as an explicit first argument and forwards to the
+// two-argument member function of the same name on 'expr', passing 'eval' and 'row'
+// through unchanged and returning its result as-is.
+BooleanVal ScalarExpr::GetBooleanVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetBooleanVal(eval, row);
+}
+
+TinyIntVal ScalarExpr::GetTinyIntVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetTinyIntVal(eval, row);
+}
+
+SmallIntVal ScalarExpr::GetSmallIntVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetSmallIntVal(eval, row);
+}
+
+IntVal ScalarExpr::GetIntVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetIntVal(eval, row);
+}
+
+BigIntVal ScalarExpr::GetBigIntVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetBigIntVal(eval, row);
+}
+
+FloatVal ScalarExpr::GetFloatVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetFloatVal(eval, row);
+}
+
+DoubleVal ScalarExpr::GetDoubleVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetDoubleVal(eval, row);
+}
+
+StringVal ScalarExpr::GetStringVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetStringVal(eval, row);
+}
+
+TimestampVal ScalarExpr::GetTimestampVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetTimestampVal(eval, row);
+}
+
+DecimalVal ScalarExpr::GetDecimalVal(
+    ScalarExpr* expr, ScalarExprEvaluator* eval, const TupleRow* row) {
+  return expr->GetDecimalVal(eval, row);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr.cc b/be/src/exprs/scalar-expr.cc
new file mode 100644
index 0000000..956e188
--- /dev/null
+++ b/be/src/exprs/scalar-expr.cc
@@ -0,0 +1,480 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/scalar-expr.h"
+
+#include <sstream>
+#include <thrift/protocol/TDebugProtocol.h>
+
+#include "codegen/codegen-anyval.h"
+#include "codegen/llvm-codegen.h"
+#include "common/object-pool.h"
+#include "common/status.h"
+#include "exprs/case-expr.h"
+#include "exprs/compound-predicates.h"
+#include "exprs/conditional-functions.h"
+#include "exprs/hive-udf-call.h"
+#include "exprs/in-predicate.h"
+#include "exprs/is-not-empty-predicate.h"
+#include "exprs/is-null-predicate.h"
+#include "exprs/kudu-partition-expr.h"
+#include "exprs/like-predicate.h"
+#include "exprs/literal.h"
+#include "exprs/null-literal.h"
+#include "exprs/operators.h"
+#include "exprs/scalar-expr-evaluator.h"
+#include "exprs/scalar-fn-call.h"
+#include "exprs/slot-ref.h"
+#include "exprs/string-functions.h"
+#include "exprs/timestamp-functions.h"
+#include "exprs/tuple-is-null-predicate.h"
+#include "exprs/udf-builtins.h"
+#include "exprs/utility-functions.h"
+#include "runtime/runtime-state.h"
+#include "runtime/tuple-row.h"
+#include "runtime/tuple.h"
+#include "udf/udf-internal.h"
+#include "udf/udf.h"
+
+#include "gen-cpp/Exprs_types.h"
+#include "gen-cpp/ImpalaService_types.h"
+
+#include "common/names.h"
+
+using namespace impala_udf;
+using namespace llvm;
+
+namespace impala {
+
+const char* ScalarExpr::LLVM_CLASS_NAME = "class.impala::ScalarExpr";
+
+// Builds a node from an explicit result type and a constant-ness flag.
+ScalarExpr::ScalarExpr(const ColumnType& type, bool is_constant)
+  : Expr(type), is_constant_(is_constant) {}
+
+// Builds a node from its thrift description. 'fn_' is assigned from the thrift node
+// only when the node has its 'fn' field set.
+ScalarExpr::ScalarExpr(const TExprNode& node)
+  : Expr(node), is_constant_(node.is_constant) {
+  if (!node.__isset.fn) return;
+  fn_ = node.fn;
+}
+
+// Creates the expr tree for 'texpr' in 'pool', runs Init() on it against 'row_desc'
+// and assigns FunctionContext indices to its nodes. '*scalar_expr' stays nullptr unless
+// every step succeeds; if Init() fails the tree is closed before the error is returned.
+Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc,
+    RuntimeState* state, ObjectPool* pool, ScalarExpr** scalar_expr) {
+  *scalar_expr = nullptr;
+
+  // Create the root from the first thrift node, then call Expr::CreateTree() with it.
+  ScalarExpr* tree_root;
+  RETURN_IF_ERROR(CreateNode(texpr.nodes[0], pool, &tree_root));
+  RETURN_IF_ERROR(Expr::CreateTree(texpr, pool, tree_root));
+
+  Status init_status = tree_root->Init(row_desc, state);
+  if (UNLIKELY(!init_status.ok())) {
+    tree_root->Close();
+    return init_status;
+  }
+
+  // Index assignment starts from zero for every tree.
+  int next_fn_ctx_idx = 0;
+  tree_root->AssignFnCtxIdx(&next_fn_ctx_idx);
+  *scalar_expr = tree_root;
+  return Status::OK();
+}
+
+// Creates one ScalarExpr per entry of 'texprs' in 'pool' and appends them to 'exprs'
+// in the same order. 'exprs' is cleared first. Stops at the first failure and returns
+// its status; exprs created before the failure remain in 'exprs'.
+Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc,
+    RuntimeState* state, ObjectPool* pool, vector<ScalarExpr*>* exprs) {
+  exprs->clear();
+  for (size_t i = 0; i < texprs.size(); ++i) {
+    ScalarExpr* created = nullptr;
+    RETURN_IF_ERROR(Create(texprs[i], row_desc, state, pool, &created));
+    DCHECK(created != nullptr);
+    exprs->push_back(created);
+  }
+  return Status::OK();
+}
+
+// Convenience overload: same as the pool-taking Create(), using 'state->obj_pool()'
+// as the object pool.
+Status ScalarExpr::Create(const TExpr& texpr, const RowDescriptor& row_desc,
+    RuntimeState* state, ScalarExpr** scalar_expr) {
+  ObjectPool* const pool = state->obj_pool();
+  return ScalarExpr::Create(texpr, row_desc, state, pool, scalar_expr);
+}
+
+// Convenience overload: same as the pool-taking vector Create(), using
+// 'state->obj_pool()' as the object pool.
+Status ScalarExpr::Create(const vector<TExpr>& texprs, const RowDescriptor& row_desc,
+    RuntimeState* state, vector<ScalarExpr*>* exprs) {
+  ObjectPool* const pool = state->obj_pool();
+  return ScalarExpr::Create(texprs, row_desc, state, pool, exprs);
+}
+
+// Pre-order walk that hands out consecutive indices to every node for which HasFnCtx()
+// is true. On return, [fn_ctx_idx_start_, fn_ctx_idx_end_) is the range of indices
+// assigned within this subtree and '*next_fn_ctx_idx' is the next unused index.
+void ScalarExpr::AssignFnCtxIdx(int* next_fn_ctx_idx) {
+  int& next_idx = *next_fn_ctx_idx;
+  fn_ctx_idx_start_ = next_idx;
+  // This node takes its index before any of its descendants do.
+  if (HasFnCtx()) fn_ctx_idx_ = next_idx++;
+  for (ScalarExpr* child : children()) {
+    child->AssignFnCtxIdx(next_fn_ctx_idx);
+  }
+  fn_ctx_idx_end_ = next_idx;
+}
+
+// Instantiates the ScalarExpr subclass that matches 'texpr_node.node_type' (and, for
+// compound predicates and function calls, the function name or binary type), adds it
+// to 'pool' and returns it in '*expr'. Only this single node is created here. Returns
+// an error status if a required thrift field is not set or the node type is unknown;
+// in the unknown-type case '*expr' is set to nullptr.
+Status ScalarExpr::CreateNode(
+    const TExprNode& texpr_node, ObjectPool* pool, ScalarExpr** expr) {
+  switch (texpr_node.node_type) {
+    case TExprNodeType::BOOL_LITERAL:
+    case TExprNodeType::FLOAT_LITERAL:
+    case TExprNodeType::INT_LITERAL:
+    case TExprNodeType::STRING_LITERAL:
+    case TExprNodeType::DECIMAL_LITERAL:
+    case TExprNodeType::TIMESTAMP_LITERAL:
+      *expr = pool->Add(new Literal(texpr_node));
+      break;
+    case TExprNodeType::CASE_EXPR:
+      if (!texpr_node.__isset.case_expr) {
+        return Status("Case expression not set in thrift node");
+      }
+      *expr = pool->Add(new CaseExpr(texpr_node));
+      break;
+    case TExprNodeType::COMPOUND_PRED: {
+      const string& fn_name = texpr_node.fn.name.function_name;
+      if (fn_name == "and") {
+        *expr = pool->Add(new AndPredicate(texpr_node));
+      } else if (fn_name == "or") {
+        *expr = pool->Add(new OrPredicate(texpr_node));
+      } else {
+        DCHECK_EQ(fn_name, "not");
+        *expr = pool->Add(new ScalarFnCall(texpr_node));
+      }
+      break;
+    }
+    case TExprNodeType::NULL_LITERAL:
+      *expr = pool->Add(new NullLiteral(texpr_node));
+      break;
+    case TExprNodeType::SLOT_REF:
+      if (!texpr_node.__isset.slot_ref) {
+        return Status("Slot reference not set in thrift node");
+      }
+      *expr = pool->Add(new SlotRef(texpr_node));
+      break;
+    case TExprNodeType::TUPLE_IS_NULL_PRED:
+      *expr = pool->Add(new TupleIsNullPredicate(texpr_node));
+      break;
+    case TExprNodeType::FUNCTION_CALL: {
+      if (!texpr_node.__isset.fn) {
+        return Status("Function not set in thrift node");
+      }
+      // Special-case functions that have their own Expr classes
+      // TODO: is there a better way to do this?
+      const string& fn_name = texpr_node.fn.name.function_name;
+      if (fn_name == "if") {
+        *expr = pool->Add(new IfExpr(texpr_node));
+      } else if (fn_name == "nullif") {
+        *expr = pool->Add(new NullIfExpr(texpr_node));
+      } else if (fn_name == "isnull" || fn_name == "ifnull" || fn_name == "nvl") {
+        *expr = pool->Add(new IsNullExpr(texpr_node));
+      } else if (fn_name == "coalesce") {
+        *expr = pool->Add(new CoalesceExpr(texpr_node));
+      } else if (texpr_node.fn.binary_type == TFunctionBinaryType::JAVA) {
+        *expr = pool->Add(new HiveUdfCall(texpr_node));
+      } else {
+        *expr = pool->Add(new ScalarFnCall(texpr_node));
+      }
+      break;
+    }
+    case TExprNodeType::IS_NOT_EMPTY_PRED:
+      *expr = pool->Add(new IsNotEmptyPredicate(texpr_node));
+      break;
+    case TExprNodeType::KUDU_PARTITION_EXPR:
+      *expr = pool->Add(new KuduPartitionExpr(texpr_node));
+      break;
+    default: {
+      *expr = nullptr;
+      stringstream os;
+      os << "Unknown expr node type: " << texpr_node.node_type;
+      return Status(os.str());
+    }
+  }
+  // Every case that falls out of the switch has stored a new node in '*expr'.
+  return Status::OK();
+}
+
+// Base implementation: opens 'eval' on each child in order and stops at the first
+// child that returns an error.
+Status ScalarExpr::OpenEvaluator(FunctionContext::FunctionStateScope scope,
+    RuntimeState* state, ScalarExprEvaluator* eval) const {
+  for (ScalarExpr* child : children_) {
+    RETURN_IF_ERROR(child->OpenEvaluator(scope, state, eval));
+  }
+  return Status::OK();
+}
+
+// Base implementation: forwards the close request to every child in order.
+void ScalarExpr::CloseEvaluator(FunctionContext::FunctionStateScope scope,
+    RuntimeState* state, ScalarExprEvaluator* eval) const {
+  for (ScalarExpr* child_expr : children_) {
+    child_expr->CloseEvaluator(scope, state, eval);
+  }
+}
+
+// Delegates to the base-class Expr::Close(); no ScalarExpr-specific teardown is done
+// here.
+void ScalarExpr::Close() {
+  Expr::Close();
+}
+
+// Calls Close() on every expr in 'exprs', in order.
+void ScalarExpr::Close(const vector<ScalarExpr*>& exprs) {
+  for (size_t i = 0; i < exprs.size(); ++i) {
+    exprs[i]->Close();
+  }
+}
+
+// Per-expr record used when computing the results layout.
+struct MemLayoutData {
+  int expr_idx;          // position of the expr in the caller's vector
+  int byte_size;         // slot size of the expr's result
+  bool variable_length;  // true for variable-length string results
+  int alignment;         // required alignment of the result within the buffer
+
+  // Orders fixed-length entries before variable-length ones; within each group,
+  // smaller byte sizes come first.
+  // TODO: sort by type as well?  Any reason to do this?
+  // TODO: would sorting in reverse order of size be faster due to better packing?
+  // TODO: why put var-len at end?
+  bool operator<(const MemLayoutData& rhs) const {
+    // When exactly one side is variable-length, this sorts first iff rhs is that side.
+    if (variable_length != rhs.variable_length) return rhs.variable_length;
+    return byte_size < rhs.byte_size;
+  }
+};
+
+// Assigns each expr in 'exprs' a byte offset in a single results buffer and returns the
+// total number of bytes used. Entries are placed in the order given by
+// MemLayoutData::operator< (fixed-length by increasing slot size, then variable-length),
+// each rounded up to its own alignment. 'offsets' is resized to exprs.size() and is
+// indexed by position in 'exprs', not by placement order. '*var_result_begin' receives
+// the offset of the first variable-length entry, or -1 if there is none. For an empty
+// 'exprs' the function returns 0, sets '*var_result_begin' to -1 and leaves 'offsets'
+// untouched.
+int ScalarExpr::ComputeResultsLayout(const vector<ScalarExpr*>& exprs,
+    vector<int>* offsets, int* var_result_begin) {
+  if (exprs.size() == 0) {
+    *var_result_begin = -1;
+    return 0;
+  }
+
+  // Don't align more than word (8-byte) size. There's no performance gain 
beyond 8-byte
+  // alignment, and there is a performance gain to keeping the results buffer 
small. This
+  // is consistent with what compilers do.
+  int MAX_ALIGNMENT = sizeof(int64_t);
+
+  vector<MemLayoutData> data;
+  data.resize(exprs.size());
+
+  // Collect all the byte sizes and sort them
+  for (int i = 0; i < exprs.size(); ++i) {
+    DCHECK(!exprs[i]->type().IsComplexType()) << "NYI";
+    data[i].expr_idx = i;
+    data[i].byte_size = exprs[i]->type().GetSlotSize();
+    DCHECK_GT(data[i].byte_size, 0);
+    data[i].variable_length = exprs[i]->type().IsVarLenStringType();
+
+    bool fixed_len_char = exprs[i]->type().type == TYPE_CHAR && 
!data[i].variable_length;
+
+    // Compute the alignment of this value. Values should be self-aligned for 
optimal
+    // memory access speed, up to the max alignment (e.g., if this value is an 
int32_t,
+    // its offset in the buffer should be divisible by sizeof(int32_t)).
+    // TODO: is self-alignment really necessary for perf?
+    if (!fixed_len_char) {
+      data[i].alignment = min(data[i].byte_size, MAX_ALIGNMENT);
+    } else {
+      // Fixed-len chars are aligned to a one-byte boundary, as if they were 
char[],
+      // leaving no padding between them and the previous value.
+      data[i].alignment = 1;
+    }
+  }
+
+  sort(data.begin(), data.end());
+
+  // Walk the types and store in a packed aligned layout
+  int byte_offset = 0;
+
+  offsets->resize(exprs.size());
+  *var_result_begin = -1;
+
+  for (int i = 0; i < data.size(); ++i) {
+    // Increase byte_offset so data[i] is at the right alignment (i.e. add 
padding between
+    // this value and the previous).
+    byte_offset = BitUtil::RoundUp(byte_offset, data[i].alignment);
+
+    // Store under the expr's original index so callers can look it up by position.
+    (*offsets)[data[i].expr_idx] = byte_offset;
+    if (data[i].variable_length && *var_result_begin == -1) {
+      *var_result_begin = byte_offset;
+    }
+    DCHECK(!(i == 0 && byte_offset > 0)) << "first value should be at start of 
layout";
+    byte_offset += data[i].byte_size;
+  }
+
+  return byte_offset;
+}
+
+// Base implementation: checks (debug builds) that this node has a valid type, then
+// initializes each child in order, returning the first error encountered.
+Status ScalarExpr::Init(const RowDescriptor& row_desc, RuntimeState* state) {
+  DCHECK(type_.type != INVALID_TYPE);
+  for (ScalarExpr* child : children_) {
+    RETURN_IF_ERROR(child->Init(row_desc, state));
+  }
+  return Status::OK();
+}
+
+// Generic debug string: the node's type, followed by its children when it has any.
+string ScalarExpr::DebugString() const {
+  // TODO: implement partial debug string for member vars
+  stringstream out;
+  out << " type=" << type_.DebugString();
+  if (children_.empty()) return out.str();
+  out << " children=" << DebugString(children_);
+  return out.str();
+}
+
+// Formats 'exprs' as a bracketed list of their debug strings separated by single
+// spaces.
+string ScalarExpr::DebugString(const vector<ScalarExpr*>& exprs) {
+  stringstream out;
+  out << "[";
+  bool is_first = true;
+  for (ScalarExpr* expr : exprs) {
+    if (!is_first) out << " ";
+    is_first = false;
+    out << expr->DebugString();
+  }
+  out << "]";
+  return out.str();
+}
+
+// Base implementation: returns the sum of GetSlotIds() over all children, passing
+// 'slot_ids' through to each of them.
+int ScalarExpr::GetSlotIds(vector<SlotId>* slot_ids) const {
+  int num_slot_refs = 0;
+  for (ScalarExpr* child : children_) {
+    num_slot_refs += child->GetSlotIds(slot_ids);
+  }
+  return num_slot_refs;
+}
+
+// Returns the function 'codegen' has registered under
+// IRFunction::SCALAR_EXPR_GET_<TYPE>_VAL for the given result type. The three string
+// types share one entry. Any other type trips a DCHECK and yields nullptr.
+Function* ScalarExpr::GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* codegen) {
+  switch (type.type) {
+    case TYPE_BOOLEAN:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_BOOLEAN_VAL, false);
+    case TYPE_TINYINT:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_TINYINT_VAL, false);
+    case TYPE_SMALLINT:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_SMALLINT_VAL, false);
+    case TYPE_INT:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_INT_VAL, false);
+    case TYPE_BIGINT:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_BIGINT_VAL, false);
+    case TYPE_FLOAT:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_FLOAT_VAL, false);
+    case TYPE_DOUBLE:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_DOUBLE_VAL, false);
+    case TYPE_STRING:
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_STRING_VAL, false);
+    case TYPE_TIMESTAMP:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_TIMESTAMP_VAL, false);
+    case TYPE_DECIMAL:
+      return codegen->GetFunction(IRFunction::SCALAR_EXPR_GET_DECIMAL_VAL, false);
+    default:
+      DCHECK(false) << "Invalid type: " << type.DebugString();
+      return nullptr;
+  }
+}
+
+// Generates an IR function prototype called 'name' whose return type is the lowered
+// AnyVal type for this expr's type and whose two parameters are named "eval"
+// (ScalarExprEvaluator*) and "row" (TupleRow*). The argument values are returned
+// through '*args'.
+Function* ScalarExpr::CreateIrFunctionPrototype(const string& name,
+    LlvmCodeGen* codegen, Value* (*args)[2]) {
+  Type* return_type = CodegenAnyVal::GetLoweredType(codegen, type());
+  LlvmCodeGen::FnPrototype prototype(codegen, name, return_type);
+
+  auto* eval_ptr_type = codegen->GetPtrType(ScalarExprEvaluator::LLVM_CLASS_NAME);
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("eval", eval_ptr_type));
+
+  auto* row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", row_ptr_type));
+
+  Function* function = prototype.GeneratePrototype(nullptr, *args);
+  DCHECK(function != nullptr);
+  return function;
+}
+
+// Generates (once) an IR function with the compute-function signature that simply
+// calls the static Get*Val() wrapper for this expr's type, passing 'this' as the
+// extra first argument. The finalized function is cached in 'ir_compute_fn_' and
+// returned in '*fn'; later calls return the cached function.
+//
+// Returns IR_VERIFY_FAILED, with '*fn' set to nullptr, if FinalizeFunction() rejects
+// the generated function. In that case nothing is cached: the function under
+// construction is kept in a local until it has been finalized, so a failed attempt
+// cannot be handed out as a valid cached result by a later call.
+Status ScalarExpr::GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, Function** fn) {
+  if (ir_compute_fn_ != nullptr) {
+    *fn = ir_compute_fn_;
+    return Status::OK();
+  }
+  Function* static_getval_fn = GetStaticGetValWrapper(type(), codegen);
+
+  // Call it passing this as the additional first argument.
+  Value* args[2];
+  Function* wrapper_fn =
+      CreateIrFunctionPrototype("CodegenComputeFnWrapper", codegen, &args);
+  BasicBlock* entry_block =
+      BasicBlock::Create(codegen->context(), "entry", wrapper_fn);
+  LlvmBuilder builder(entry_block);
+  Value* this_ptr = codegen->CastPtrToLlvmPtr(
+      codegen->GetPtrType(ScalarExpr::LLVM_CLASS_NAME), this);
+  Value* compute_fn_args[] = {this_ptr, args[0], args[1]};
+  Value* ret = CodegenAnyVal::CreateCall(
+      codegen, &builder, static_getval_fn, compute_fn_args, "ret");
+  builder.CreateRet(ret);
+  *fn = codegen->FinalizeFunction(wrapper_fn);
+  if (UNLIKELY(*fn == nullptr)) {
+    return Status(TErrorCode::IR_VERIFY_FAILED, "CodegendComputeFnWrapper");
+  }
+  // Publish to the cache only after finalization succeeded.
+  ir_compute_fn_ = *fn;
+  return Status::OK();
+}
+
+// At least one of these should always be overridden.
+// These are the base-class bodies of the virtual compute functions. Each one trips a
+// DCHECK carrying this expr's DebugString() and then returns the null value of its
+// return type; 'eval' and 'row' are not used.
+BooleanVal ScalarExpr::GetBooleanVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return BooleanVal::null();
+}
+
+TinyIntVal ScalarExpr::GetTinyIntVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return TinyIntVal::null();
+}
+
+SmallIntVal ScalarExpr::GetSmallIntVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return SmallIntVal::null();
+}
+
+IntVal ScalarExpr::GetIntVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return IntVal::null();
+}
+
+BigIntVal ScalarExpr::GetBigIntVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return BigIntVal::null();
+}
+
+FloatVal ScalarExpr::GetFloatVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return FloatVal::null();
+}
+
+DoubleVal ScalarExpr::GetDoubleVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return DoubleVal::null();
+}
+
+StringVal ScalarExpr::GetStringVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return StringVal::null();
+}
+
+CollectionVal ScalarExpr::GetCollectionVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return CollectionVal::null();
+}
+
+TimestampVal ScalarExpr::GetTimestampVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return TimestampVal::null();
+}
+
+DecimalVal ScalarExpr::GetDecimalVal(
+    ScalarExprEvaluator* eval, const TupleRow* row) const {
+  DCHECK(false) << DebugString();
+  return DecimalVal::null();
+}
+
+// Wraps the generic ScalarExpr debug string as "<expr_name>(<debug string>)".
+string ScalarExpr::DebugString(const string& expr_name) const {
+  string result = expr_name;
+  result += "(";
+  result += ScalarExpr::DebugString();
+  result += ")";
+  return result;
+}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b38d9826/be/src/exprs/scalar-expr.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr.h b/be/src/exprs/scalar-expr.h
new file mode 100644
index 0000000..032ac94
--- /dev/null
+++ b/be/src/exprs/scalar-expr.h
@@ -0,0 +1,338 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#ifndef IMPALA_EXPRS_SCALAR_EXPR_H
+#define IMPALA_EXPRS_SCALAR_EXPR_H
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <boost/scoped_ptr.hpp>
+
+#include "common/global-types.h"
+#include "common/status.h"
+#include "exprs/expr.h"
+#include "impala-ir/impala-ir-functions.h"
+#include "runtime/types.h"
+#include "udf/udf-internal.h" // for CollectionVal
+#include "udf/udf.h"
+
+namespace llvm {
+  class BasicBlock;
+  class Function;
+  class Type;
+  class Value;
+};
+
+namespace impala {
+
+using impala_udf::FunctionContext;
+using impala_udf::AnyVal;
+using impala_udf::BooleanVal;
+using impala_udf::TinyIntVal;
+using impala_udf::SmallIntVal;
+using impala_udf::IntVal;
+using impala_udf::BigIntVal;
+using impala_udf::FloatVal;
+using impala_udf::DoubleVal;
+using impala_udf::TimestampVal;
+using impala_udf::StringVal;
+using impala_udf::DecimalVal;
+using impala_udf::CollectionVal;
+
+class LibCacheEntry;
+class LlvmCodeGen;
+class MemTracker;
+class ObjectPool;
+class RowDescriptor;
+class RuntimeState;
+class ScalarExprEvaluator;
+class SlotDescriptor;
+class TColumnValue;
+class TExpr;
+class TExprNode;
+class Tuple;
+class TupleRow;
+
+/// --- ScalarExpr overview
+///
+/// ScalarExpr is an expression which returns a value for each input tuple row.
+/// Examples include built-in functions such as abs(), UDF, case-expr and 
literal
+/// such as a string "foobar". It's a subclass of Expr which represents an 
expression
+/// as a tree.
+///
+/// --- Implementation:
+///
+/// ScalarExpr implements compute function, which given a row, performs the 
computation
+/// of an expr and produces scalar result. This function evaluates the 
necessary child
+/// arguments by calling their compute functions, then performs whatever 
computation is
+/// necessary on the arguments to generate the result. These compute functions 
have
+/// signature Get*Val(ScalarExprEvaluator*, const TupleRow*). One is 
implemented for each
+/// possible return type it supports (e.g. GetBooleanVal(), GetStringVal(), 
etc). The
+/// return type is a subclass of AnyVal (e.g. StringVal). One or more of these 
compute
+/// functions must be overridden by subclasses of ScalarExpr.
+///
+/// ScalarExpr contains query compile-time information about an expression 
(e.g.
+/// sub-expressions implicitly encoded in the tree structure) and the LLVM IR 
compute
+/// functions. ScalarExprEvaluator is the interface for evaluating a scalar 
expression
+/// against an input TupleRow.
+///
+/// ScalarExpr's compute functions are codegend to replace calls to the 
generic compute
+/// function of child expressions with the exact compute functions based on 
the return
+/// types of the child expressions known at runtime. Subclasses should override
+/// GetCodegendComputeFn() to either generate custom IR compute functions 
using IRBuilder,
+/// which inline calls to child expressions' compute functions, or simply call
+/// GetCodegendComputeFnWrapper() to generate a wrapper function to call the 
interpreted
+/// compute function. Note that we do not need a separate 
GetCodegendComputeFn() for each
+/// type.
+///
+/// TODO: Fix subclasses which call GetCodegendComputeFnWrapper() to not call 
interpreted
+/// functions.
+///
+class ScalarExpr : public Expr {
+ public:
+  /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created 
ScalarExpr
+  /// is stored in ObjectPool 'pool' and returned in 'expr' on success. 
'row_desc' is the
+  /// tuple row descriptor of the input tuple row. On failure, 'expr' is set 
to NULL and
+  /// the expr tree (if created) will be closed. Error status will be returned 
too.
+  static Status Create(const TExpr& texpr, const RowDescriptor& row_desc,
+      RuntimeState* state, ObjectPool* pool, ScalarExpr** expr) 
WARN_UNUSED_RESULT;
+
+  /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created 
ScalarExpr
+  /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 
'row_desc' is
+  /// the tuple row descriptor of the input tuple row. Returns error status on 
failure.
+  static Status Create(const TExpr& texpr, const RowDescriptor& row_desc,
+      RuntimeState* state, ScalarExpr** expr) WARN_UNUSED_RESULT;
+
+  /// Convenience functions creating multiple ScalarExpr.
+  static Status Create(const std::vector<TExpr>& texprs, const RowDescriptor& 
row_desc,
+      RuntimeState* state, ObjectPool* pool, std::vector<ScalarExpr*>* exprs)
+      WARN_UNUSED_RESULT;
+
+  /// Convenience functions creating multiple ScalarExpr.
+  static Status Create(const std::vector<TExpr>& texprs, const RowDescriptor& 
row_desc,
+      RuntimeState* state, std::vector<ScalarExpr*>* exprs) WARN_UNUSED_RESULT;
+
+  /// Returns true if this expression is a SlotRef. Overridden by SlotRef.
+  virtual bool IsSlotRef() const { return false; }
+
+  /// Returns true if this is a literal expression. Overridden by Literal.
+  virtual bool IsLiteral() const { return false; }
+
+  /// Returns true if this expr uses a FunctionContext to track its runtime 
state.
+  /// Overridden by exprs which use FunctionContext.
+  virtual bool HasFnCtx() const { return false; }
+
+  /// Returns true if this expr should be treated as a constant expression.
+  bool is_constant() const { return is_constant_; }
+
+  /// Returns the number of SlotRef nodes in the expr tree. If 'slot_ids' is 
non-null,
+  /// add the slot ids to it. Overridden by SlotRef.
+  virtual int GetSlotIds(std::vector<SlotId>* slot_ids = nullptr) const;
+
+  /// Returns an llvm::Function* with signature:
+  /// <subclass of AnyVal> ComputeFn(ScalarExprEvaluator*, const TupleRow*)
+  ///
+  /// The function should evaluate this expr over 'row' and return the result 
as the
+  /// appropriate type of AnyVal. Returns error status on failure.
+  virtual Status GetCodegendComputeFn(
+      LlvmCodeGen* codegen, llvm::Function** fn) WARN_UNUSED_RESULT = 0;
+
+  /// Simple debug string that provides no expr subclass-specific information
+  virtual std::string DebugString() const;
+  static std::string DebugString(const std::vector<ScalarExpr*>& exprs);
+  std::string DebugString(const std::string& expr_name) const;
+
+  /// Computes a memory efficient layout for storing the results of evaluating 'exprs'.
+  /// The results are assumed to be void* slot types (vs AnyVal types). Varlen data is
+  /// not included (e.g. there will be space for a StringValue, but not the data
+  /// referenced by it).
+  ///
+  /// Returns the number of bytes necessary to store all the results and offsets
+  /// where the result for each expr should be stored.
+  ///
+  /// Variable length types are guaranteed to be at the end and 'var_result_begin'
+  /// will be set to the beginning byte offset where variable length results begin.
+  /// 'var_result_begin' will be set to -1 if there are no variable len types.
+  static int ComputeResultsLayout(const std::vector<ScalarExpr*>& exprs,
+      std::vector<int>* offsets, int* var_result_begin);
+
+  /// Releases cache entries to libCache for all nodes in the ScalarExpr tree.
+  virtual void Close();
+
+  /// Convenience functions for closing a list of ScalarExpr.
+  static void Close(const std::vector<ScalarExpr*>& exprs);
+
+  static const char* LLVM_CLASS_NAME;
+
+ protected:
+  friend class Expr;
+  friend class AggFn;
+  friend class AggFnEvaluator;
+  friend class AndPredicate;
+  friend class CaseExpr;
+  friend class CoalesceExpr;
+  friend class ConditionalFunctions;
+  friend class CompoundPredicate;
+  friend class DecimalFunctions;
+  friend class DecimalOperators;
+  friend class HiveUdfCall;
+  friend class IfExpr;
+  friend class InPredicate;
+  friend class IsNotEmptyPredicate;
+  friend class IsNullExpr;
+  friend class KuduPartitionExpr;
+  friend class Literal;
+  friend class NullIfExpr;
+  friend class NullLiteral;
+  friend class OrPredicate;
+  friend class Predicate;
+  friend class ScalarExprEvaluator;
+  friend class ScalarFnCall;
+
+  /// For BE tests
+  friend class ExprTest;
+  friend class ExprCodegenTest;
+  friend class HashTableTest;
+  friend class OldHashTableTest;
+
+  /// Cached LLVM IR for the compute function. Set this in 
GetCodegendComputeFn().
+  llvm::Function* ir_compute_fn_ = nullptr;
+
+  /// Assigns indices into the FunctionContext vector 'fn_ctxs_' in an 
evaluator to
+  /// nodes which need FunctionContext in the tree. 'next_fn_ctx_idx' is the 
index
+  /// of the next available entry in the vector. It's updated as this function 
is
+  /// called recursively down the tree.
+  void AssignFnCtxIdx(int* next_fn_ctx_idx);
+
+  int fn_ctx_idx() const { return fn_ctx_idx_; }
+
+  /// Creates a single ScalarExpr node based on 'texpr_node' and returns it
+  /// in 'expr'. Return error status on failure.
+  static Status CreateNode(const TExprNode& texpr_node, ObjectPool* pool,
+      ScalarExpr** expr) WARN_UNUSED_RESULT;
+
+  ScalarExpr(const ColumnType& type, bool is_constant);
+  ScalarExpr(const TExprNode& node);
+
+  /// Virtual compute functions for each return type. Each subclass should 
override
+  /// the functions for the return type(s) it supports. For example, a boolean 
function
+  /// will only override GetBooleanVal(). Some Exprs, like Literal, have many 
possible
+  /// return types and will override multiple Get*Val() functions. These 
functions should
+  /// be called by other ScalarExpr and ScalarExprEvaluator only.
+  virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) 
const;
+  virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) 
const;
+  virtual SmallIntVal GetSmallIntVal(ScalarExprEvaluator*, const TupleRow*) 
const;
+  virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const;
+  virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const;
+  virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const;
+  virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const;
+  virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const;
+  virtual CollectionVal GetCollectionVal(ScalarExprEvaluator*, const 
TupleRow*) const;
+  virtual TimestampVal GetTimestampVal(ScalarExprEvaluator*, const TupleRow*) 
const;
+  virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) 
const;
+
+  /// Initializes all nodes in the expr tree. Subclasses overriding this function should
+  /// call ScalarExpr::Init() to recursively call Init() on the expr tree.
+  virtual Status Init(const RowDescriptor& row_desc, RuntimeState* state)
+      WARN_UNUSED_RESULT;
+
+  /// Initializes 'eval' for execution. If scope is FRAGMENT_LOCAL, both
+  /// fragment-local and thread-local states should be initialized. If scope is
+  /// THREAD_LOCAL, only thread-local states should be initialized. THREAD_LOCAL
+  /// scope is used for cloned evaluators.
+  ///
+  /// Subclasses overriding this function should call ScalarExpr::OpenEvaluator() to
+  /// recursively call OpenEvaluator() on all nodes in the ScalarExpr tree.
+  virtual Status OpenEvaluator(FunctionContext::FunctionStateScope scope,
+      RuntimeState* state, ScalarExprEvaluator* eval) const
+      WARN_UNUSED_RESULT;
+
+  /// Frees resources held by 'eval' that were allocated during OpenEvaluator().
+  /// If scope is FRAGMENT_LOCAL, both fragment-local and thread-local states should be
+  /// torn down. If scope is THREAD_LOCAL, only thread-local state should be torn down.
+  ///
+  /// Subclasses overriding this function should call ScalarExpr::CloseEvaluator() to
+  /// recursively call CloseEvaluator() on all nodes in the ScalarExpr tree.
+  virtual void CloseEvaluator(FunctionContext::FunctionStateScope scope,
+      RuntimeState* state, ScalarExprEvaluator* eval) const;
+
+  /// Computes the size of the varargs buffer in bytes (0 bytes if no varargs).
+  /// Overridden by ScalarFnCall; this base implementation always returns 0.
+  virtual int ComputeVarArgsBufferSize() const { return 0; }
+
+  /// Helper function to create an empty llvm::Function* with the signature:
+  /// *Val name(ScalarExprEvaluator*, TupleRow*);
+  ///
+  /// 'name' is the name of the returned llvm::Function*. The arguments to the IR function
+  /// are returned in 'args'. The return type is determined by the return type of the expr
+  /// tree.
+  llvm::Function* CreateIrFunctionPrototype(const std::string& name, 
LlvmCodeGen* codegen,
+      llvm::Value* (*args)[2]);
+
+  /// Generates an IR compute function that calls the interpreted compute function.
+  /// It doesn't provide any performance benefit over the interpreted path. This is
+  /// useful for builtins (e.g. && and || operators) and UDFs which don't generate
+  /// custom IR code but are part of a larger expr tree. The IR compute function of
+  /// the larger expr tree may still benefit from custom IR and inlining of other
+  /// sub-expressions.
+  ///
+  /// TODO: this should be removed in the long run and replaced with cross-compilation
+  /// together with constant propagation and loop unrolling.
+  Status GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, llvm::Function** fn)
+      WARN_UNUSED_RESULT;
+
+  /// Helper function for GetCodegendComputeFnWrapper(). Returns the cross-compiled IR
+  /// function of the static Get*Val wrapper function for return type 'type'.
+  llvm::Function* GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* 
codegen);
+
+ private:
+  /// 'fn_ctx_idx_' is the index into the FunctionContext vector in ScalarExprEvaluator
+  /// for storing the FunctionContext needed to evaluate this ScalarExpr node. It's -1 if
+  /// this ScalarExpr doesn't need a FunctionContext. The FunctionContext is managed by the
+  /// evaluator and initialized by calling ScalarExpr::OpenEvaluator().
+  int fn_ctx_idx_ = -1;
+
+  /// [fn_ctx_idx_start_, fn_ctx_idx_end_) defines the range in the FunctionContext vector
+  /// in ScalarExprEvaluator for the expression subtree rooted at this ScalarExpr node.
+  int fn_ctx_idx_start_ = 0;
+  int fn_ctx_idx_end_ = 0;
+
+  /// True if this expr should be treated as a constant expression. True if either:
+  /// * This expr was sent from the frontend and Expr.isConstant() was true.
+  /// * This expr is a constant literal created in the backend.
+  const bool is_constant_;
+
+  /// Static wrappers which call the compute function of the given ScalarExpr, passing
+  /// it the ScalarExprEvaluator and TupleRow. These are cross-compiled and called by
+  /// the IR wrapper functions generated by GetCodegendComputeFnWrapper().
+  static BooleanVal GetBooleanVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static TinyIntVal GetTinyIntVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static SmallIntVal GetSmallIntVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static IntVal GetIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
+  static BigIntVal GetBigIntVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static FloatVal GetFloatVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static DoubleVal GetDoubleVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static StringVal GetStringVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static TimestampVal GetTimestampVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+  static DecimalVal GetDecimalVal(ScalarExpr*, ScalarExprEvaluator*, const 
TupleRow*);
+};
+
+}
+
+#endif

[03/14] incubator-impala git commit: IMPALA-4192: Disentangle Expr and ExprContext

Reply via email to