Repository: incubator-impala
Updated Branches:
  refs/heads/master c1da1409b -> c7b7c3ece


Remove some code in like-predicate-ir.cc from cross-compilation

like-predicate-ir.cc contains a lot of code which
won't be called and inlined by other IR functions
(e.g. the prepare functions). To reduce the size of
the bitcode module, this change removes these functions
from cross-compilation and moves them into like-predicate.cc
instead.

Change-Id: I1279622ff97af0e1bedcfd9aafdb875b01e38c7c
Reviewed-on: http://gerrit.cloudera.org:8080/3483
Reviewed-by: Michael Ho <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c7b7c3ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c7b7c3ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c7b7c3ec

Branch: refs/heads/master
Commit: c7b7c3ece2eb28db7ad76397c433ef582aeac7f8
Parents: c1da140
Author: Michael Ho <[email protected]>
Authored: Thu Jun 23 22:32:18 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Thu Jul 7 18:41:45 2016 -0700

----------------------------------------------------------------------
 be/src/exprs/CMakeLists.txt       |   1 +
 be/src/exprs/like-predicate-ir.cc | 390 +------------------------------
 be/src/exprs/like-predicate.cc    | 414 +++++++++++++++++++++++++++++++++
 be/src/exprs/like-predicate.h     |   9 +-
 4 files changed, 424 insertions(+), 390 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 452113a..747add3 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -39,6 +39,7 @@ add_library(Exprs
   in-predicate-ir.cc
   is-not-empty-predicate.cc
   is-null-predicate-ir.cc
+  like-predicate.cc
   like-predicate-ir.cc
   literal.cc
   math-functions-ir.cc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/like-predicate-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate-ir.cc 
b/be/src/exprs/like-predicate-ir.cc
index 516866a..2d9c28b 100644
--- a/be/src/exprs/like-predicate-ir.cc
+++ b/be/src/exprs/like-predicate-ir.cc
@@ -14,103 +14,9 @@
 
 #include "exprs/like-predicate.h"
 
-#include <string.h>
-#include <re2/re2.h>
-#include <re2/stringpiece.h>
-#include <sstream>
-
-#include "gutil/strings/substitute.h"
-#include "runtime/string-value.inline.h"
-#include "string-functions.h"
-#include "common/names.h"
-
 using namespace impala_udf;
-using namespace re2;
 
 namespace impala {
-// A regex to match any regex pattern is equivalent to a substring search.
-static const RE2 SUBSTRING_RE(
-    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
-
-// A regex to match any regex pattern which is equivalent to matching a 
constant string
-// at the end of the string values.
-static const RE2 ENDS_WITH_RE(
-    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
-
-// A regex to match any regex pattern which is equivalent to matching a 
constant string
-// at the end of the string values.
-static const RE2 STARTS_WITH_RE(
-    "\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
-
-// A regex to match any regex pattern which is equivalent to a constant string 
match.
-static const RE2 
EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
-
-LikePredicate::LikePredicate(const TExprNode& node)
-  : Predicate(node) {
-}
-
-LikePredicate::~LikePredicate() {
-}
-
-void LikePredicate::LikePrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  LikePrepareInternal(context, scope, true);
-}
-
-void LikePredicate::ILikePrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  LikePrepareInternal(context, scope, false);
-}
-
-// TODO: make class StringValue and StringSearch accept a case-sensitive flag 
and
-// switch back to using the cheaper Constant<>() functions.
-void LikePredicate::LikePrepareInternal(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
-  if (scope != FunctionContext::THREAD_LOCAL) return;
-  LikePredicateState* state = new LikePredicateState();
-  state->function_ = LikeFn;
-  context->SetFunctionState(scope, state);
-  if (context->IsArgConstant(1)) {
-    StringVal pattern_val = 
*reinterpret_cast<StringVal*>(context->GetConstantArg(1));
-    if (pattern_val.is_null) return;
-    StringValue pattern = StringValue::FromStringVal(pattern_val);
-    re2::RE2 substring_re("(?:%+)([^%_]*)(?:%+)");
-    re2::RE2 ends_with_re("(?:%+)([^%_]*)");
-    re2::RE2 starts_with_re("([^%_]*)(?:%+)");
-    re2::RE2 equals_re("([^%_]*)");
-    string pattern_str(pattern.ptr, pattern.len);
-    string search_string;
-    if (case_sensitive && RE2::FullMatch(pattern_str, substring_re, 
&search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantSubstringFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, starts_with_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantStartsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, ends_with_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEndsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, equals_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEqualsFn;
-    } else {
-      string re_pattern;
-      ConvertLikePattern(context,
-          *reinterpret_cast<StringVal*>(context->GetConstantArg(1)), 
&re_pattern);
-      RE2::Options opts;
-      opts.set_never_nl(false);
-      opts.set_dot_nl(true);
-      opts.set_case_sensitive(case_sensitive);
-      state->regex_.reset(new RE2(re_pattern, opts));
-      if (!state->regex_->ok()) {
-        context->SetError(
-            strings::Substitute("Invalid regex: $0", pattern_val.ptr).c_str());
-      }
-    }
-  }
-}
 
 BooleanVal LikePredicate::Like(FunctionContext* context, const StringVal& val,
     const StringVal& pattern) {
@@ -119,71 +25,6 @@ BooleanVal LikePredicate::Like(FunctionContext* context, 
const StringVal& val,
   return (state->function_)(context, val, pattern);
 }
 
-void LikePredicate::LikeClose(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  if (scope == FunctionContext::THREAD_LOCAL) {
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    delete state;
-  }
-}
-
-void LikePredicate::RegexPrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  RegexPrepareInternal(context, scope, true);
-}
-
-void LikePredicate::IRegexPrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  RegexPrepareInternal(context, scope, false);
-}
-
-void LikePredicate::RegexPrepareInternal(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
-  if (scope != FunctionContext::THREAD_LOCAL) return;
-  LikePredicateState* state = new LikePredicateState();
-  context->SetFunctionState(scope, state);
-  state->function_ = RegexFn;
-  if (context->IsArgConstant(1)) {
-    StringVal* pattern = 
reinterpret_cast<StringVal*>(context->GetConstantArg(1));
-    if (pattern->is_null) return;
-    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), 
pattern->len);
-    string search_string;
-    // The following four conditionals check if the pattern is a constant 
string,
-    // starts with a constant string and is followed by any number of wildcard 
characters,
-    // ends with a constant string and is preceded by any number of wildcard 
characters or
-    // has a constant substring surrounded on both sides by any number of 
wildcard
-    // characters. In any of these conditions, we can search for the pattern 
more
-    // efficiently by using our own string match functions rather than regex 
matching.
-    if (case_sensitive && RE2::FullMatch(pattern_str, EQUALS_RE, 
&search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEqualsFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantStartsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEndsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantSubstringFn;
-    } else {
-      RE2::Options opts;
-      opts.set_case_sensitive(case_sensitive);
-      state->regex_.reset(new RE2(pattern_str, opts));
-      if (!state->regex_->ok()) {
-        stringstream error;
-        error << "Invalid regex expression" << pattern->ptr;
-        context->SetError(error.str().c_str());
-      }
-      state->function_ = ConstantRegexFnPartial;
-    }
-  }
-}
-
 BooleanVal LikePredicate::Regex(FunctionContext* context, const StringVal& val,
     const StringVal& pattern) {
   LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
@@ -191,238 +32,9 @@ BooleanVal LikePredicate::Regex(FunctionContext* context, 
const StringVal& val,
   return (state->function_)(context, val, pattern);
 }
 
-// This prepare function is used only when 3 parameters are passed to the 
regexp_like()
-// function. For the 2 parameter version, the RegexPrepare() function is used 
to prepare.
-void LikePredicate::RegexpLikePrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  if (scope != FunctionContext::THREAD_LOCAL) return;
-  LikePredicateState* state = new LikePredicateState();
-  context->SetFunctionState(scope, state);
-  // If both the pattern and the match parameter are constant, we pre-compile 
the
-  // regular expression once here. Otherwise, the RE is compiled per row in 
RegexpLike()
-  if (context->IsArgConstant(1) && context->IsArgConstant(2)) {
-    StringVal* pattern;
-    pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
-    if (pattern->is_null) return;
-    StringVal* match_parameter = 
reinterpret_cast<StringVal*>(context->GetConstantArg(2));
-    stringstream error;
-    if (match_parameter->is_null) {
-      error << "NULL match parameter";
-      context->SetError(error.str().c_str());
-      return;
-    }
-    RE2::Options opts;
-    string error_str;
-    if (!StringFunctions::SetRE2Options(*match_parameter, &error_str, &opts)) {
-      context->SetError(error_str.c_str());
-      return;
-    }
-    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), 
pattern->len);
-    state->regex_.reset(new RE2(pattern_str, opts));
-    if (!state->regex_->ok()) {
-      error << "Invalid regex expression" << pattern->ptr;
-      context->SetError(error.str().c_str());
-    }
-  }
-}
-
-// This is used only for the 3 parameter version of regexp_like(). The 2 
parameter
-// version calls Regex() directly.
 BooleanVal LikePredicate::RegexpLike(FunctionContext* context, const 
StringVal& val,
     const StringVal& pattern, const StringVal& match_parameter) {
-  if (val.is_null || pattern.is_null) return BooleanVal::null();
-  // If either the pattern or the third optional match parameter are not 
constant, we
-  // have to recompile the RE for every row.
-  if (!context->IsArgConstant(2) || !context->IsArgConstant(1)) {
-    if (match_parameter.is_null) return BooleanVal::null();
-    RE2::Options opts;
-    string error_str;
-    if (!StringFunctions::SetRE2Options(match_parameter, &error_str, &opts)) {
-      context->SetError(error_str.c_str());
-      return BooleanVal(false);
-    }
-    string re_pattern(reinterpret_cast<const char*>(pattern.ptr), pattern.len);
-    re2::RE2 re(re_pattern, opts);
-    if (re.ok()) {
-      return RE2::PartialMatch(re2::StringPiece(
-          reinterpret_cast<const char*>(val.ptr), val.len), re);
-    } else {
-      context->SetError(
-          strings::Substitute("Invalid regex: $0", pattern.ptr).c_str());
-      return BooleanVal(false);
-    }
-  }
-  return ConstantRegexFnPartial(context, val, pattern);
-}
-
-void LikePredicate::RegexClose(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  if (scope == FunctionContext::THREAD_LOCAL) {
-    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    delete state;
-  }
-}
-
-BooleanVal LikePredicate::RegexFn(FunctionContext* context, const StringVal& 
val,
-    const StringVal& pattern) {
-  return RegexMatch(context, val, pattern, false);
-}
-
-BooleanVal LikePredicate::LikeFn(FunctionContext* context, const StringVal& 
val,
-    const StringVal& pattern) {
-  return RegexMatch(context, val, pattern, true);
-}
-
-BooleanVal LikePredicate::ConstantSubstringFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  if (state->search_string_sv_.len == 0) return BooleanVal(true);
-  StringValue pattern_value = StringValue::FromStringVal(val);
-  return BooleanVal(state->substring_pattern_.Search(&pattern_value) != -1);
-}
-
-BooleanVal LikePredicate::ConstantStartsWithFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  if (val.len < state->search_string_sv_.len) {
-    return BooleanVal(false);
-  } else {
-    StringValue v =
-        StringValue(reinterpret_cast<char*>(val.ptr), 
state->search_string_sv_.len);
-    return BooleanVal(state->search_string_sv_.Eq((v)));
-  }
-}
-
-BooleanVal LikePredicate::ConstantEndsWithFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  if (val.len < state->search_string_sv_.len) {
-    return BooleanVal(false);
-  } else {
-    char* ptr =
-        reinterpret_cast<char*>(val.ptr) + val.len - 
state->search_string_sv_.len;
-    int len = state->search_string_sv_.len;
-    StringValue v = StringValue(ptr, len);
-    return BooleanVal(state->search_string_sv_.Eq(v));
-  }
-}
-
-BooleanVal LikePredicate::ConstantEqualsFn(FunctionContext* context, const 
StringVal& val,
-    const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  return 
BooleanVal(state->search_string_sv_.Eq(StringValue::FromStringVal(val)));
-}
-
-BooleanVal LikePredicate::ConstantRegexFnPartial(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.ptr), val.len);
-  return RE2::PartialMatch(operand_sp, *state->regex_);
-}
-
-BooleanVal LikePredicate::ConstantRegexFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.ptr), val.len);
-  return RE2::FullMatch(operand_sp, *state->regex_);
-}
-
-BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
-    const StringVal& operand_value, const StringVal& pattern_value,
-    bool is_like_pattern) {
-  if (operand_value.is_null || pattern_value.is_null) return 
BooleanVal::null();
-  if (context->IsArgConstant(1)) {
-    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    if (is_like_pattern) {
-      return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>(
-          operand_value.ptr), operand_value.len), *state->regex_.get());
-    } else {
-      return RE2::PartialMatch(re2::StringPiece(reinterpret_cast<const char*>(
-          operand_value.ptr), operand_value.len), *state->regex_.get());
-    }
-  } else {
-    string re_pattern;
-    RE2::Options opts;
-    if (is_like_pattern) {
-      ConvertLikePattern(context, pattern_value, &re_pattern);
-      opts.set_never_nl(false);
-      opts.set_dot_nl(true);
-    } else {
-      re_pattern =
-        string(reinterpret_cast<const char*>(pattern_value.ptr), 
pattern_value.len);
-    }
-    re2::RE2 re(re_pattern, opts);
-    if (re.ok()) {
-      if (is_like_pattern) {
-        return RE2::FullMatch(re2::StringPiece(
-            reinterpret_cast<const char*>(operand_value.ptr), 
operand_value.len), re);
-      } else {
-        return RE2::PartialMatch(re2::StringPiece(
-            reinterpret_cast<const char*>(operand_value.ptr), 
operand_value.len), re);
-      }
-    } else {
-      context->SetError(
-          strings::Substitute("Invalid regex: $0", pattern_value.ptr).c_str());
-      return BooleanVal(false);
-    }
-  }
-}
-
-void LikePredicate::ConvertLikePattern(FunctionContext* context, const 
StringVal& pattern,
-    string* re_pattern) {
-  re_pattern->clear();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  bool is_escaped = false;
-  for (int i = 0; i < pattern.len; ++i) {
-    if (!is_escaped && pattern.ptr[i] == '%') {
-      re_pattern->append(".*");
-    } else if (!is_escaped && pattern.ptr[i] == '_') {
-      re_pattern->append(".");
-    // check for escape char before checking for regex special chars, they 
might overlap
-    } else if (!is_escaped && pattern.ptr[i] == state->escape_char_) {
-      is_escaped = true;
-    } else if (
-        pattern.ptr[i] == '.'
-        || pattern.ptr[i] == '['
-        || pattern.ptr[i] == ']'
-        || pattern.ptr[i] == '{'
-        || pattern.ptr[i] == '}'
-        || pattern.ptr[i] == '('
-        || pattern.ptr[i] == ')'
-        || pattern.ptr[i] == '\\'
-        || pattern.ptr[i] == '*'
-        || pattern.ptr[i] == '+'
-        || pattern.ptr[i] == '?'
-        || pattern.ptr[i] == '|'
-        || pattern.ptr[i] == '^'
-        || pattern.ptr[i] == '$'
-        ) {
-      // escape all regex special characters; see list at
-      // 
http://www.boost.org/doc/libs/1_47_0/libs/regex/doc/html/boost_regex/syntax/basic_extended.html
-      re_pattern->append("\\");
-      re_pattern->append(1, pattern.ptr[i]);
-      is_escaped = false;
-    } else {
-      // regular character or escaped special character
-      re_pattern->append(1, pattern.ptr[i]);
-      is_escaped = false;
-    }
-  }
+  return RegexpLikeInternal(context, val, pattern, match_parameter);
 }
 
 }  // namespace impala

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/like-predicate.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc
new file mode 100644
index 0000000..fd73089
--- /dev/null
+++ b/be/src/exprs/like-predicate.cc
@@ -0,0 +1,414 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "exprs/like-predicate.h"
+
+#include <string.h>
+#include <re2/re2.h>
+#include <re2/stringpiece.h>
+#include <sstream>
+
+#include "gutil/strings/substitute.h"
+#include "runtime/string-value.inline.h"
+#include "string-functions.h"
+#include "common/names.h"
+
+using namespace impala_udf;
+using namespace re2;
+
+namespace impala {
+// A regex to match any regex pattern which is equivalent to a substring search.
+static const RE2 SUBSTRING_RE(
+    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+
+// A regex to match any regex pattern which is equivalent to matching a 
constant string
+// at the end of the string values.
+static const RE2 ENDS_WITH_RE(
+    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
+
+// A regex to match any regex pattern which is equivalent to matching a 
constant string
+// at the start of the string values.
+static const RE2 STARTS_WITH_RE(
+    "\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+
+// A regex to match any regex pattern which is equivalent to a constant string 
match.
+static const RE2 
EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
+
+LikePredicate::LikePredicate(const TExprNode& node)
+  : Predicate(node) {
+}
+
+LikePredicate::~LikePredicate() {
+}
+
+void LikePredicate::LikePrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  LikePrepareInternal(context, scope, true);
+}
+
+void LikePredicate::ILikePrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  LikePrepareInternal(context, scope, false);
+}
+
+// TODO: make class StringValue and StringSearch accept a case-sensitive flag 
and
+// switch back to using the cheaper Constant<>() functions.
+void LikePredicate::LikePrepareInternal(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
+  if (scope != FunctionContext::THREAD_LOCAL) return;
+  LikePredicateState* state = new LikePredicateState();
+  state->function_ = LikeFn;
+  context->SetFunctionState(scope, state);
+  if (context->IsArgConstant(1)) {
+    StringVal pattern_val = 
*reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    if (pattern_val.is_null) return;
+    StringValue pattern = StringValue::FromStringVal(pattern_val);
+    re2::RE2 substring_re("(?:%+)([^%_]*)(?:%+)");
+    re2::RE2 ends_with_re("(?:%+)([^%_]*)");
+    re2::RE2 starts_with_re("([^%_]*)(?:%+)");
+    re2::RE2 equals_re("([^%_]*)");
+    string pattern_str(pattern.ptr, pattern.len);
+    string search_string;
+    if (case_sensitive && RE2::FullMatch(pattern_str, substring_re, 
&search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantSubstringFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, starts_with_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantStartsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, ends_with_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEndsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, equals_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEqualsFn;
+    } else {
+      string re_pattern;
+      ConvertLikePattern(context,
+          *reinterpret_cast<StringVal*>(context->GetConstantArg(1)), 
&re_pattern);
+      RE2::Options opts;
+      opts.set_never_nl(false);
+      opts.set_dot_nl(true);
+      opts.set_case_sensitive(case_sensitive);
+      state->regex_.reset(new RE2(re_pattern, opts));
+      if (!state->regex_->ok()) {
+        context->SetError(
+            strings::Substitute("Invalid regex: $0", pattern_val.ptr).c_str());
+      }
+    }
+  }
+}
+
+void LikePredicate::LikeClose(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope == FunctionContext::THREAD_LOCAL) {
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    delete state;
+  }
+}
+
+void LikePredicate::RegexPrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  RegexPrepareInternal(context, scope, true);
+}
+
+void LikePredicate::IRegexPrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  RegexPrepareInternal(context, scope, false);
+}
+
+void LikePredicate::RegexPrepareInternal(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
+  if (scope != FunctionContext::THREAD_LOCAL) return;
+  LikePredicateState* state = new LikePredicateState();
+  context->SetFunctionState(scope, state);
+  state->function_ = RegexFn;
+  if (context->IsArgConstant(1)) {
+    StringVal* pattern = 
reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    if (pattern->is_null) return;
+    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), 
pattern->len);
+    string search_string;
+    // The following four conditionals check if the pattern is a constant 
string,
+    // starts with a constant string and is followed by any number of wildcard 
characters,
+    // ends with a constant string and is preceded by any number of wildcard 
characters or
+    // has a constant substring surrounded on both sides by any number of 
wildcard
+    // characters. In any of these conditions, we can search for the pattern 
more
+    // efficiently by using our own string match functions rather than regex 
matching.
+    if (case_sensitive && RE2::FullMatch(pattern_str, EQUALS_RE, 
&search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEqualsFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantStartsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEndsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantSubstringFn;
+    } else {
+      RE2::Options opts;
+      opts.set_case_sensitive(case_sensitive);
+      state->regex_.reset(new RE2(pattern_str, opts));
+      if (!state->regex_->ok()) {
+        stringstream error;
+        error << "Invalid regex expression" << pattern->ptr;
+        context->SetError(error.str().c_str());
+      }
+      state->function_ = ConstantRegexFnPartial;
+    }
+  }
+}
+
+// This prepare function is used only when 3 parameters are passed to the 
regexp_like()
+// function. For the 2 parameter version, the RegexPrepare() function is used 
to prepare.
+void LikePredicate::RegexpLikePrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope != FunctionContext::THREAD_LOCAL) return;
+  LikePredicateState* state = new LikePredicateState();
+  context->SetFunctionState(scope, state);
+  // If both the pattern and the match parameter are constant, we pre-compile 
the
+  // regular expression once here. Otherwise, the RE is compiled per row in 
RegexpLike()
+  if (context->IsArgConstant(1) && context->IsArgConstant(2)) {
+    StringVal* pattern;
+    pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    if (pattern->is_null) return;
+    StringVal* match_parameter = 
reinterpret_cast<StringVal*>(context->GetConstantArg(2));
+    stringstream error;
+    if (match_parameter->is_null) {
+      error << "NULL match parameter";
+      context->SetError(error.str().c_str());
+      return;
+    }
+    RE2::Options opts;
+    string error_str;
+    if (!StringFunctions::SetRE2Options(*match_parameter, &error_str, &opts)) {
+      context->SetError(error_str.c_str());
+      return;
+    }
+    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), 
pattern->len);
+    state->regex_.reset(new RE2(pattern_str, opts));
+    if (!state->regex_->ok()) {
+      error << "Invalid regex expression" << pattern->ptr;
+      context->SetError(error.str().c_str());
+    }
+  }
+}
+
+// This is used only for the 3 parameter version of regexp_like(). The 2 
parameter
+// version calls Regex() directly.
+BooleanVal LikePredicate::RegexpLikeInternal(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern, const StringVal& 
match_parameter) {
+  if (val.is_null || pattern.is_null) return BooleanVal::null();
+  // If either the pattern or the third optional match parameter are not 
constant, we
+  // have to recompile the RE for every row.
+  if (!context->IsArgConstant(2) || !context->IsArgConstant(1)) {
+    if (match_parameter.is_null) return BooleanVal::null();
+    RE2::Options opts;
+    string error_str;
+    if (!StringFunctions::SetRE2Options(match_parameter, &error_str, &opts)) {
+      context->SetError(error_str.c_str());
+      return BooleanVal(false);
+    }
+    string re_pattern(reinterpret_cast<const char*>(pattern.ptr), pattern.len);
+    re2::RE2 re(re_pattern, opts);
+    if (re.ok()) {
+      return RE2::PartialMatch(re2::StringPiece(
+          reinterpret_cast<const char*>(val.ptr), val.len), re);
+    } else {
+      context->SetError(
+          strings::Substitute("Invalid regex: $0", pattern.ptr).c_str());
+      return BooleanVal(false);
+    }
+  }
+  return ConstantRegexFnPartial(context, val, pattern);
+}
+
+void LikePredicate::RegexClose(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope == FunctionContext::THREAD_LOCAL) {
+    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    delete state;
+  }
+}
+
+BooleanVal LikePredicate::RegexFn(FunctionContext* context, const StringVal& 
val,
+    const StringVal& pattern) {
+  return RegexMatch(context, val, pattern, false);
+}
+
+BooleanVal LikePredicate::LikeFn(FunctionContext* context, const StringVal& 
val,
+    const StringVal& pattern) {
+  return RegexMatch(context, val, pattern, true);
+}
+
+BooleanVal LikePredicate::ConstantSubstringFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  if (state->search_string_sv_.len == 0) return BooleanVal(true);
+  StringValue pattern_value = StringValue::FromStringVal(val);
+  return BooleanVal(state->substring_pattern_.Search(&pattern_value) != -1);
+}
+
+// Returns NULL if 'val' is NULL. Otherwise returns whether the leading bytes of 'val'
+// equal the search string held in the thread-local LikePredicateState; a 'val' shorter
+// than the search string never matches. 'pattern' is not read here.
+BooleanVal LikePredicate::ConstantStartsWithFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* like_state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  const int prefix_len = like_state->search_string_sv_.len;
+  if (val.len < prefix_len) return BooleanVal(false);
+  // Compare only the first 'prefix_len' bytes of 'val'.
+  StringValue prefix(reinterpret_cast<char*>(val.ptr), prefix_len);
+  return BooleanVal(like_state->search_string_sv_.Eq(prefix));
+}
+
+// Returns NULL if 'val' is NULL. Otherwise returns whether the trailing bytes of 'val'
+// equal the search string held in the thread-local LikePredicateState; a 'val' shorter
+// than the search string never matches. 'pattern' is not read here.
+BooleanVal LikePredicate::ConstantEndsWithFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* like_state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  const int suffix_len = like_state->search_string_sv_.len;
+  if (val.len < suffix_len) return BooleanVal(false);
+  // Compare only the last 'suffix_len' bytes of 'val'.
+  char* suffix_start = reinterpret_cast<char*>(val.ptr) + (val.len - suffix_len);
+  StringValue suffix(suffix_start, suffix_len);
+  return BooleanVal(like_state->search_string_sv_.Eq(suffix));
+}
+
+// Returns NULL if 'val' is NULL. Otherwise returns whether 'val' equals the search
+// string held in the thread-local LikePredicateState. 'pattern' is not read here.
+BooleanVal LikePredicate::ConstantEqualsFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* like_state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  const StringValue operand = StringValue::FromStringVal(val);
+  return BooleanVal(like_state->search_string_sv_.Eq(operand));
+}
+
+// Returns NULL if 'val' is NULL. Otherwise returns the result of RE2::PartialMatch() of
+// 'val' against the regex held in the thread-local LikePredicateState. 'pattern' is not
+// read here.
+BooleanVal LikePredicate::ConstantRegexFnPartial(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* like_state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  re2::StringPiece input(reinterpret_cast<const char*>(val.ptr), val.len);
+  const bool matched = RE2::PartialMatch(input, *like_state->regex_);
+  return BooleanVal(matched);
+}
+
+// Returns NULL if 'val' is NULL. Otherwise returns the result of RE2::FullMatch() of
+// 'val' against the regex held in the thread-local LikePredicateState. 'pattern' is not
+// read here.
+BooleanVal LikePredicate::ConstantRegexFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* like_state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  re2::StringPiece input(reinterpret_cast<const char*>(val.ptr), val.len);
+  const bool matched = RE2::FullMatch(input, *like_state->regex_);
+  return BooleanVal(matched);
+}
+
+// Evaluates 'operand_value' against 'pattern_value' and returns the match result, or
+// NULL if either argument is NULL.
+//
+// If 'is_like_pattern' is true, the pattern is first converted with ConvertLikePattern()
+// and must match the whole operand (RE2::FullMatch); otherwise the pattern bytes are
+// used as the regex verbatim and may match anywhere in the operand (RE2::PartialMatch).
+//
+// When argument 1 (the pattern) is constant, the regex held in the thread-local
+// LikePredicateState is used. Otherwise a regex is compiled on every call; if that
+// compilation fails, an error is set on 'context' and false is returned.
+BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
+    const StringVal& operand_value, const StringVal& pattern_value,
+    bool is_like_pattern) {
+  if (operand_value.is_null || pattern_value.is_null) return BooleanVal::null();
+  re2::StringPiece operand_sp(
+      reinterpret_cast<const char*>(operand_value.ptr), operand_value.len);
+  if (context->IsArgConstant(1)) {
+    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    if (is_like_pattern) {
+      return RE2::FullMatch(operand_sp, *state->regex_.get());
+    } else {
+      return RE2::PartialMatch(operand_sp, *state->regex_.get());
+    }
+  }
+  string re_pattern;
+  RE2::Options opts;
+  if (is_like_pattern) {
+    ConvertLikePattern(context, pattern_value, &re_pattern);
+    // Let '.' in the converted pattern match newlines in the operand as well.
+    opts.set_never_nl(false);
+    opts.set_dot_nl(true);
+  } else {
+    re_pattern =
+        string(reinterpret_cast<const char*>(pattern_value.ptr), pattern_value.len);
+  }
+  re2::RE2 re(re_pattern, opts);
+  if (!re.ok()) {
+    // StringVal::ptr is a length-delimited byte buffer, not a NUL-terminated C string,
+    // so it must be copied into a bounded string before being formatted into the error.
+    const string pattern_str(
+        reinterpret_cast<const char*>(pattern_value.ptr), pattern_value.len);
+    context->SetError(strings::Substitute("Invalid regex: $0", pattern_str).c_str());
+    return BooleanVal(false);
+  }
+  if (is_like_pattern) {
+    return RE2::FullMatch(operand_sp, re);
+  } else {
+    return RE2::PartialMatch(operand_sp, re);
+  }
+}
+
+// Rewrites the LIKE pattern 'pattern' into a regular expression stored in 're_pattern'
+// (any previous contents are discarded):
+//   - an unescaped '%' becomes ".*" and an unescaped '_' becomes ".";
+//   - an unescaped occurrence of the escape character from the thread-local
+//     LikePredicateState is dropped and marks the next character as escaped;
+//   - regex metacharacters are emitted with a preceding backslash;
+//   - every other character is copied through unchanged.
+void LikePredicate::ConvertLikePattern(FunctionContext* context, const StringVal& pattern,
+    string* re_pattern) {
+  re_pattern->clear();
+  LikePredicateState* like_state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  bool is_escaped = false;
+  for (int pos = 0; pos < pattern.len; ++pos) {
+    const uint8_t ch = pattern.ptr[pos];
+    if (!is_escaped) {
+      if (ch == '%') {
+        re_pattern->append(".*");
+        continue;
+      }
+      if (ch == '_') {
+        re_pattern->append(".");
+        continue;
+      }
+      // Check for the escape char before checking for regex special chars, since they
+      // might overlap.
+      if (ch == like_state->escape_char_) {
+        is_escaped = true;
+        continue;
+      }
+    }
+    // Escape all regex special characters; see list at
+    // http://www.boost.org/doc/libs/1_47_0/libs/regex/doc/html/boost_regex/syntax/basic_extended.html
+    switch (ch) {
+      case '.': case '[': case ']': case '{': case '}': case '(': case ')':
+      case '\\': case '*': case '+': case '?': case '|': case '^': case '$':
+        re_pattern->append("\\");
+        break;
+      default:
+        // Regular character or escaped LIKE wildcard: copied through as-is below.
+        break;
+    }
+    re_pattern->append(1, ch);
+    is_escaped = false;
+  }
+}
+
+}  // namespace impala

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/like-predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h
index 6c907f2..90c78ec 100644
--- a/be/src/exprs/like-predicate.h
+++ b/be/src/exprs/like-predicate.h
@@ -117,11 +117,18 @@ class LikePredicate: public Predicate {
   static void RegexpLikePrepare(impala_udf::FunctionContext* context,
       impala_udf::FunctionContext::FunctionStateScope scope);
 
-  /// Handles regexp_like() when 3 parameters are passed to it
+  /// The cross-compiled wrapper to call RegexpLikeInternal() which is not cross-compiled.
   static impala_udf::BooleanVal RegexpLike(impala_udf::FunctionContext* 
context,
       const impala_udf::StringVal& val, const impala_udf::StringVal& pattern,
       const impala_udf::StringVal& match_parameter);
 
+  /// Handles regexp_like() when 3 parameters are passed to it. This is intentionally
+  /// not cross-compiled as there is no performance benefit in doing so and it will
+  /// consume extra codegen time.
+  static impala_udf::BooleanVal 
RegexpLikeInternal(impala_udf::FunctionContext* context,
+      const impala_udf::StringVal& val, const impala_udf::StringVal& pattern,
+      const impala_udf::StringVal& match_parameter);
+
   static void RegexClose(impala_udf::FunctionContext*,
       impala_udf::FunctionContext::FunctionStateScope scope);
 


Reply via email to