This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e69012e835feb861ad6bda4b93da26829c3d2787
Author: Balazs Hevele <[email protected]>
AuthorDate: Thu Feb 5 14:55:30 2026 +0100

    IMPALA-12374: Optimize trailing/leading % in LIKE
    
    When converting LIKE containing a trailing %, leading %, or both,
    to a regular expression, use partial match (with anchors as necessary)
    in re2 with '.*' trimmed, instead of a full match with trailing or
    leading '.*'.
    
    Note that this optimization only concerns more complex patterns,
    e.g. '%a%b%'.
    Patterns where the trimmed pattern is a fixed string already use more
    optimized checks, like a string search, e.g. '%abc%'.
    
    This optimization can make LIKE matching faster, especially if the
    trimmed % covers a long part of the string matched.
    The performance gain is highest with both leading and trailing %,
    and the lowest with only a trailing %.
    
    In expr-benchmark.cc, a new function BenchmarkLikeRegexp was added to
    compare LIKE and regexp_like especially in the relevant cases.
    In these tests, a string of 100 characters is used to match the
    trailing/leading % wildcard.
    
    Before the change, the performance of the test cases was:
    
                     Function  iters/ms   10%ile   50%ile   90%ile
    --------------------------------------------------------------
    
                        like               10.7     10.8     10.9
                       regex               10.7     10.8     10.9
                leading like               18.8       19     19.1
               leading regex               68.4     69.4     69.9
               trailing like               16.2     16.3     16.6
              trailing regex               18.6     18.9     19.1
       trailing leading like               9.56      9.6     9.77
      trailing leading regex               63.5     64.3     65.1
    
    After the change, the performance of LIKE and regexp_like is about the
    same in the relevant cases:
    
                     Function  iters/ms   10%ile   50%ile   90%ile
    --------------------------------------------------------------
                        like               10.7     10.8     10.9
                       regex               10.7     10.8     10.9
                leading like               67.9     68.7     69.3
               leading regex               67.4     68.3     69.1
               trailing like               18.5     18.9       19
              trailing regex               18.7     18.9     19.1
       trailing leading like               63.1     63.9     64.6
      trailing leading regex               63.5     63.9     64.8
    
    Testing:
    -added new tests to LikePredicate in expr-test.cc to cover relevant
    cases
    -added like-predicate-test.cc which checks that optimizations are
    applied when possible
    
    Change-Id: I37b472e056f791035d25633f17ad8a6e841cdd18
    Reviewed-on: http://gerrit.cloudera.org:8080/23932
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Joe McDonnell <[email protected]>
---
 be/src/benchmarks/expr-benchmark.cc |  56 +++++++++++++++
 be/src/exprs/CMakeLists.txt         |   2 +
 be/src/exprs/expr-test.cc           |   6 ++
 be/src/exprs/like-predicate-test.cc |  87 +++++++++++++++++++++++
 be/src/exprs/like-predicate.cc      | 136 +++++++++++++++++++++++++-----------
 be/src/exprs/like-predicate.h       |  14 +++-
 6 files changed, 257 insertions(+), 44 deletions(-)

diff --git a/be/src/benchmarks/expr-benchmark.cc 
b/be/src/benchmarks/expr-benchmark.cc
index fa8d84fbd..929730d72 100644
--- a/be/src/benchmarks/expr-benchmark.cc
+++ b/be/src/benchmarks/expr-benchmark.cc
@@ -285,6 +285,61 @@ Benchmark* BenchmarkLike(bool codegen) {
   BENCHMARK("strncmp2", "'abcdefghijklmnopqrstuvwxyz' LIKE 'abc%'");
   BENCHMARK("strncmp3", "'abcdefghijklmnopqrstuvwxyz' LIKE 'abc'");
   BENCHMARK("regex", "'abcdefghijklmnopqrstuvwxyz' LIKE 'abc%z'");
+
+  return suite;
+}
+
+// Like Regexp:               Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//                                like               10.7     10.8     10.9    
     1X         1X         1X
+//                               regex               10.7     10.8     10.9    
     1X     0.997X         1X
+//                        leading like               63.6     64.3     65.2    
  5.95X      5.95X      5.99X
+//                       leading regex               67.4     67.9     68.8    
   6.3X      6.28X      6.33X
+//                       trailing like               18.5     18.7       19    
  1.74X      1.73X      1.75X
+//                      trailing regex               18.7     18.9     19.1    
  1.75X      1.75X      1.75X
+//               trailing leading like               59.8     60.5     61.1    
  5.59X      5.59X      5.61X
+//              trailing leading regex               60.5     61.1     61.6    
  5.66X      5.65X      5.66X
+//
+// Like RegexpCodegen:        Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//                                like               10.7     10.8     10.9    
     1X         1X         1X
+//                               regex               10.7     10.8     10.9    
     1X         1X         1X
+//                        leading like               67.9     68.7     69.3    
  6.36X      6.36X      6.37X
+//                       leading regex               67.4     68.3     69.1    
  6.31X      6.32X      6.36X
+//                       trailing like               18.5     18.9       19    
  1.73X      1.75X      1.75X
+//                      trailing regex               18.7     18.9     19.1    
  1.75X      1.75X      1.76X
+//               trailing leading like               63.1     63.9     64.6    
  5.91X      5.92X      5.94X
+//              trailing leading regex               63.5     63.9     64.8    
  5.95X      5.92X      5.96X
+Benchmark* BenchmarkLikeRegexp(bool codegen) {
+  Benchmark* suite = new Benchmark(BenchmarkName("Like Regexp", codegen));
+
+  #define LONG_STRING "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" \
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+
+  #define MATCH_PATTERN "'b" LONG_STRING "c" LONG_STRING "b'"
+  #define MATCH_PATTERN_LEADING "'" LONG_STRING "xyz'"
+  #define MATCH_PATTERN_TRAILING "'xyz" LONG_STRING "'"
+  #define MATCH_PATTERN_TRAILING_LEADING "'" LONG_STRING "mno" LONG_STRING "'"
+
+  BENCHMARK("like", MATCH_PATTERN "LIKE 'b%b'");
+  BENCHMARK("regex", "regexp_like(" MATCH_PATTERN ", '^b.*b$', 'cn')");
+  BENCHMARK("leading like", MATCH_PATTERN_LEADING " LIKE '%x%z'");
+  BENCHMARK("leading regex", "regexp_like(" MATCH_PATTERN_LEADING ", 'x.*z$', 
'cn')");
+  BENCHMARK("trailing like", MATCH_PATTERN_TRAILING " LIKE 'x%z%'");
+  BENCHMARK("trailing regex", "regexp_like(" MATCH_PATTERN_TRAILING ", 
'^x.*z', 'cn')");
+  BENCHMARK("trailing leading like", MATCH_PATTERN_TRAILING_LEADING " like 
'%m%o%'");
+  BENCHMARK("trailing leading regex",
+      "regexp_like(" MATCH_PATTERN_TRAILING_LEADING ", 'm.*o', 'cn')");
+
+  #undef MATCH_PATTERN_TRAILING_LEADING
+  #undef MATCH_PATTERN_TRAILING
+  #undef MATCH_PATTERN_LEADING
+  #undef MATCH_PATTERN
+  #undef LONG_STRING
+
+
   return suite;
 }
 
@@ -950,6 +1005,7 @@ int main(int argc, char** argv) {
   benchmarks.push_back(&BenchmarkLiterals);
   benchmarks.push_back(&BenchmarkArithmetic);
   benchmarks.push_back(&BenchmarkLike);
+  benchmarks.push_back(&BenchmarkLikeRegexp);
   benchmarks.push_back(&BenchmarkCast);
   benchmarks.push_back(&BenchmarkDecimalCast);
   benchmarks.push_back(&BenchmarkConditionalFunctions);
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 560dbceff..327f571d0 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -97,6 +97,7 @@ add_library(ExprsTests STATIC
   expr-test.cc
   iceberg-functions-test.cc
   timezone_db-test.cc
+  like-predicate-test.cc
 )
 add_dependencies(ExprsTests gen-deps)
 
@@ -108,6 +109,7 @@ ADD_UNIFIED_BE_LSAN_TEST(expr-test 
"Instantiations/ExprTest.*")
 ADD_BE_LSAN_TEST(expr-codegen-test)
 ADD_UNIFIED_BE_LSAN_TEST(timezone_db-test
  
"TimezoneDbNamesTest.*:TimezoneDbLoadAliasTest.*:TimezoneDbLoadZoneInfoTest.*")
+ADD_UNIFIED_BE_LSAN_TEST(like-predicate-test "TestLikePredicate.*")
 
 # expr-codegen-test includes test IR functions
 COMPILE_TO_IR(expr-codegen-test.cc)
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 67f31efb4..c8e7aa177 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3933,6 +3933,9 @@ TEST_P(ExprTest, LikePredicate) {
   TestValue("'abcde' LIKE 'abcde%'", TYPE_BOOLEAN, true);
   TestValue("'abcde' LIKE '%abcde'", TYPE_BOOLEAN, true);
   TestValue("'abcde' LIKE '%abcde%'", TYPE_BOOLEAN, true);
+  TestValue("'abcde' LIKE '%a%d%'", TYPE_BOOLEAN, true);
+  TestValue("'abcde' LIKE '%b%e'", TYPE_BOOLEAN, true);
+  TestValue("'abcde' LIKE 'a%b%'", TYPE_BOOLEAN, true);
   // Test multiple wildcard characters
   TestValue("'abcde' LIKE '%%bc%%'", TYPE_BOOLEAN, true);
   TestValue("'abcde' LIKE '%%cb%%'", TYPE_BOOLEAN, false);
@@ -4057,6 +4060,9 @@ TEST_P(ExprTest, LikePredicate) {
   TestValue("'aBcde' ILIKE 'A%%Dc%%'", TYPE_BOOLEAN, false);
   TestValue("'aBcde' ILIKE 'AbCde'", TYPE_BOOLEAN, true);
   TestValue("'aBcde' ILIKE 'AbDCe'", TYPE_BOOLEAN, false);
+  TestValue("'aBcde' ILIKE '%ab%d%'", TYPE_BOOLEAN, true);
+  TestValue("'aBcde' ILIKE '%b%e'", TYPE_BOOLEAN, true);
+  TestValue("'aBcde' ILIKE 'a%b%'", TYPE_BOOLEAN, true);
   TestValue("'Abc\n123' ILIKE 'aBc%123'", TYPE_BOOLEAN, true);
   TestValue("'Abc\n\n123' ILIKE 'aBc%123'", TYPE_BOOLEAN, true);
   TestValue("'\nAbc\n123' ILIKE '%aBc_123'", TYPE_BOOLEAN, true);
diff --git a/be/src/exprs/like-predicate-test.cc 
b/be/src/exprs/like-predicate-test.cc
new file mode 100644
index 000000000..50e39fbc2
--- /dev/null
+++ b/be/src/exprs/like-predicate-test.cc
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/like-predicate.h"
+
+#include <string.h>
+
+#include "testutil/gtest-util.h"
+
+namespace impala {
+
+class LikePredicateTests {
+public:
+  static void TestMatchOptimization();
+
+private:
+  static LikePredicate::LikePredicateFunction GetMatchFunction(const string& 
pattern,
+      bool case_sensitive);
+};
+
+void LikePredicateTests::TestMatchOptimization() {
+  // Check that constant strings (with leading/trailing wildcard) use optimized
+  // check instead of regex match.
+  EXPECT_EQ(GetMatchFunction("test", true), &LikePredicate::ConstantEqualsFn);
+  EXPECT_EQ(GetMatchFunction("%test", true), 
&LikePredicate::ConstantEndsWithFn);
+  EXPECT_EQ(GetMatchFunction("test%", true), 
&LikePredicate::ConstantStartsWithFn);
+  EXPECT_EQ(GetMatchFunction("%test%", true), 
&LikePredicate::ConstantSubstringFn);
+
+  // Multiple % wildcards work the same as single ones.
+  EXPECT_EQ(GetMatchFunction("%%test", true), 
&LikePredicate::ConstantEndsWithFn);
+  EXPECT_EQ(GetMatchFunction("test%%", true), 
&LikePredicate::ConstantStartsWithFn);
+  EXPECT_EQ(GetMatchFunction("%%test%%", true), 
&LikePredicate::ConstantSubstringFn);
+
+  // Optimizations are not enabled for escaped % wildcards.
+  // See IMPALA-10849 for more info.
+  EXPECT_EQ(GetMatchFunction("\\%test", true), &LikePredicate::LikeFn);
+  EXPECT_EQ(GetMatchFunction("test\\%", true), &LikePredicate::LikeFn);
+  EXPECT_EQ(GetMatchFunction("\\%test\\%", true), &LikePredicate::LikeFn);
+
+  // IMPALA-12374: Check that more complex patterns with trailing/leading % 
wildcard
+  // use partial regex match, instead of full regex match.
+  EXPECT_EQ(GetMatchFunction("%te%st", true), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("te%st%", true), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("%te%st%", true), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("%te%st", false), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("te%st%", false), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("%te%st%", false), &LikePredicate::LikeFnPartial);
+
+  // Constant string match optimization is not enabled for case-insensitive,
+  // but partial match is.
+  EXPECT_EQ(GetMatchFunction("test", false), &LikePredicate::LikeFn);
+  EXPECT_EQ(GetMatchFunction("%test", false), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("test%", false), &LikePredicate::LikeFnPartial);
+  EXPECT_EQ(GetMatchFunction("%test%", false), &LikePredicate::LikeFnPartial);
+
+  // Check that a pattern that cannot be optimized uses full regex match.
+  EXPECT_EQ(GetMatchFunction("te%st", true), &LikePredicate::LikeFn);
+}
+
+LikePredicate::LikePredicateFunction LikePredicateTests::GetMatchFunction(
+    const string& pattern, bool case_sensitive) {
+  LikePredicate::LikePredicateState test_state;
+  test_state.case_sensitive_ = case_sensitive;
+  test_state.function_ = &LikePredicate::LikeFn;
+  LikePredicate::OptimizeConstantPatternMatch(pattern, &test_state);
+  return test_state.function_;
+}
+
+TEST(TestLikePredicate, RegexMatchOptimization) {
+    LikePredicateTests::TestMatchOptimization();
+}
+
+} // namespace impala
\ No newline at end of file
diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc
index fbf5f89a9..fe69e2cc0 100644
--- a/be/src/exprs/like-predicate.cc
+++ b/be/src/exprs/like-predicate.cc
@@ -71,44 +71,10 @@ void LikePredicate::LikePrepareInternal(FunctionContext* 
context,
     StringVal pattern_val = 
*reinterpret_cast<StringVal*>(context->GetConstantArg(1));
     if (pattern_val.is_null) return;
     StringValue pattern = StringValue::FromStringVal(pattern_val);
-    re2::RE2 substring_re("(?:%+)([^%_]*)(?:%+)");
-    re2::RE2 ends_with_re("(?:%+)([^%_]*)");
-    re2::RE2 starts_with_re("([^%_]*)(?:%+)");
-    re2::RE2 equals_re("([^%_]*)");
-    re2::RE2 ends_with_escaped_wildcard(".*\\\\%$");
     string pattern_str(pattern.Ptr(), pattern.Len());
-    string search_string;
-    if (case_sensitive &&
-        RE2::FullMatch(pattern_str, substring_re, &search_string) &&
-        !RE2::FullMatch(pattern_str, ends_with_escaped_wildcard)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantSubstringFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, starts_with_re, &search_string) &&
-        !RE2::FullMatch(pattern_str, ends_with_escaped_wildcard)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantStartsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, ends_with_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEndsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, equals_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEqualsFn;
-    } else {
-      string re_pattern;
-      ConvertLikePattern(context,
-          *reinterpret_cast<StringVal*>(context->GetConstantArg(1)), 
&re_pattern);
-      RE2::Options opts;
-      opts.set_never_nl(false);
-      opts.set_dot_nl(true);
-      opts.set_case_sensitive(case_sensitive);
-      StringFunctions::SetRE2MemOpt(&opts);
-      state->regex_.reset(new RE2(re_pattern, opts));
-      if (!state->regex_->ok()) {
-        context->SetError(Substitute("Invalid regex: $0", 
pattern_str).c_str());
-      }
+    OptimizeConstantPatternMatch(pattern_str, state);
+    if (state->regex_ && !state->regex_->ok()) {
+      context->SetError(Substitute("Invalid regex: $0", pattern_str).c_str());
     }
   }
 }
@@ -265,6 +231,12 @@ BooleanVal LikePredicate::LikeFn(FunctionContext* context, 
const StringVal& val,
   return RegexMatch(context, val, pattern, true);
 }
 
+BooleanVal LikePredicate::LikeFnPartial(FunctionContext* context, const 
StringVal& val,
+    const StringVal& pattern) {
+  // Handle partial LIKE as if it were a regexp_like, so that it uses 
PartialMatch
+  return RegexMatch(context, val, pattern, false);
+}
+
 BooleanVal LikePredicate::ConstantSubstringFn(FunctionContext* context,
     const StringVal& val, const StringVal& pattern) {
   if (val.is_null) return BooleanVal::null();
@@ -351,7 +323,7 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* 
context,
     opts.set_case_sensitive(state->case_sensitive_);
     StringFunctions::SetRE2MemOpt(&opts);
     if (is_like_pattern) {
-      ConvertLikePattern(context, pattern_value, &re_pattern);
+      ConvertLikePattern(state, pattern_value, &re_pattern);
       opts.set_never_nl(false);
       opts.set_dot_nl(true);
     } else {
@@ -378,11 +350,87 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* 
context,
   }
 }
 
-void LikePredicate::ConvertLikePattern(FunctionContext* context, const 
StringVal& pattern,
-    string* re_pattern) {
+void LikePredicate::OptimizeConstantPatternMatch(const string &pattern,
+    LikePredicateState *state) {
+  re2::RE2 substring_re("(?:%+)([^%_]*)(?:%+)");
+  re2::RE2 ends_with_re("(?:%+)([^%_]*)");
+  re2::RE2 starts_with_re("([^%_]*)(?:%+)");
+  re2::RE2 equals_re("([^%_]*)");
+  re2::RE2 ends_with_escaped_wildcard(".*\\\\%$");
+  string search_string;
+  bool case_sensitive = state->case_sensitive_;
+  if (case_sensitive &&
+      RE2::FullMatch(pattern, substring_re, &search_string) &&
+      !RE2::FullMatch(pattern, ends_with_escaped_wildcard)) {
+    state->SetSearchString(search_string);
+    state->function_ = ConstantSubstringFn;
+  } else if (case_sensitive &&
+      RE2::FullMatch(pattern, starts_with_re, &search_string) &&
+      !RE2::FullMatch(pattern, ends_with_escaped_wildcard)) {
+    state->SetSearchString(search_string);
+    state->function_ = ConstantStartsWithFn;
+  } else if (case_sensitive &&
+      RE2::FullMatch(pattern, ends_with_re, &search_string)) {
+    state->SetSearchString(search_string);
+    state->function_ = ConstantEndsWithFn;
+  } else if (case_sensitive &&
+      RE2::FullMatch(pattern, equals_re, &search_string)) {
+    state->SetSearchString(search_string);
+    state->function_ = ConstantEqualsFn;
+  } else {
+    string re_pattern;
+    bool re_full_match = true;
+    bool re_anchor_start = false;
+    bool re_anchor_end = false;
+    StringVal re_pattern_val = StringVal(pattern.c_str());
+    // IMPALA-12374:
+    // Avoid leading/trailing .* in regex, use (anchored) partial match instead
+    re2::RE2 leading_trailing_re("(?:%+)(.*)(?:%+)");
+    re2::RE2 leading_re("(?:%+)(.*)");
+    re2::RE2 trailing_re("(.*)(?:%+)");
+    string trimmed_pattern;
+    if (RE2::FullMatch(pattern, leading_trailing_re, &trimmed_pattern) &&
+        !RE2::FullMatch(pattern, ends_with_escaped_wildcard)) {
+      // e.g. '%a%b%'
+      re_pattern_val = StringVal(trimmed_pattern.c_str());
+      re_full_match = false;
+    } else if(RE2::FullMatch(pattern, leading_re, &trimmed_pattern)) {
+      // e.g. '%a%b'
+      re_pattern_val = StringVal(trimmed_pattern.c_str());
+      re_full_match = false;
+      re_anchor_end = true;
+    } else if(RE2::FullMatch(pattern, trailing_re, &trimmed_pattern) &&
+        !RE2::FullMatch(pattern, ends_with_escaped_wildcard)) {
+      // e.g. 'a%b%'
+      re_pattern_val = StringVal(trimmed_pattern.c_str());
+      re_full_match = false;
+      re_anchor_start = true;
+    }
+
+    ConvertLikePattern(state, re_pattern_val, &re_pattern, re_anchor_start,
+        re_anchor_end);
+    RE2::Options opts;
+    opts.set_never_nl(false);
+    opts.set_dot_nl(true);
+    opts.set_case_sensitive(case_sensitive);
+    StringFunctions::SetRE2MemOpt(&opts);
+    state->regex_.reset(new RE2(re_pattern, opts));
+    if (re_full_match) {
+      state->function_ = LikeFn;
+    } else {
+      state->function_ = LikeFnPartial;
+    }
+  }
+}
+
+void LikePredicate::ConvertLikePattern(LikePredicateState* state,
+    const StringVal& pattern, string* re_pattern, bool anchor_start,
+    bool anchor_end) {
   re_pattern->clear();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  if (anchor_start) {
+    re_pattern->append("^");
+  }
+
   bool is_escaped = false;
   for (int i = 0; i < pattern.len; ++i) {
     if (!is_escaped && pattern.ptr[i] == '%') {
@@ -419,6 +467,10 @@ void LikePredicate::ConvertLikePattern(FunctionContext* 
context, const StringVal
       is_escaped = false;
     }
   }
+
+  if (anchor_end) {
+    re_pattern->append("$");
+  }
 }
 
 }  // namespace impala
diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h
index d4dc36c30..fbc84aced 100644
--- a/be/src/exprs/like-predicate.h
+++ b/be/src/exprs/like-predicate.h
@@ -41,6 +41,7 @@ class LikePredicate: public Predicate {
 
  protected:
   friend class ScalarExprEvaluator;
+  friend class LikePredicateTests;
 
   LikePredicate(const TExprNode& node)
       : Predicate(node) { }
@@ -121,6 +122,11 @@ class LikePredicate: public Predicate {
   static impala_udf::BooleanVal Regex(impala_udf::FunctionContext* context,
       const impala_udf::StringVal& val, const impala_udf::StringVal& pattern);
 
+  /// Try to find an optimized function to match constant pattern,
+  /// e.g. if constant contains no wildcards, string equality check will be 
used.
+  static void OptimizeConstantPatternMatch(const string &pattern,
+      LikePredicateState *state);
+
   /// Prepare function for regexp_like() when a third optional parameter is 
used
   static void RegexpLikePrepare(impala_udf::FunctionContext* context,
       impala_udf::FunctionContext::FunctionStateScope scope);
@@ -146,6 +152,9 @@ class LikePredicate: public Predicate {
   static impala_udf::BooleanVal LikeFn(impala_udf::FunctionContext* context,
       const impala_udf::StringVal& val, const impala_udf::StringVal& pattern);
 
+  static impala_udf::BooleanVal LikeFnPartial(impala_udf::FunctionContext* 
context,
+      const impala_udf::StringVal& val, const impala_udf::StringVal& pattern);
+
   /// Handling of like predicates that map to strstr
   static impala_udf::BooleanVal 
ConstantSubstringFn(impala_udf::FunctionContext* context,
       const impala_udf::StringVal& val, const impala_udf::StringVal& pattern);
@@ -175,8 +184,9 @@ class LikePredicate: public Predicate {
 
   /// Convert a LIKE pattern (with embedded % and _) into the corresponding
   /// regular expression pattern. Escaped chars are copied verbatim.
-  static void ConvertLikePattern(impala_udf::FunctionContext* context,
-      const impala_udf::StringVal& pattern, std::string* re_pattern);
+  static void ConvertLikePattern(LikePredicateState* state,
+      const impala_udf::StringVal& pattern, std::string* re_pattern,
+      bool anchor_start = false, bool anchor_end = false);
 };
 
 }  // namespace impala

Reply via email to