This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 12f2026675f6de42d44310c57fe7037f3dc82f46 Author: Eyizoha <[email protected]> AuthorDate: Wed Dec 13 11:17:42 2023 +0800 IMPALA-12581: Fix issue of ILIKE and IREGEXP not working correctly with non-const pattern This patch fixes the issue where ILIKE and IREGEXP couldn't ignore case when encountering non-constant patterns. For example, 'SELECT 'ABC' ILIKE pattern FROM tbl' would return false when the pattern in tbl is '%b%'. Tests: - Add TestNonConstPatternILike to test_exprs.py to verify the fix. Change-Id: I3d66680f5a7660e6a41859754c4230f276e66712 Reviewed-on: http://gerrit.cloudera.org:8080/20785 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exprs/like-predicate.cc | 10 +++++++--- be/src/exprs/like-predicate.h | 3 +++ tests/query_test/test_exprs.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc index 74271012b..993f656c4 100644 --- a/be/src/exprs/like-predicate.cc +++ b/be/src/exprs/like-predicate.cc @@ -64,8 +64,9 @@ void LikePredicate::LikePrepareInternal(FunctionContext* context, FunctionContext::FunctionStateScope scope, bool case_sensitive) { if (scope != FunctionContext::THREAD_LOCAL) return; LikePredicateState* state = new LikePredicateState(); - state->function_ = LikeFn; context->SetFunctionState(scope, state); + state->function_ = LikeFn; + state->case_sensitive_ = case_sensitive; if (context->IsArgConstant(1)) { StringVal pattern_val = *reinterpret_cast<StringVal*>(context->GetConstantArg(1)); if (pattern_val.is_null) return; @@ -136,6 +137,7 @@ void LikePredicate::RegexPrepareInternal(FunctionContext* context, LikePredicateState* state = new LikePredicateState(); context->SetFunctionState(scope, state); state->function_ = RegexFn; + state->case_sensitive_ = case_sensitive; if (context->IsArgConstant(1)) { StringVal* pattern = 
reinterpret_cast<StringVal*>(context->GetConstantArg(1)); if (pattern->is_null) return; @@ -330,9 +332,10 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* context, const StringVal& operand_value, const StringVal& pattern_value, bool is_like_pattern) { if (operand_value.is_null || pattern_value.is_null) return BooleanVal::null(); + + LikePredicateState* state = reinterpret_cast<LikePredicateState*>( + context->GetFunctionState(FunctionContext::THREAD_LOCAL)); if (context->IsArgConstant(1)) { - LikePredicateState* state = reinterpret_cast<LikePredicateState*>( - context->GetFunctionState(FunctionContext::THREAD_LOCAL)); if (is_like_pattern) { return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>( operand_value.ptr), operand_value.len), *state->regex_.get()); @@ -343,6 +346,7 @@ BooleanVal LikePredicate::RegexMatch(FunctionContext* context, } else { string re_pattern; RE2::Options opts; + opts.set_case_sensitive(state->case_sensitive_); StringFunctions::SetRE2MemOpt(&opts); if (is_like_pattern) { ConvertLikePattern(context, pattern_value, &re_pattern); diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h index 2a7cb7143..d4dc36c30 100644 --- a/be/src/exprs/like-predicate.h +++ b/be/src/exprs/like-predicate.h @@ -79,6 +79,9 @@ class LikePredicate: public Predicate { /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument. boost::scoped_ptr<re2::RE2> regex_; + /// Used for ILIKE and IREGEXP predicates if the pattern is not a constant argument. 
+ bool case_sensitive_; + LikePredicateState() : escape_char_('\\') { } diff --git a/tests/query_test/test_exprs.py b/tests/query_test/test_exprs.py index 569e584e8..9558fb172 100644 --- a/tests/query_test/test_exprs.py +++ b/tests/query_test/test_exprs.py @@ -290,3 +290,31 @@ class TestConstantFoldingNoTypeLoss(ImpalaTestSuite): query = "select typeof(cast(1 as bigint) + cast(rand() as tinyint))" result = self.execute_query_expect_success(self.client, query) assert result.data == ["BIGINT"] + + +class TestNonConstPatternILike(ImpalaTestSuite): + """Tests for ILIKE and IREGEXP with non-constant patterns for IMPALA-12581. + These tests verify that ILIKE and IREGEXP work correctly when the pattern + is not a constant string.""" + @classmethod + def add_test_dimensions(cls): + super(TestNonConstPatternILike, cls).add_test_dimensions() + + @classmethod + def get_workload(cls): + return 'functional-query' + + def test_non_const_pattern_ilike(self, vector, unique_database): + tbl_name = '`{0}`.`ilike_test`'.format(unique_database) + + self.execute_query_expect_success(self.client, + "CREATE TABLE {0} (pattern_str string)".format(tbl_name)) + self.execute_query_expect_success(self.client, + "INSERT INTO TABLE {0} VALUES('%b%'), ('.*b.*')".format(tbl_name)) + + ilike_result = self.execute_query_expect_success(self.client, + "SELECT count(*) FROM {0} WHERE 'ABC' ILIKE pattern_str".format(tbl_name)) + assert int(ilike_result.get_data()) == 1 + iregexp_result = self.execute_query_expect_success(self.client, + "SELECT count(*) FROM {0} WHERE 'ABC' IREGEXP pattern_str".format(tbl_name)) + assert int(iregexp_result.get_data()) == 1
