This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 888a0460093 [Fix](regexp) make dot match newline in regexp_fn by 
default (#60831)
888a0460093 is described below

commit 888a04600936ada7ab83e02d474715db257adf14
Author: linrrarity <[email protected]>
AuthorDate: Thu Feb 26 17:05:17 2026 +0800

    [Fix](regexp) make dot match newline in regexp_fn by default (#60831)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    doc: https://github.com/apache/doris-website/pull/3410
    
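    Make `.` match newline by default in `regexp_fn` (e.g. `REGEXP_EXTRACT`),
    keeping its behavior consistent with `regexp`. Prefix the pattern with
    `(?-s)` to keep `.` from matching newline.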
    
    Before:
    ```text
    Doris> SELECT REGEXP_EXTRACT('foo\nbar', '^(.+)$', 1);
    +-----------------------------------------+
    | REGEXP_EXTRACT('foo\nbar', '^(.+)$', 1) |
    +-----------------------------------------+
    |                                         |
    +-----------------------------------------+
    ```
    
    After:
    ```text
    Doris> SELECT REGEXP_EXTRACT('foo\nbar', '^(.+)$', 1);
    +-----------------------------------------+
    | REGEXP_EXTRACT('foo\nbar', '^(.+)$', 1) |
    +-----------------------------------------+
    | foo
    bar                                 |
    +-----------------------------------------+
    ```
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/vec/functions/function_regexp.cpp           |  1 +
 .../test_string_function_regexp.out                | 45 ++++++++++++++++++++++
 .../test_string_function_regexp.groovy             | 18 +++++++++
 3 files changed, 64 insertions(+)

diff --git a/be/src/vec/functions/function_regexp.cpp 
b/be/src/vec/functions/function_regexp.cpp
index bc51ddb21a6..efa61b05cdd 100644
--- a/be/src/vec/functions/function_regexp.cpp
+++ b/be/src/vec/functions/function_regexp.cpp
@@ -66,6 +66,7 @@ struct RegexpExtractEngine {
                         RegexpExtractEngine& engine, bool 
enable_extended_regex) {
         re2::RE2::Options options;
         options.set_log_errors(false); // avoid RE2 printing to stderr; we 
handle errors ourselves
+        options.set_dot_nl(true); // make '.' match '\n' by default, 
consistent with REGEXP/LIKE
         engine.re2_regex =
                 std::make_unique<re2::RE2>(re2::StringPiece(pattern.data, 
pattern.size), options);
 
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
index d7994943fd5..d7422abb0e7 100644
--- 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
@@ -163,6 +163,21 @@ EdgeCase1
 -- !regexp_extract_3 --
 AA-1
 
+-- !regexp_extract_4 --
+foo\nbar
+
+-- !regexp_extract_5 --
+
+
+-- !regexp_extract_6 --
+foo\nbar
+
+-- !regexp_extract_7 --
+aXb
+
+-- !regexp_extract_8 --
+aXb
+
 -- !sql --
 b
 
@@ -178,6 +193,21 @@ d
 -- !regexp_extract_or_null_2 --
 B
 
+-- !regexp_extract_or_null_3 --
+foo\nbar
+
+-- !regexp_extract_or_null_4 --
+\N
+
+-- !regexp_extract_or_null_5 --
+foo\nbar
+
+-- !regexp_extract_or_null_6 --
+aXb
+
+-- !regexp_extract_or_null_7 --
+aXb
+
 -- !sql --
 ['18','17']
 
@@ -217,6 +247,21 @@ B
 -- !sql_regexp_extract_all_5 --
 ['Case1','Case2','Case3']
 
+-- !sql_regexp_extract_all_6 --
+['foo\nbar']
+
+-- !sql_regexp_extract_all_7 --
+
+
+-- !sql_regexp_extract_all_8 --
+['foo\nbar']
+
+-- !sql_regexp_extract_all_9 --
+['aXb','cXd']
+
+-- !sql_regexp_extract_all_10 --
+['aXb','cXd']
+
 -- !sql --
 a-b-c
 
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
index 7c9876d32d6..3a219a2e619 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
@@ -76,6 +76,12 @@ suite("test_string_function_regexp") {
     qt_regexp_extract_3 'SELECT regexp_extract(\'ID:AA-1,ID:BB-2,ID:CC-3\', 
\'(?<=ID:)([A-Z]{2}-\\\\d)(?=,ID|$)\', 1);'
     sql "set enable_extended_regex = false;"
 
+    qt_regexp_extract_4 "SELECT REGEXP_EXTRACT(concat('foo', char(10), 'bar'), 
'(foo.bar)', 1);"
+    qt_regexp_extract_5 "SELECT REGEXP_EXTRACT(concat('foo', char(10), 'bar'), 
'(?-s)(foo.bar)', 1);"
+    qt_regexp_extract_6 "SELECT REGEXP_EXTRACT(concat('foo', char(10), 'bar'), 
'(?s)(foo.bar)', 1);"
+    qt_regexp_extract_7 "SELECT REGEXP_EXTRACT(concat('aXb', char(10), 'cXd'), 
'(?-s)(a.b)', 1);"
+    qt_regexp_extract_8 "SELECT REGEXP_EXTRACT(concat('aXb', char(10), 'cXd'), 
'(a.b)', 1);"
+
     qt_sql "SELECT regexp_extract_or_null('AbCdE', 
'([[:lower:]]+)C([[:lower:]]+)', 1);"
     qt_sql "SELECT regexp_extract_or_null('AbCdE', 
'([[:lower:]]+)C([[:lower:]]+)', 2);"
     qt_sql "SELECT regexp_extract_or_null('AbCdE', 
'([[:lower:]]+)C([[:lower:]]+)', 3);"
@@ -90,6 +96,12 @@ suite("test_string_function_regexp") {
     qt_regexp_extract_or_null_2 "SELECT regexp_extract_or_null('TokenA TokenB 
TokenC', '(?<=Token)([A-Z])(?= TokenC)', 1);"
     sql "set enable_extended_regex = false;"
 
+    qt_regexp_extract_or_null_3 "SELECT REGEXP_EXTRACT_OR_NULL(concat('foo', 
char(10), 'bar'), '(foo.bar)', 1);"
+    qt_regexp_extract_or_null_4 "SELECT REGEXP_EXTRACT_OR_NULL(concat('foo', 
char(10), 'bar'), '(?-s)(foo.bar)', 1);"
+    qt_regexp_extract_or_null_5 "SELECT REGEXP_EXTRACT_OR_NULL(concat('foo', 
char(10), 'bar'), '(?s)(foo.bar)', 1);"
+    qt_regexp_extract_or_null_6 "SELECT REGEXP_EXTRACT_OR_NULL(concat('aXb', 
char(10), 'cXd'), '(?-s)(a.b)', 1);"
+    qt_regexp_extract_or_null_7 "SELECT REGEXP_EXTRACT_OR_NULL(concat('aXb', 
char(10), 'cXd'), '(a.b)', 1);"
+
     qt_sql "SELECT regexp_extract_all('x=a3&x=18abc&x=2&y=3&x=4&x=17bcd', 
'x=([0-9]+)([a-z]+)');"
     qt_sql "SELECT regexp_extract_all('http://a.m.baidu.com/i41915i73660.htm', 
'i([0-9]+)');"
     qt_sql "SELECT regexp_extract_all('abc=111, def=222, ghi=333', 
'(\"[^\"]+\"|\\\\w+)=(\"[^\"]+\"|\\\\w+)');"
@@ -110,6 +122,12 @@ suite("test_string_function_regexp") {
     qt_sql_regexp_extract_all_5 'SELECT 
REGEXP_EXTRACT_ALL(\'EdgeCase1EdgeCase2EdgeCase3\', 
\'(?<=Edge)(Case\\\\d)(?=Edge|$)\');'
     sql "set enable_extended_regex = false;"
 
+    qt_sql_regexp_extract_all_6 "SELECT REGEXP_EXTRACT_ALL(concat('foo', 
char(10), 'bar'), '(foo.bar)');"
+    qt_sql_regexp_extract_all_7 "SELECT REGEXP_EXTRACT_ALL(concat('foo', 
char(10), 'bar'), '(?-s)(foo.bar)');"
+    qt_sql_regexp_extract_all_8 "SELECT REGEXP_EXTRACT_ALL(concat('foo', 
char(10), 'bar'), '(?s)(foo.bar)');"
+    qt_sql_regexp_extract_all_9 "SELECT REGEXP_EXTRACT_ALL(concat('aXb', 
char(10), 'cXd'), '(?-s)(\\\\w.\\\\w)');"
+    qt_sql_regexp_extract_all_10 "SELECT REGEXP_EXTRACT_ALL(concat('aXb', 
char(10), 'cXd'), '(\\\\w.\\\\w)');"
+
     qt_sql "SELECT regexp_replace('a b c', \" \", \"-\");"
     qt_sql "SELECT regexp_replace('a b c','(b)','<\\\\1>');"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to