This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f84481886b [feature](string_functions) The 'split_part' function supports non-constant parameters (#18029)
f84481886b is described below

commit f84481886b35b6b71db3469641b3e0d74ab6dd78
Author: Jerry Hu <[email protected]>
AuthorDate: Sat Mar 25 12:03:11 2023 +0800

    [feature](string_functions) The 'split_part' function supports non-constant parameters (#18029)
---
 be/src/vec/functions/function_string.h             | 155 ++++++++++-----------
 .../string_functions/test_split_part.out           |  24 ++++
 .../string_functions/test_split_part.groovy        |   1 -
 .../string_functions/test_split_part.groovy        |  40 +++++-
 4 files changed, 136 insertions(+), 84 deletions(-)

diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 855f15a611..f257ad3537 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -1325,48 +1325,45 @@ public:
         auto& res_chars = res->get_chars();
         res_offsets.resize(input_rows_count);
 
-        ColumnPtr content_column =
-                
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-
-        if (auto* nullable = check_and_get_column<const 
ColumnNullable>(*content_column)) {
-            // Danger: Here must dispose the null map data first! Because
-            // argument_columns[0]=nullable->get_nested_column_ptr(); will 
release the mem
-            // of column nullable mem of null map
-            VectorizedUtils::update_null_map(null_map->get_data(), 
nullable->get_null_map_data());
-            content_column = nullable->get_nested_column_ptr();
-        }
-
-        for (size_t i = 1; i <= 2; i++) {
-            ColumnPtr columnPtr = 
remove_nullable(block.get_by_position(arguments[i]).column);
-
-            if (!is_column_const(*columnPtr)) {
-                return Status::RuntimeError("Argument at index {} for function 
{} must be constant",
-                                            i + 1, get_name());
+        const size_t argument_size = arguments.size();
+        ColumnPtr argument_columns[argument_size];
+        for (size_t i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<const 
ColumnNullable>(*argument_columns[i])) {
+                // Danger: Here must dispose the null map data first! Because
+                // argument_columns[i]=nullable->get_nested_column_ptr(); will 
release the mem
+                // of column nullable mem of null map
+                VectorizedUtils::update_null_map(null_map->get_data(),
+                                                 
nullable->get_null_map_data());
+                argument_columns[i] = nullable->get_nested_column_ptr();
             }
         }
 
-        auto str_col = assert_cast<const ColumnString*>(content_column.get());
+        auto str_col = assert_cast<const 
ColumnString*>(argument_columns[0].get());
 
-        const IColumn& delimiter_col = 
*block.get_by_position(arguments[1]).column;
-        const auto* delimiter_const = typeid_cast<const 
ColumnConst*>(&delimiter_col);
-        auto delimiter = delimiter_const->get_field().get<String>();
-        int32_t delimiter_size = delimiter.size();
+        auto delimiter_col = assert_cast<const 
ColumnString*>(argument_columns[1].get());
 
-        const IColumn& part_num_col = 
*block.get_by_position(arguments[2]).column;
-        const auto* part_num_col_const = typeid_cast<const 
ColumnConst*>(&part_num_col);
-        auto part_number = part_num_col_const->get_field().get<Int32>();
+        auto part_num_col = assert_cast<const 
ColumnInt32*>(argument_columns[2].get());
+        auto& part_num_col_data = part_num_col->get_data();
 
-        if (part_number >= 0) {
-            for (size_t i = 0; i < input_rows_count; ++i) {
-                if (part_number == 0) {
-                    StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
-                    continue;
-                }
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (part_num_col_data[i] == 0) {
+                StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
+                continue;
+            }
 
-                auto str = str_col->get_data_at(i);
-                if (delimiter_size == 0) {
-                    StringOP::push_empty_string(i, res_chars, res_offsets);
-                } else if (delimiter_size == 1) {
+            auto delimiter = delimiter_col->get_data_at(i);
+            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
+            auto part_number = part_num_col_data[i];
+            auto str = str_col->get_data_at(i);
+            if (delimiter.size == 0) {
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+                continue;
+            }
+
+            if (part_number > 0) {
+                if (delimiter.size == 1) {
                     // If delimiter is a char, use memchr to split
                     int32_t pre_offset = -1;
                     int32_t offset = -1;
@@ -1375,7 +1372,7 @@ public:
                         pre_offset = offset;
                         size_t n = str.size - offset - 1;
                         const char* pos = reinterpret_cast<const char*>(
-                                memchr(str.data + offset + 1, delimiter[0], 
n));
+                                memchr(str.data + offset + 1, 
delimiter_str[0], n));
                         if (pos != nullptr) {
                             offset = pos - str.data;
                             num++;
@@ -1397,15 +1394,15 @@ public:
                     }
                 } else {
                     // If delimiter is a string, use memmem to split
-                    int32_t pre_offset = -delimiter_size;
-                    int32_t offset = pre_offset;
+                    int32_t pre_offset = -delimiter.size;
+                    int32_t offset = -delimiter.size;
                     int32_t num = 0;
                     while (num < part_number) {
                         pre_offset = offset;
-                        size_t n = str.size - offset - delimiter_size;
-                        char* pos = reinterpret_cast<char*>(
-                                memmem(str.data + offset + delimiter_size, n, 
delimiter.c_str(),
-                                       delimiter_size));
+                        size_t n = str.size - offset - delimiter.size;
+                        char* pos =
+                                reinterpret_cast<char*>(memmem(str.data + 
offset + delimiter.size,
+                                                               n, 
delimiter.data, delimiter.size));
                         if (pos != nullptr) {
                             offset = pos - str.data;
                             num++;
@@ -1419,60 +1416,54 @@ public:
                     if (num == part_number) {
                         StringOP::push_value_string(
                                 std::string_view {reinterpret_cast<const 
char*>(
-                                                          str.data + 
pre_offset + delimiter_size),
-                                                  (size_t)offset - pre_offset 
- delimiter_size},
+                                                          str.data + 
pre_offset + delimiter.size),
+                                                  (size_t)offset - pre_offset 
- delimiter.size},
                                 i, res_chars, res_offsets);
                     } else {
                         StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
                     }
                 }
-            }
-        } else {
-            part_number = -part_number;
-            for (size_t i = 0; i < input_rows_count; ++i) {
-                if (delimiter_size == 0) {
-                    StringOP::push_empty_string(i, res_chars, res_offsets);
-                } else {
-                    auto str = str_col->get_data_at(i);
-                    auto str_str = str.to_string();
-                    int32_t offset = str.size;
-                    int32_t pre_offset = offset;
-                    int32_t num = 0;
-                    auto substr = str_str;
-                    while (num <= part_number && offset >= 0) {
-                        offset = (int)substr.rfind(delimiter, offset);
-                        if (offset != -1) {
-                            if (++num == part_number) {
-                                break;
-                            }
-                            pre_offset = offset;
-                            offset = offset - 1;
-                            substr = str_str.substr(0, pre_offset);
-                        } else {
+            } else {
+                part_number = -part_number;
+                auto str_str = str.to_string();
+                int32_t offset = str.size;
+                int32_t pre_offset = offset;
+                int32_t num = 0;
+                auto substr = str_str;
+                while (num <= part_number && offset >= 0) {
+                    offset = (int)substr.rfind(delimiter, offset);
+                    if (offset != -1) {
+                        if (++num == part_number) {
                             break;
                         }
+                        pre_offset = offset;
+                        offset = offset - 1;
+                        substr = str_str.substr(0, pre_offset);
+                    } else {
+                        break;
                     }
-                    num = (offset == -1 && num != 0) ? num + 1 : num;
+                }
+                num = (offset == -1 && num != 0) ? num + 1 : num;
 
-                    if (num == part_number) {
-                        if (offset == -1) {
-                            StringOP::push_value_string(
-                                    std::string_view {reinterpret_cast<const 
char*>(str.data),
-                                                      (size_t)pre_offset},
-                                    i, res_chars, res_offsets);
-                        } else {
-                            StringOP::push_value_string(
-                                    std::string_view {str_str.substr(
-                                            offset + delimiter_size,
-                                            (size_t)pre_offset - offset - 
delimiter_size)},
-                                    i, res_chars, res_offsets);
-                        }
+                if (num == part_number) {
+                    if (offset == -1) {
+                        StringOP::push_value_string(
+                                std::string_view {reinterpret_cast<const 
char*>(str.data),
+                                                  (size_t)pre_offset},
+                                i, res_chars, res_offsets);
                     } else {
-                        StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
+                        StringOP::push_value_string(
+                                std::string_view {str_str.substr(
+                                        offset + delimiter.size,
+                                        (size_t)pre_offset - offset - 
delimiter.size)},
+                                i, res_chars, res_offsets);
                     }
+                } else {
+                    StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
                 }
             }
         }
+
         block.get_by_position(result).column =
                 ColumnNullable::create(std::move(res), std::move(null_map));
         return Status::OK();
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_split_part.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_split_part.out
index 02bb117d12..93d1183414 100644
--- 
a/regression-test/data/query_p0/sql_functions/string_functions/test_split_part.out
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_split_part.out
@@ -3,3 +3,27 @@
 0.     0.1     0.
 \N     20.268  20.268
 
+-- !non_const1 --
+1      kkk     18      -1      beijing a,b,c,d,e,f     2022-06-28      a
+2      kkk     18      -2      shanghai        a,b,c,d,e,f     2022-07-28      
b
+3      kkk     20      -3      beijing a,b,c,d,e,f     2022-06-28      c
+4      hhh     45      -4      beijing a,b,c,d,e,f     2022-05-28      d
+
+-- !non_const2 --
+1      kkk     18      -1      beijing a,b,c,d,e,f     2022-06-28      a,b
+2      kkk     18      -2      shanghai        a,b,c,d,e,f     2022-07-28      
,d,e,f
+3      kkk     20      -3      beijing a,b,c,d,e,f     2022-06-28      \N
+4      hhh     45      -4      beijing a,b,c,d,e,f     2022-05-28      \N
+
+-- !non_const3 --
+1      kkk     18      -1      beijing a,b,c,d,e,f     2022-06-28      f
+2      kkk     18      -2      shanghai        a,b,c,d,e,f     2022-07-28      
e
+3      kkk     20      -3      beijing a,b,c,d,e,f     2022-06-28      d
+4      hhh     45      -4      beijing a,b,c,d,e,f     2022-05-28      c
+
+-- !non_const4 --
+1      kkk     18      -1      beijing a,b,c,d,e,f     2022-06-28      ,d,e,f
+2      kkk     18      -2      shanghai        a,b,c,d,e,f     2022-07-28      
a,b
+3      kkk     20      -3      beijing a,b,c,d,e,f     2022-06-28      \N
+4      hhh     45      -4      beijing a,b,c,d,e,f     2022-05-28      \N
+
diff --git 
a/regression-test/suites/nereids_p0/sql_functions/string_functions/test_split_part.groovy
 
b/regression-test/suites/nereids_p0/sql_functions/string_functions/test_split_part.groovy
index a48017f5b7..fe3d82b422 100644
--- 
a/regression-test/suites/nereids_p0/sql_functions/string_functions/test_split_part.groovy
+++ 
b/regression-test/suites/nereids_p0/sql_functions/string_functions/test_split_part.groovy
@@ -27,7 +27,6 @@ suite("test_split_part") {
       where
           split_part("bCKHDX07at", "5.7.37", cast(name as int)) is not null;
     """
-    exception "Argument at index 3 for function split_part must be constant"
   }
 
   qt_1 "select split_part(k8, '1', 1), k8, split_part(concat(k8, '12'), '1', 
1) from test_query_db.test order by k8 limit 2;"
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
index 42a183c2f3..286c7c95f2 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
@@ -25,8 +25,46 @@ suite("test_split_part") {
       where
           split_part("bCKHDX07at", "5.7.37", cast(name as int)) is not null;
     """
-    exception "Argument at index 3 for function split_part must be constant"
   }
 
   qt_1 "select split_part(k8, '1', 1), k8, split_part(concat(k8, '12'), '1', 
1) from test_query_db.test order by k8 limit 2;"
+
+  sql """
+    DROP TABLE IF EXISTS `test_split_part_non_const`;
+  """
+  sql """
+      CREATE TABLE `test_split_part_non_const`(
+      `id` LARGEINT,
+      `name` VARCHAR(20),
+      `age` SMALLINT,
+      `part` int,
+      `address` VARCHAR(100),
+      `test_t` string,
+      `date` DATE
+      )
+      DUPLICATE KEY (`id`,`name`)
+      DISTRIBUTED BY HASH (`id`) BUCKETS 3
+      PROPERTIES("replication_num" = "1");
+  """
+  sql """
+      INSERT INTO test_split_part_non_const VALUES (1,"kkk",18, -1, 
"beijing","a,b,c,d,e,f","2022-06-28"),
+          (2, "kkk",18, -2, "shanghai","a,b,c,d,e,f","2022-07-28"),
+          (3, "kkk",20, -3, "beijing","a,b,c,d,e,f","2022-06-28"),
+          (4, "hhh",45, -4, "beijing","a,b,c,d,e,f","2022-05-28");
+  """
+  qt_non_const1 """
+      select *, split_part(test_t, ',', id) from test_split_part_non_const 
order by id, name, age;
+  """
+
+  qt_non_const2 """
+      select *, split_part(test_t, ',c', id) from test_split_part_non_const 
order by id, name, age;
+  """
+
+  qt_non_const3 """
+      select *, split_part(test_t, ',', part) from test_split_part_non_const 
order by id, name, age;
+  """
+
+  qt_non_const4 """
+      select *, split_part(test_t, ',c', part) from test_split_part_non_const 
order by id, name, age;
+  """
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to