This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new e762233277 [feature](string_functions) The 'split_part' function supports non-constant parameters (#18333)
e762233277 is described below

commit e762233277b488ca3ebc9dd3aff05b34b11536d0
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Apr 3 20:38:06 2023 +0800

    [feature](string_functions) The 'split_part' function supports non-constant parameters (#18333)
---
 be/src/vec/functions/function_string.h             | 155 ++++++++++-----------
 .../string_functions/test_split_part.groovy        |  42 +++++-
 2 files changed, 114 insertions(+), 83 deletions(-)

diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index c891164508..a1e0cb9d32 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -1253,48 +1253,45 @@ public:
         auto& res_chars = res->get_chars();
         res_offsets.resize(input_rows_count);
 
-        ColumnPtr content_column =
-                
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-
-        if (auto* nullable = check_and_get_column<const 
ColumnNullable>(*content_column)) {
-            // Danger: Here must dispose the null map data first! Because
-            // argument_columns[0]=nullable->get_nested_column_ptr(); will 
release the mem
-            // of column nullable mem of null map
-            VectorizedUtils::update_null_map(null_map->get_data(), 
nullable->get_null_map_data());
-            content_column = nullable->get_nested_column_ptr();
-        }
-
-        for (size_t i = 1; i <= 2; i++) {
-            ColumnPtr columnPtr = 
remove_nullable(block.get_by_position(arguments[i]).column);
-
-            if (!is_column_const(*columnPtr)) {
-                return Status::RuntimeError("Argument at index {} for function 
{} must be constant",
-                                            i + 1, get_name());
+        const size_t argument_size = arguments.size();
+        ColumnPtr argument_columns[argument_size];
+        for (size_t i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<const 
ColumnNullable>(*argument_columns[i])) {
+                // Danger: Here must dispose the null map data first! Because
+                // argument_columns[i]=nullable->get_nested_column_ptr(); will 
release the mem
+                // of column nullable mem of null map
+                VectorizedUtils::update_null_map(null_map->get_data(),
+                                                 
nullable->get_null_map_data());
+                argument_columns[i] = nullable->get_nested_column_ptr();
             }
         }
 
-        auto str_col = assert_cast<const ColumnString*>(content_column.get());
+        auto str_col = assert_cast<const 
ColumnString*>(argument_columns[0].get());
 
-        const IColumn& delimiter_col = 
*block.get_by_position(arguments[1]).column;
-        const auto* delimiter_const = typeid_cast<const 
ColumnConst*>(&delimiter_col);
-        auto delimiter = delimiter_const->get_field().get<String>();
-        int32_t delimiter_size = delimiter.size();
+        auto delimiter_col = assert_cast<const 
ColumnString*>(argument_columns[1].get());
 
-        const IColumn& part_num_col = 
*block.get_by_position(arguments[2]).column;
-        const auto* part_num_col_const = typeid_cast<const 
ColumnConst*>(&part_num_col);
-        auto part_number = part_num_col_const->get_field().get<Int32>();
+        auto part_num_col = assert_cast<const 
ColumnInt32*>(argument_columns[2].get());
+        auto& part_num_col_data = part_num_col->get_data();
 
-        if (part_number >= 0) {
-            for (size_t i = 0; i < input_rows_count; ++i) {
-                if (part_number == 0) {
-                    StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
-                    continue;
-                }
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (part_num_col_data[i] == 0) {
+                StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
+                continue;
+            }
 
-                auto str = str_col->get_data_at(i);
-                if (delimiter_size == 0) {
-                    StringOP::push_empty_string(i, res_chars, res_offsets);
-                } else if (delimiter_size == 1) {
+            auto delimiter = delimiter_col->get_data_at(i);
+            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
+            auto part_number = part_num_col_data[i];
+            auto str = str_col->get_data_at(i);
+            if (delimiter.size == 0) {
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+                continue;
+            }
+
+            if (part_number > 0) {
+                if (delimiter.size == 1) {
                     // If delimiter is a char, use memchr to split
                     int32_t pre_offset = -1;
                     int32_t offset = -1;
@@ -1303,7 +1300,7 @@ public:
                         pre_offset = offset;
                         size_t n = str.size - offset - 1;
                         const char* pos = reinterpret_cast<const char*>(
-                                memchr(str.data + offset + 1, delimiter[0], 
n));
+                                memchr(str.data + offset + 1, 
delimiter_str[0], n));
                         if (pos != nullptr) {
                             offset = pos - str.data;
                             num++;
@@ -1325,15 +1322,15 @@ public:
                     }
                 } else {
                     // If delimiter is a string, use memmem to split
-                    int32_t pre_offset = -delimiter_size;
-                    int32_t offset = pre_offset;
+                    int32_t pre_offset = -delimiter.size;
+                    int32_t offset = -delimiter.size;
                     int32_t num = 0;
                     while (num < part_number) {
                         pre_offset = offset;
-                        size_t n = str.size - offset - delimiter_size;
-                        char* pos = reinterpret_cast<char*>(
-                                memmem(str.data + offset + delimiter_size, n, 
delimiter.c_str(),
-                                       delimiter_size));
+                        size_t n = str.size - offset - delimiter.size;
+                        char* pos =
+                                reinterpret_cast<char*>(memmem(str.data + 
offset + delimiter.size,
+                                                               n, 
delimiter.data, delimiter.size));
                         if (pos != nullptr) {
                             offset = pos - str.data;
                             num++;
@@ -1347,60 +1344,54 @@ public:
                     if (num == part_number) {
                         StringOP::push_value_string(
                                 std::string_view {reinterpret_cast<const 
char*>(
-                                                          str.data + 
pre_offset + delimiter_size),
-                                                  (size_t)offset - pre_offset 
- delimiter_size},
+                                                          str.data + 
pre_offset + delimiter.size),
+                                                  (size_t)offset - pre_offset 
- delimiter.size},
                                 i, res_chars, res_offsets);
                     } else {
                         StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
                     }
                 }
-            }
-        } else {
-            part_number = -part_number;
-            for (size_t i = 0; i < input_rows_count; ++i) {
-                if (delimiter_size == 0) {
-                    StringOP::push_empty_string(i, res_chars, res_offsets);
-                } else {
-                    auto str = str_col->get_data_at(i);
-                    auto str_str = str.to_string();
-                    int32_t offset = str.size;
-                    int32_t pre_offset = offset;
-                    int32_t num = 0;
-                    auto substr = str_str;
-                    while (num <= part_number && offset >= 0) {
-                        offset = (int)substr.rfind(delimiter, offset);
-                        if (offset != -1) {
-                            if (++num == part_number) {
-                                break;
-                            }
-                            pre_offset = offset;
-                            offset = offset - 1;
-                            substr = str_str.substr(0, pre_offset);
-                        } else {
+            } else {
+                part_number = -part_number;
+                auto str_str = str.to_string();
+                int32_t offset = str.size;
+                int32_t pre_offset = offset;
+                int32_t num = 0;
+                auto substr = str_str;
+                while (num <= part_number && offset >= 0) {
+                    offset = (int)substr.rfind(delimiter, offset);
+                    if (offset != -1) {
+                        if (++num == part_number) {
                             break;
                         }
+                        pre_offset = offset;
+                        offset = offset - 1;
+                        substr = str_str.substr(0, pre_offset);
+                    } else {
+                        break;
                     }
-                    num = (offset == -1 && num != 0) ? num + 1 : num;
+                }
+                num = (offset == -1 && num != 0) ? num + 1 : num;
 
-                    if (num == part_number) {
-                        if (offset == -1) {
-                            StringOP::push_value_string(
-                                    std::string_view {reinterpret_cast<const 
char*>(str.data),
-                                                      (size_t)pre_offset},
-                                    i, res_chars, res_offsets);
-                        } else {
-                            StringOP::push_value_string(
-                                    std::string_view {str_str.substr(
-                                            offset + delimiter_size,
-                                            (size_t)pre_offset - offset - 
delimiter_size)},
-                                    i, res_chars, res_offsets);
-                        }
+                if (num == part_number) {
+                    if (offset == -1) {
+                        StringOP::push_value_string(
+                                std::string_view {reinterpret_cast<const 
char*>(str.data),
+                                                  (size_t)pre_offset},
+                                i, res_chars, res_offsets);
                     } else {
-                        StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
+                        StringOP::push_value_string(
+                                std::string_view {str_str.substr(
+                                        offset + delimiter.size,
+                                        (size_t)pre_offset - offset - 
delimiter.size)},
+                                i, res_chars, res_offsets);
                     }
+                } else {
+                    StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
                 }
             }
         }
+
         block.get_by_position(result).column =
                 ColumnNullable::create(std::move(res), std::move(null_map));
         return Status::OK();
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
index b808be2f8f..800cd0042c 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
@@ -25,6 +25,46 @@ suite("test_split_part") {
       where
           split_part("bCKHDX07at", "5.7.37", cast(name as int)) is not null;
     """
-    exception "Argument at index 3 for function split_part must be constant"
   }
+
+  qt_1 "select split_part(k8, '1', 1), k8, split_part(concat(k8, '12'), '1', 
1) from test_query_db.test order by k8 limit 2;"
+
+  sql """
+    DROP TABLE IF EXISTS `test_split_part_non_const`;
+  """
+  sql """
+      CREATE TABLE `test_split_part_non_const`(
+      `id` LARGEINT,
+      `name` VARCHAR(20),
+      `age` SMALLINT,
+      `part` int,
+      `address` VARCHAR(100),
+      `test_t` string,
+      `date` DATE
+      )
+      DUPLICATE KEY (`id`,`name`)
+      DISTRIBUTED BY HASH (`id`) BUCKETS 3
+      PROPERTIES("replication_num" = "1");
+  """
+  sql """
+      INSERT INTO test_split_part_non_const VALUES (1,"kkk",18, -1, 
"beijing","a,b,c,d,e,f","2022-06-28"),
+          (2, "kkk",18, -2, "shanghai","a,b,c,d,e,f","2022-07-28"),
+          (3, "kkk",20, -3, "beijing","a,b,c,d,e,f","2022-06-28"),
+          (4, "hhh",45, -4, "beijing","a,b,c,d,e,f","2022-05-28");
+  """
+  qt_non_const1 """
+      select *, split_part(test_t, ',', id) from test_split_part_non_const 
order by id, name, age;
+  """
+
+  qt_non_const2 """
+      select *, split_part(test_t, ',c', id) from test_split_part_non_const 
order by id, name, age;
+  """
+
+  qt_non_const3 """
+      select *, split_part(test_t, ',', part) from test_split_part_non_const 
order by id, name, age;
+  """
+
+  qt_non_const4 """
+      select *, split_part(test_t, ',c', part) from test_split_part_non_const 
order by id, name, age;
+  """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to