This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 41b5aeb1c5 [fix](concat) ColumnString::chars is resized with wrong
size (#22610)
41b5aeb1c5 is described below
commit 41b5aeb1c5332e41dca42ac8bd6558ed5057b184
Author: TengJianPing <[email protected]>
AuthorDate: Fri Aug 4 19:13:35 2023 +0800
[fix](concat) ColumnString::chars is resized with wrong size (#22610)
FunctionStringConcat::execute_impl resized ColumnString::chars with a size that
included a null terminator for each row, so ColumnString::chars.size() did not
match ColumnString::offsets.back(). This mismatch causes problems for some string
functions, e.g. like and regexp.
---
be/src/vec/columns/column_string.cpp | 17 ++++++++++
be/src/vec/columns/column_string.h | 2 ++
be/src/vec/functions/function_string.h | 2 --
be/test/vec/core/column_string_test.cpp | 59 +++++++++++++++++++++++++++++++++
4 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index ed3cd28be9..5d2670acb7 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -35,6 +35,23 @@
namespace doris::vectorized {
+void ColumnString::sanity_check() const {
+ auto count = offsets.size();
+ if (chars.size() != offsets[count - 1]) {
+ LOG(FATAL) << "row count: " << count << ", chars.size(): " <<
chars.size() << ", offset["
+ << count - 1 << "]: " << offsets[count - 1];
+ }
+ if (offsets[-1] != 0) {
+ LOG(FATAL) << "wrong offsets[-1]: " << offsets[-1];
+ }
+ for (size_t i = 0; i < count; ++i) {
+ if (offsets[i] < offsets[i - 1]) {
+ LOG(FATAL) << "row count: " << count << ", offsets[" << i << "]: "
<< offsets[i]
+ << ", offsets[" << i - 1 << "]: " << offsets[i - 1];
+ }
+ }
+}
+
MutableColumnPtr ColumnString::clone_resized(size_t to_size) const {
auto res = ColumnString::create();
if (to_size == 0) {
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index 63ebeb4686..26a7093140 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -106,6 +106,8 @@ private:
chars(src.chars.begin(), src.chars.end()) {}
public:
+ void sanity_check() const;
+
const char* get_family_name() const override { return "String"; }
size_t size() const override { return offsets.size(); }
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index 83f98d726a..32e373ffa0 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -776,8 +776,6 @@ public:
if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
return Status::BufferAllocFailed("concat output is too large to
allocate");
}
- // for each terminal zero
- res_reserve_size += input_rows_count;
res_data.resize(res_reserve_size);
diff --git a/be/test/vec/core/column_string_test.cpp
b/be/test/vec/core/column_string_test.cpp
new file mode 100644
index 0000000000..81f41bd11c
--- /dev/null
+++ b/be/test/vec/core/column_string_test.cpp
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_string.h"
+
+#include <gtest/gtest.h>
+
+#include "vec/core/block.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function_string.h"
+
+namespace doris::vectorized {
+TEST(ColumnStringTest, TestConcat) {
+ Block block;
+ vectorized::DataTypePtr str_type =
std::make_shared<vectorized::DataTypeString>();
+
+ auto str_col0 = ColumnString::create();
+ std::vector<std::string> vals0 = {"aaa", "bb", "cccc"};
+ for (auto& v : vals0) {
+ str_col0->insert_data(v.data(), v.size());
+ }
+ block.insert({std::move(str_col0), str_type, "test_str_col0"});
+
+ auto str_col1 = ColumnString::create();
+ std::vector<std::string> vals1 = {"3", "2", "4"};
+ for (auto& v : vals1) {
+ str_col1->insert_data(v.data(), v.size());
+ }
+ block.insert({std::move(str_col1), str_type, "test_str_col1"});
+
+ auto str_col_res = ColumnString::create();
+ block.insert({std::move(str_col_res), str_type, "test_str_res"});
+
+ ColumnNumbers arguments = {0, 1};
+
+ FunctionStringConcat func_concat;
+ auto status = func_concat.execute_impl(nullptr, block, arguments, 2, 3);
+ EXPECT_TRUE(status.ok());
+
+ auto actual_res_col = block.get_by_position(2).column;
+ EXPECT_EQ(actual_res_col->size(), 3);
+ auto actual_res_col_str = assert_cast<const
ColumnString*>(actual_res_col.get());
+ actual_res_col_str->sanity_check();
+}
+} // namespace doris::vectorized
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]