This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new a90684ee322 [fix](columns) fix bug found by UT and add regression test
(#48554) (#48854)
a90684ee322 is described below
commit a90684ee32298fbd0ef1f9e60be8a95c0cb670e3
Author: TengJianPing <[email protected]>
AuthorDate: Mon Apr 7 15:15:14 2025 +0800
[fix](columns) fix bug found by UT and add regression test (#48554) (#48854)
Related PR: pick #48554
---
be/src/vec/columns/column_decimal.cpp | 10 ++---
be/src/vec/columns/column_string.cpp | 33 ++++++++-------
be/src/vec/columns/column_string.h | 1 +
be/src/vec/columns/columns_number.h | 1 -
be/src/vec/core/field.cpp | 6 +--
.../data_types/serde/data_type_decimal_serde.cpp | 2 +-
regression-test/data/query_p0/sort/heap_sort.csv | 7 ++++
regression-test/data/query_p0/sort/heap_sort.out | Bin 0 -> 193 bytes
.../suites/query_p0/sort/heap_sort.groovy | 46 +++++++++++++++++++++
9 files changed, 80 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/columns/column_decimal.cpp
b/be/src/vec/columns/column_decimal.cpp
index cf0193b29e1..306876a393c 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -100,12 +100,10 @@ void
ColumnDecimal<T>::serialize_vec_with_null_map(std::vector<StringRef>& keys,
}
} else {
for (size_t i = 0; i < num_rows; ++i) {
- if (null_map[i] == 0) {
- char* __restrict dest = const_cast<char*>(keys[i].data +
+keys[i].size);
- memset(dest, 0, 1);
- memcpy_fixed<T>(dest + 1, (char*)&data[i]);
- keys[i].size += sizeof(T) + sizeof(UInt8);
- }
+ char* __restrict dest = const_cast<char*>(keys[i].data +
+keys[i].size);
+ memset(dest, 0, 1);
+ memcpy_fixed<T>(dest + 1, (char*)&data[i]);
+ keys[i].size += sizeof(T) + sizeof(UInt8);
}
}
}
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index b61e28ce3f8..734eb2be9aa 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -121,9 +121,9 @@ void ColumnStr<T>::insert_range_from_ignore_overflow(const
doris::vectorized::IC
const auto& src_concrete = assert_cast<const ColumnStr<T>&>(src);
if (start + length > src_concrete.offsets.size()) {
- throw doris::Exception(
- doris::ErrorCode::INTERNAL_ERROR,
- "Parameter out of bound in IColumnStr<T>::insert_range_from
method.");
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "Parameter out of bound in "
+
"IColumnStr<T>::insert_range_from_ignore_overflow method.");
}
size_t nested_offset = src_concrete.offset_at(start);
@@ -292,11 +292,11 @@ void ColumnStr<T>::update_crcs_with_value(uint32_t*
__restrict hashes, doris::Pr
template <typename T>
ColumnPtr ColumnStr<T>::filter(const IColumn::Filter& filt, ssize_t
result_size_hint) const {
- if (offsets.size() == 0) {
- return ColumnStr<T>::create();
- }
-
if constexpr (std::is_same_v<UInt32, T>) {
+ if (offsets.size() == 0) {
+ return ColumnStr<T>::create();
+ }
+
auto res = ColumnStr<T>::create();
Chars& res_chars = res->chars;
IColumn::Offsets& res_offsets = res->offsets;
@@ -312,13 +312,13 @@ ColumnPtr ColumnStr<T>::filter(const IColumn::Filter&
filt, ssize_t result_size_
template <typename T>
size_t ColumnStr<T>::filter(const IColumn::Filter& filter) {
- CHECK_EQ(filter.size(), offsets.size());
- if (offsets.size() == 0) {
- resize(0);
- return 0;
- }
-
if constexpr (std::is_same_v<UInt32, T>) {
+ CHECK_EQ(filter.size(), offsets.size());
+ if (offsets.size() == 0) {
+ resize(0);
+ return 0;
+ }
+
auto res = filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets,
filter);
sanity_check_simple();
return res;
@@ -636,8 +636,11 @@ void ColumnStr<T>::compare_internal(size_t rhs_row_id,
const IColumn& rhs, int n
size_t end = simd::find_one(cmp_res, begin + 1);
for (size_t row_id = begin; row_id < end; row_id++) {
auto value_a = get_data_at(row_id);
- int res = memcmp_small_allow_overflow15(value_a.data,
value_a.size, cmp_base.data,
- cmp_base.size);
+ // need to covert to unsigned char, orelse the compare semantic is
not consistent
+ // with other member functions, e.g. get_permutation and
compare_at,
+ // and will result wrong result.
+ int res = memcmp_small_allow_overflow15((Char*)value_a.data,
value_a.size,
+ (Char*)cmp_base.data,
cmp_base.size);
cmp_res[row_id] = res != 0;
filter[row_id] = res * direction < 0;
}
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index 33afabfc576..37b9218ed7b 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -224,6 +224,7 @@ public:
void insert_many_strings_without_reserve(const StringRef* strings, size_t
num) {
Char* data = chars.data();
size_t offset = chars.size();
+ data += offset;
size_t length = 0;
const char* ptr = strings[0].data;
diff --git a/be/src/vec/columns/columns_number.h
b/be/src/vec/columns/columns_number.h
index 33dbe4c8c63..9c0005e9d5d 100644
--- a/be/src/vec/columns/columns_number.h
+++ b/be/src/vec/columns/columns_number.h
@@ -32,7 +32,6 @@ using ColumnUInt8 = ColumnVector<UInt8>;
using ColumnUInt16 = ColumnVector<UInt16>;
using ColumnUInt32 = ColumnVector<UInt32>;
using ColumnUInt64 = ColumnVector<UInt64>;
-using ColumnUInt128 = ColumnVector<UInt128>;
using ColumnInt8 = ColumnVector<Int8>;
using ColumnInt16 = ColumnVector<Int16>;
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index e652fc2dc9e..ac92605007d 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -174,14 +174,14 @@ DECLARE_DECIMAL_COMPARISON(Decimal256)
template <>
bool decimal_equal(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) {
- return dec_equal(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys);
+ return dec_equal(x, y, xs, ys);
}
template <>
bool decimal_less(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) {
- return dec_less(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys);
+ return dec_less(x, y, xs, ys);
}
template <>
bool decimal_less_or_equal(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32
ys) {
- return dec_less_or_equal(Decimal128V2(x.value), Decimal128V2(y.value), xs,
ys);
+ return dec_less_or_equal(x, y, xs, ys);
}
} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index acb09ee773e..d98f6cae2b0 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -79,7 +79,7 @@ Status
DataTypeDecimalSerDe<T>::deserialize_one_cell_from_json(IColumn& column,
return Status::OK();
}
return Status::InvalidArgument("parse decimal fail, string: '{}',
primitive type: '{}'",
- std::string(rb.position(),
rb.count()).c_str(),
+ std::string(slice.data, slice.size).c_str(),
get_primitive_type());
}
diff --git a/regression-test/data/query_p0/sort/heap_sort.csv
b/regression-test/data/query_p0/sort/heap_sort.csv
new file mode 100644
index 00000000000..8b7670cf735
--- /dev/null
+++ b/regression-test/data/query_p0/sort/heap_sort.csv
@@ -0,0 +1,7 @@
+1;中文测试a
+9;a
+3;中文测试b
+2;b
+5;c
+6;d
+1;e
diff --git a/regression-test/data/query_p0/sort/heap_sort.out
b/regression-test/data/query_p0/sort/heap_sort.out
new file mode 100644
index 00000000000..6d8b24cc800
Binary files /dev/null and b/regression-test/data/query_p0/sort/heap_sort.out
differ
diff --git a/regression-test/suites/query_p0/sort/heap_sort.groovy
b/regression-test/suites/query_p0/sort/heap_sort.groovy
new file mode 100644
index 00000000000..aa29a9893fa
--- /dev/null
+++ b/regression-test/suites/query_p0/sort/heap_sort.groovy
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases is copied from https://github.com/trinodb/trino/tree/master
+//
/testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("heap_sort") {
+ sql """
+ drop table if exists test_heap_sort;
+ """
+ sql """
+ create table if not exists test_heap_sort (
+ f1 int,
+ f2 varchar(65533)
+ ) properties("replication_num"="1", "disable_auto_compaction"="true");
+ """
+ streamLoad {
+ table "test_heap_sort"
+
+ set 'column_separator', ';'
+
+ file 'heap_sort.csv'
+
+ time 10000 // limit inflight 10s
+ }
+ sql """ set parallel_pipeline_task_num = 1; """
+ sql """ set batch_size = 3; """
+ sql """ set force_sort_algorithm = "heap"; """
+ qt_select_1 "select * from test_heap_sort order by f1, f2;"
+ qt_select_2 "select * from test_heap_sort order by f2 limit 3;"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]