This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a1e482cca31 [opt](sort)use HybridSorter to choose between timsort and
pdqsort (#59207)
a1e482cca31 is described below
commit a1e482cca31b9be61fa7c2cdc3cb34f46e0de397
Author: Mryange <[email protected]>
AuthorDate: Mon Jan 5 10:20:24 2026 +0800
[opt](sort)use HybridSorter to choose between timsort and pdqsort (#59207)
### What problem does this PR solve?
Timsort is faster than pdqsort for partially-ordered data.
Introduce a HybridSorter that samples at sort time to choose between
timsort and pdqsort.
This optimization can be disabled with enable_use_hybrid_sort; it is
enabled by default.
```
Run on (128 X 2250.02 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x64)
L1 Instruction 32 KiB (x64)
L2 Unified 1024 KiB (x64)
L3 Unified 16384 KiB (x8)
Load Average: 36.25, 46.31, 138.22
--------------------------------------------------------------------------------
Benchmark Time CPU Iterations
UserCounters...
--------------------------------------------------------------------------------
BM_PdqSort/10000000/0 350 ms 350 ms 2
items_per_second=28.5574M/s random
BM_PdqSort/10000000/1 216 ms 216 ms 3
items_per_second=46.3077M/s ascending_saw
BM_PdqSort/10000000/2 217 ms 217 ms 3
items_per_second=46.1045M/s descending_saw
BM_PdqSort/10000000/3 274 ms 274 ms 3
items_per_second=36.4636M/s generic
BM_PdqSort/10000000/4 326 ms 326 ms 2
items_per_second=30.6559M/s random_tail
BM_PdqSort/10000000/5 346 ms 346 ms 2
items_per_second=28.8922M/s random_half
BM_PdqSort/10000000/6 230 ms 230 ms 3
items_per_second=43.5162M/s wave
BM_TimSort/10000000/0 1113 ms 1113 ms 1
items_per_second=8.98417M/s random
BM_TimSort/10000000/1 89.9 ms 89.9 ms 8
items_per_second=111.209M/s ascending_saw
BM_TimSort/10000000/2 91.0 ms 91.0 ms 8
items_per_second=109.926M/s descending_saw
BM_TimSort/10000000/3 533 ms 533 ms 1
items_per_second=18.7505M/s generic
BM_TimSort/10000000/4 228 ms 228 ms 3
items_per_second=43.7805M/s random_tail
BM_TimSort/10000000/5 559 ms 559 ms 1
items_per_second=17.8817M/s random_half
BM_TimSort/10000000/6 87.4 ms 87.4 ms 8
items_per_second=114.384M/s wave
```
---
be/src/pipeline/exec/sort_sink_operator.cpp | 5 +-
be/src/runtime/runtime_state.h | 5 +
.../aggregate_functions/aggregate_function_sort.h | 5 +-
be/src/vec/columns/column.h | 11 +-
be/src/vec/columns/column_array.cpp | 6 +-
be/src/vec/columns/column_array.h | 2 +-
be/src/vec/columns/column_const.cpp | 2 +-
be/src/vec/columns/column_const.h | 2 +-
be/src/vec/columns/column_decimal.cpp | 6 +-
be/src/vec/columns/column_decimal.h | 13 +-
be/src/vec/columns/column_dummy.h | 2 +-
be/src/vec/columns/column_map.cpp | 6 +-
be/src/vec/columns/column_map.h | 2 +-
be/src/vec/columns/column_nullable.cpp | 4 +-
be/src/vec/columns/column_nullable.h | 2 +-
be/src/vec/columns/column_string.cpp | 6 +-
be/src/vec/columns/column_string.h | 2 +-
be/src/vec/columns/column_struct.cpp | 6 +-
be/src/vec/columns/column_struct.h | 2 +-
be/src/vec/columns/column_varbinary.cpp | 6 +-
be/src/vec/columns/column_varbinary.h | 2 +-
be/src/vec/columns/column_vector.cpp | 6 +-
be/src/vec/columns/column_vector.h | 2 +-
be/src/vec/common/sort/heap_sorter.cpp | 9 +-
be/src/vec/common/sort/heap_sorter.h | 4 +-
be/src/vec/common/sort/partition_sorter.cpp | 2 +-
be/src/vec/common/sort/sorter.cpp | 4 +-
be/src/vec/common/sort/sorter.h | 9 +-
be/src/vec/common/sort/topn_sorter.cpp | 2 +-
be/src/vec/core/hybrid_sorter.h | 205 +++++++++++++++++++++
be/src/vec/core/sort_block.cpp | 7 +-
be/src/vec/core/sort_block.h | 16 +-
.../operator/hashjoin_probe_operator_test.cpp | 3 +-
be/test/testutil/mock/mock_runtime_state.h | 2 +
be/test/vec/columns/column_const_test.cpp | 2 +-
be/test/vec/columns/column_varbinary_test.cpp | 7 +-
be/test/vec/columns/common_column_test.h | 9 +-
.../vec/data_types/data_type_timestamptz_test.cpp | 3 +-
be/test/vec/exec/sort/full_sort_test.cpp | 6 +-
be/test/vec/exec/sort/heap_sorter_test.cpp | 4 +-
be/test/vec/exec/sort/partition_sorter_test.cpp | 6 +-
be/test/vec/exec/sort/sort_test.cpp | 14 +-
be/test/vec/exec/sort/topn_sort_test.cpp | 4 +-
.../java/org/apache/doris/qe/SessionVariable.java | 15 ++
gensrc/thrift/PaloInternalService.thrift | 4 +
45 files changed, 352 insertions(+), 90 deletions(-)
diff --git a/be/src/pipeline/exec/sort_sink_operator.cpp
b/be/src/pipeline/exec/sort_sink_operator.cpp
index b4d0e4267be..10f89c38722 100644
--- a/be/src/pipeline/exec/sort_sink_operator.cpp
+++ b/be/src/pipeline/exec/sort_sink_operator.cpp
@@ -48,8 +48,9 @@ Status SortSinkLocalState::open(RuntimeState* state) {
switch (p._algorithm) {
case TSortAlgorithm::HEAP_SORT: {
_shared_state->sorter = vectorized::HeapSorter::create_shared(
- _vsort_exec_exprs, p._limit, p._offset, p._pool,
p._is_asc_order, p._nulls_first,
- p._child->row_desc(),
state->get_query_ctx()->has_runtime_predicate(p._node_id));
+ _vsort_exec_exprs, state, p._limit, p._offset, p._pool,
p._is_asc_order,
+ p._nulls_first, p._child->row_desc(),
+ state->get_query_ctx()->has_runtime_predicate(p._node_id));
break;
}
case TSortAlgorithm::TOPN_SORT: {
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index e7c66436981..b781edfdee2 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -687,6 +687,11 @@ public:
}
}
+ MOCK_FUNCTION bool enable_use_hybrid_sort() const {
+ return _query_options.__isset.enable_use_hybrid_sort &&
+ _query_options.enable_use_hybrid_sort;
+ }
+
void set_max_operator_id(int max_operator_id) { _max_operator_id =
max_operator_id; }
int max_operator_id() const { return _max_operator_id; }
diff --git a/be/src/vec/aggregate_functions/aggregate_function_sort.h
b/be/src/vec/aggregate_functions/aggregate_function_sort.h
index 3d8bc2480c2..86f1fe63fd0 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_sort.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_sort.h
@@ -110,7 +110,10 @@ struct AggregateFunctionSortData {
}
}
- void sort() { sort_block(block, block, sort_desc, block.rows()); }
+ void sort() {
+ HybridSorter hybrid_sorter;
+ sort_block(block, block, sort_desc, hybrid_sorter, block.rows());
+ }
};
template <typename Data>
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 1fbbdae8992..99653bd4acf 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -36,6 +36,7 @@
#include "vec/common/string_ref.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/field.h"
+#include "vec/core/hybrid_sorter.h"
#include "vec/core/types.h"
namespace doris {
@@ -491,11 +492,19 @@ public:
* nan_direction_hint - see above.
*/
virtual void get_permutation(bool reverse, size_t limit, int
nan_direction_hint,
- Permutation& res) const {
+ HybridSorter& sorter, Permutation& res) const
{
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"get_permutation for " + get_name());
}
+#ifdef BE_TEST
+ void get_permutation_default(bool reverse, size_t limit, int
nan_direction_hint,
+ Permutation& res) const {
+ HybridSorter sorter;
+ get_permutation(reverse, limit, nan_direction_hint, sorter, res);
+ }
+#endif
+
/** Split column to smaller columns. Each value goes to column index,
selected by corresponding element of 'selector'.
* Selector must contain values from 0 to num_columns - 1.
* For default implementation, see column_impl.h
diff --git a/be/src/vec/columns/column_array.cpp
b/be/src/vec/columns/column_array.cpp
index 0ee4ef0a07a..084073de7e9 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -241,7 +241,7 @@ struct ColumnArray::less {
};
void ColumnArray::get_permutation(bool reverse, size_t limit, int
nan_direction_hint,
- IColumn::Permutation& res) const {
+ HybridSorter& sorter, IColumn::Permutation&
res) const {
size_t s = size();
res.resize(s);
for (size_t i = 0; i < s; ++i) {
@@ -249,9 +249,9 @@ void ColumnArray::get_permutation(bool reverse, size_t
limit, int nan_direction_
}
if (reverse) {
- pdqsort(res.begin(), res.end(), ColumnArray::less<false>(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), ColumnArray::less<false>(*this,
nan_direction_hint));
} else {
- pdqsort(res.begin(), res.end(), ColumnArray::less<true>(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), ColumnArray::less<true>(*this,
nan_direction_hint));
}
}
diff --git a/be/src/vec/columns/column_array.h
b/be/src/vec/columns/column_array.h
index 2bf2a36f9f4..7d67cdf1233 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -171,7 +171,7 @@ public:
size_t allocated_bytes() const override;
bool has_enough_capacity(const IColumn& src) const override;
void insert_many_from(const IColumn& src, size_t position, size_t length)
override;
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
IColumn::Permutation& res) const override;
void sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms,
EqualRange& range, bool last_column) const override;
diff --git a/be/src/vec/columns/column_const.cpp
b/be/src/vec/columns/column_const.cpp
index 6f7dd517233..756d989879f 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -101,7 +101,7 @@ MutableColumnPtr ColumnConst::permute(const Permutation&
perm, size_t limit) con
}
void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int
/*nan_direction_hint*/,
- Permutation& res) const {
+ HybridSorter& /*sorter*/, Permutation& res)
const {
res.resize(s);
for (size_t i = 0; i < s; ++i) {
res[i] = i;
diff --git a/be/src/vec/columns/column_const.h
b/be/src/vec/columns/column_const.h
index 8f21b8f8f63..741a4572159 100644
--- a/be/src/vec/columns/column_const.h
+++ b/be/src/vec/columns/column_const.h
@@ -225,7 +225,7 @@ public:
MutableColumnPtr permute(const Permutation& perm, size_t limit) const
override;
// ColumnPtr index(const IColumn & indexes, size_t limit) const override;
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
Permutation& res) const override;
size_t byte_size() const override { return s > 0 ? data->byte_size() +
sizeof(s) : 0; }
diff --git a/be/src/vec/columns/column_decimal.cpp
b/be/src/vec/columns/column_decimal.cpp
index 782514fcc6f..95b3291e4d0 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -286,18 +286,16 @@ Field ColumnDecimal<T>::operator[](size_t n) const {
}
template <PrimitiveType T>
-void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int,
+void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int,
HybridSorter& sorter,
IColumn::Permutation& res) const {
-#if 1 /// TODO: perf test
if (data.size() <= std::numeric_limits<UInt32>::max()) {
PaddedPODArray<UInt32> tmp_res;
- permutation(reverse, limit, tmp_res);
+ permutation(reverse, limit, sorter, tmp_res);
res.resize(tmp_res.size());
for (size_t i = 0; i < tmp_res.size(); ++i) res[i] = tmp_res[i];
return;
}
-#endif
}
template <PrimitiveType T>
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index 46d5663b6ae..44d193d8e9e 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -185,7 +185,7 @@ public:
const uint8_t* __restrict null_data) const
override;
int compare_at(size_t n, size_t m, const IColumn& rhs_, int
nan_direction_hint) const override;
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
IColumn::Permutation& res) const override;
MutableColumnPtr clone_resized(size_t size) const override;
@@ -264,7 +264,8 @@ protected:
Container data;
UInt32 scale;
template <typename U>
- void permutation(bool reverse, size_t limit, PaddedPODArray<U>& res) const
{
+ void permutation(bool reverse, size_t limit, HybridSorter& sorter,
+ PaddedPODArray<U>& res) const {
size_t s = data.size();
res.resize(s);
for (U i = 0; i < s; ++i) res[i] = i;
@@ -280,11 +281,11 @@ protected:
[this](size_t a, size_t b) { return data[a]
< data[b]; });
} else {
if (reverse)
- pdqsort(res.begin(), res.end(),
- [this](size_t a, size_t b) { return data[a] > data[b];
});
+ sorter.sort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] >
data[b]; });
else
- pdqsort(res.begin(), res.end(),
- [this](size_t a, size_t b) { return data[a] < data[b];
});
+ sorter.sort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] <
data[b]; });
}
}
diff --git a/be/src/vec/columns/column_dummy.h
b/be/src/vec/columns/column_dummy.h
index 700ed09fbf4..f6530e3ddb3 100644
--- a/be/src/vec/columns/column_dummy.h
+++ b/be/src/vec/columns/column_dummy.h
@@ -110,7 +110,7 @@ public:
}
void get_permutation(bool /*reverse*/, size_t /*limit*/, int
/*nan_direction_hint*/,
- Permutation& res) const override {
+ HybridSorter&, Permutation& res) const override {
res.resize(s);
for (size_t i = 0; i < s; ++i) res[i] = i;
}
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index 5e5f3a841d5..9bc8a9157f0 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -741,7 +741,7 @@ struct ColumnMap::less {
};
void ColumnMap::get_permutation(bool reverse, size_t limit, int
nan_direction_hint,
- IColumn::Permutation& res) const {
+ HybridSorter& sorter, IColumn::Permutation&
res) const {
size_t s = size();
res.resize(s);
for (size_t i = 0; i < s; ++i) {
@@ -749,9 +749,9 @@ void ColumnMap::get_permutation(bool reverse, size_t limit,
int nan_direction_hi
}
if (reverse) {
- pdqsort(res.begin(), res.end(), ColumnMap::less<false>(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), ColumnMap::less<false>(*this,
nan_direction_hint));
} else {
- pdqsort(res.begin(), res.end(), ColumnMap::less<true>(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), ColumnMap::less<true>(*this,
nan_direction_hint));
}
}
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 925f43da1d3..1706be36b37 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -224,7 +224,7 @@ public:
size_t serialize_impl(char* pos, const size_t row) const override;
size_t deserialize_impl(const char* pos) override;
size_t serialize_size_at(size_t row) const override;
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
IColumn::Permutation& res) const override;
void sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms,
EqualRange& range, bool last_column) const override;
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index 5c78fa456c7..c7bc579c457 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -466,9 +466,9 @@ void ColumnNullable::compare_internal(size_t rhs_row_id,
const IColumn& rhs, int
}
void ColumnNullable::get_permutation(bool reverse, size_t limit, int
null_direction_hint,
- Permutation& res) const {
+ HybridSorter& sorter, Permutation& res)
const {
/// Cannot pass limit because of unknown amount of NULLs.
- get_nested_column().get_permutation(reverse, 0, null_direction_hint, res);
+ get_nested_column().get_permutation(reverse, 0, null_direction_hint,
sorter, res);
if ((null_direction_hint > 0) != reverse) {
/// Shift all NULL values to the end.
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 83ad71ad400..2b532b47d4a 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -213,7 +213,7 @@ public:
void compare_internal(size_t rhs_row_id, const IColumn& rhs, int
nan_direction_hint,
int direction, std::vector<uint8_t>& cmp_res,
uint8_t* __restrict filter) const override;
- void get_permutation(bool reverse, size_t limit, int null_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int null_direction_hint,
HybridSorter& sorter,
Permutation& res) const override;
void reserve(size_t n) override;
void resize(size_t n) override;
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 57ed137cc3f..03ca5b3e6a1 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -601,7 +601,7 @@ struct ColumnStr<T>::less {
template <typename T>
void ColumnStr<T>::get_permutation(bool reverse, size_t limit, int
/*nan_direction_hint*/,
- IColumn::Permutation& res) const {
+ HybridSorter& sorter, IColumn::Permutation&
res) const {
size_t s = offsets.size();
res.resize(s);
for (size_t i = 0; i < s; ++i) {
@@ -609,9 +609,9 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t
limit, int /*nan_directi
}
if (reverse) {
- pdqsort(res.begin(), res.end(), less<false>(*this));
+ sorter.sort(res.begin(), res.end(), less<false>(*this));
} else {
- pdqsort(res.begin(), res.end(), less<true>(*this));
+ sorter.sort(res.begin(), res.end(), less<true>(*this));
}
}
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index 03051c8cb90..b885a1eba00 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -487,7 +487,7 @@ public:
rhs.chars.data() +
rhs.offset_at(m), rhs.size_at(m));
}
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
IColumn::Permutation& res) const override;
void reserve(size_t n) override;
diff --git a/be/src/vec/columns/column_struct.cpp
b/be/src/vec/columns/column_struct.cpp
index 415f08701a5..01ddde62456 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -428,7 +428,7 @@ struct ColumnStruct::less {
};
void ColumnStruct::get_permutation(bool reverse, size_t limit, int
nan_direction_hint,
- IColumn::Permutation& res) const {
+ HybridSorter& sorter, IColumn::Permutation&
res) const {
size_t s = size();
res.resize(s);
for (size_t i = 0; i < s; ++i) {
@@ -436,9 +436,9 @@ void ColumnStruct::get_permutation(bool reverse, size_t
limit, int nan_direction
}
if (reverse) {
- pdqsort(res.begin(), res.end(), ColumnStruct::less<false>(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), ColumnStruct::less<false>(*this,
nan_direction_hint));
} else {
- pdqsort(res.begin(), res.end(), ColumnStruct::less<true>(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), ColumnStruct::less<true>(*this,
nan_direction_hint));
}
}
diff --git a/be/src/vec/columns/column_struct.h
b/be/src/vec/columns/column_struct.h
index 6e8ffdf69e8..e90828bb294 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -193,7 +193,7 @@ public:
size_t serialize_size_at(size_t row) const override;
size_t deserialize_impl(const char* pos) override;
size_t serialize_impl(char* pos, const size_t row) const override;
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
IColumn::Permutation& res) const override;
void sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms,
EqualRange& range, bool last_column) const override;
diff --git a/be/src/vec/columns/column_varbinary.cpp
b/be/src/vec/columns/column_varbinary.cpp
index e352513804c..e4a9d374357 100644
--- a/be/src/vec/columns/column_varbinary.cpp
+++ b/be/src/vec/columns/column_varbinary.cpp
@@ -198,7 +198,7 @@ struct ColumnVarbinary::less {
};
void ColumnVarbinary::get_permutation(bool reverse, size_t limit, int
/*nan_direction_hint*/,
- IColumn::Permutation& res) const {
+ HybridSorter& sorter,
IColumn::Permutation& res) const {
size_t s = _data.size();
res.resize(s);
for (size_t i = 0; i < s; ++i) {
@@ -206,9 +206,9 @@ void ColumnVarbinary::get_permutation(bool reverse, size_t
limit, int /*nan_dire
}
if (reverse) {
- pdqsort(res.begin(), res.end(), less<false>(*this));
+ sorter.sort(res.begin(), res.end(), less<false>(*this));
} else {
- pdqsort(res.begin(), res.end(), less<true>(*this));
+ sorter.sort(res.begin(), res.end(), less<true>(*this));
}
}
diff --git a/be/src/vec/columns/column_varbinary.h
b/be/src/vec/columns/column_varbinary.h
index 56e994b916e..7b3688c2bec 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -114,7 +114,7 @@ public:
}
void get_permutation(bool reverse, size_t limit, int
/*nan_direction_hint*/,
- IColumn::Permutation& res) const override;
+ HybridSorter& sorter, IColumn::Permutation& res)
const override;
size_t get_max_row_byte_size() const override;
diff --git a/be/src/vec/columns/column_vector.cpp
b/be/src/vec/columns/column_vector.cpp
index a5367ab8634..8bc629c444b 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -308,7 +308,7 @@ struct ColumnVector<T>::greater {
template <PrimitiveType T>
void ColumnVector<T>::get_permutation(bool reverse, size_t limit, int
nan_direction_hint,
- IColumn::Permutation& res) const {
+ HybridSorter& sorter,
IColumn::Permutation& res) const {
size_t s = data.size();
res.resize(s);
@@ -331,9 +331,9 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t
limit, int nan_direct
for (size_t i = 0; i < s; ++i) res[i] = i;
if (reverse)
- pdqsort(res.begin(), res.end(), greater(*this,
nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), greater(*this,
nan_direction_hint));
else
- pdqsort(res.begin(), res.end(), less(*this, nan_direction_hint));
+ sorter.sort(res.begin(), res.end(), less(*this,
nan_direction_hint));
}
}
diff --git a/be/src/vec/columns/column_vector.h
b/be/src/vec/columns/column_vector.h
index da959353caf..e1062e83c3e 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -277,7 +277,7 @@ public:
data[n], assert_cast<const Self&,
TypeCheckOnRelease::DISABLE>(rhs_).data[m]);
}
- void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
HybridSorter& sorter,
IColumn::Permutation& res) const override;
void reserve(size_t n) override { data.reserve(n); }
diff --git a/be/src/vec/common/sort/heap_sorter.cpp
b/be/src/vec/common/sort/heap_sorter.cpp
index 6f87b230630..2fb7717d502 100644
--- a/be/src/vec/common/sort/heap_sorter.cpp
+++ b/be/src/vec/common/sort/heap_sorter.cpp
@@ -19,17 +19,18 @@
#include <glog/logging.h>
+#include "runtime/runtime_state.h"
#include "util/runtime_profile.h"
#include "vec/core/sort_block.h"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
-HeapSorter::HeapSorter(VSortExecExprs& vsort_exec_exprs, int64_t limit,
int64_t offset,
- ObjectPool* pool, std::vector<bool>& is_asc_order,
+HeapSorter::HeapSorter(VSortExecExprs& vsort_exec_exprs, RuntimeState* state,
int64_t limit,
+ int64_t offset, ObjectPool* pool, std::vector<bool>&
is_asc_order,
std::vector<bool>& nulls_first, const RowDescriptor&
row_desc,
bool have_runtime_predicate)
- : Sorter(vsort_exec_exprs, limit, offset, pool, is_asc_order,
nulls_first),
+ : Sorter(vsort_exec_exprs, state, limit, offset, pool, is_asc_order,
nulls_first),
_heap_size(limit + offset),
_state(MergeSorterState::create_unique(row_desc, offset)),
_have_runtime_predicate(have_runtime_predicate) {}
@@ -57,7 +58,7 @@ Status HeapSorter::append_block(Block* block) {
for (auto& d : rev_desc) {
d.direction *= -1;
}
- sort_block(*tmp_block, *sorted_block, rev_desc, 0 /*limit*/);
+ sort_block(*tmp_block, *sorted_block, rev_desc, _hybrid_sorter, 0
/*limit*/);
_queue_row_num += sorted_block->rows();
_data_size += sorted_block->allocated_bytes();
_queue.push(MergeSortCursor(MergeSortCursorImpl::create_shared(sorted_block,
rev_desc)));
diff --git a/be/src/vec/common/sort/heap_sorter.h
b/be/src/vec/common/sort/heap_sorter.h
index a163afa379a..b9eeead1bb3 100644
--- a/be/src/vec/common/sort/heap_sorter.h
+++ b/be/src/vec/common/sort/heap_sorter.h
@@ -26,8 +26,8 @@ class HeapSorter final : public Sorter {
ENABLE_FACTORY_CREATOR(HeapSorter);
public:
- HeapSorter(VSortExecExprs& vsort_exec_exprs, int64_t limit, int64_t
offset, ObjectPool* pool,
- std::vector<bool>& is_asc_order, std::vector<bool>& nulls_first,
+ HeapSorter(VSortExecExprs& vsort_exec_exprs, RuntimeState* state, int64_t
limit, int64_t offset,
+ ObjectPool* pool, std::vector<bool>& is_asc_order,
std::vector<bool>& nulls_first,
const RowDescriptor& row_desc, bool have_runtime_predicate =
true);
~HeapSorter() override = default;
diff --git a/be/src/vec/common/sort/partition_sorter.cpp
b/be/src/vec/common/sort/partition_sorter.cpp
index 305a803c9e0..9eb7ec9000c 100644
--- a/be/src/vec/common/sort/partition_sorter.cpp
+++ b/be/src/vec/common/sort/partition_sorter.cpp
@@ -45,7 +45,7 @@ PartitionSorter::PartitionSorter(VSortExecExprs&
vsort_exec_exprs, int64_t limit
RuntimeState* state, RuntimeProfile* profile,
bool has_global_limit, int64_t
partition_inner_limit,
TopNAlgorithm::type top_n_algorithm,
SortCursorCmp* previous_row)
- : Sorter(vsort_exec_exprs, limit, offset, pool, is_asc_order,
nulls_first),
+ : Sorter(vsort_exec_exprs, state, limit, offset, pool, is_asc_order,
nulls_first),
_state(MergeSorterState::create_unique(row_desc, offset)),
_row_desc(row_desc),
_partition_inner_limit(partition_inner_limit),
diff --git a/be/src/vec/common/sort/sorter.cpp
b/be/src/vec/common/sort/sorter.cpp
index 0315840d4f4..0e588b119f0 100644
--- a/be/src/vec/common/sort/sorter.cpp
+++ b/be/src/vec/common/sort/sorter.cpp
@@ -144,7 +144,7 @@ Status Sorter::partial_sort(Block& src_block, Block&
dest_block, bool reversed)
SCOPED_TIMER(_partial_sort_timer);
uint64_t limit = reversed ? 0 : (_offset + _limit);
sort_block(_materialize_sort_exprs ? dest_block : src_block,
dest_block, _sort_description,
- limit);
+ _hybrid_sorter, limit);
}
src_block.clear_column_data(num_cols);
@@ -183,7 +183,7 @@ FullSorter::FullSorter(VSortExecExprs& vsort_exec_exprs,
int64_t limit, int64_t
ObjectPool* pool, std::vector<bool>& is_asc_order,
std::vector<bool>& nulls_first, const RowDescriptor&
row_desc,
RuntimeState* state, RuntimeProfile* profile)
- : Sorter(vsort_exec_exprs, limit, offset, pool, is_asc_order,
nulls_first),
+ : Sorter(vsort_exec_exprs, state, limit, offset, pool, is_asc_order,
nulls_first),
_state(MergeSorterState::create_unique(row_desc, offset)) {}
// check whether the unsorted block can hold more data from input block and no
need to alloc new memory
diff --git a/be/src/vec/common/sort/sorter.h b/be/src/vec/common/sort/sorter.h
index 68be0872b6a..bd5ae9f9726 100644
--- a/be/src/vec/common/sort/sorter.h
+++ b/be/src/vec/common/sort/sorter.h
@@ -32,6 +32,7 @@
#include "vec/common/sort/vsort_exec_exprs.h"
#include "vec/core/block.h"
#include "vec/core/field.h"
+#include "vec/core/hybrid_sorter.h"
#include "vec/core/sort_cursor.h"
#include "vec/core/sort_description.h"
#include "vec/runtime/vsorted_run_merger.h"
@@ -101,15 +102,16 @@ private:
class Sorter {
public:
- Sorter(VSortExecExprs& vsort_exec_exprs, int64_t limit, int64_t offset,
ObjectPool* pool,
- std::vector<bool>& is_asc_order, std::vector<bool>& nulls_first)
+ Sorter(VSortExecExprs& vsort_exec_exprs, RuntimeState* state, int64_t
limit, int64_t offset,
+ ObjectPool* pool, std::vector<bool>& is_asc_order,
std::vector<bool>& nulls_first)
: _vsort_exec_exprs(vsort_exec_exprs),
_limit(limit),
_offset(offset),
_pool(pool),
_is_asc_order(is_asc_order),
_nulls_first(nulls_first),
-
_materialize_sort_exprs(vsort_exec_exprs.need_materialize_tuple()) {}
+
_materialize_sort_exprs(vsort_exec_exprs.need_materialize_tuple()),
+ _hybrid_sorter(state->enable_use_hybrid_sort()) {}
#ifdef BE_TEST
VSortExecExprs mock_vsort_exec_exprs;
std::vector<bool> mock_is_asc_order;
@@ -169,6 +171,7 @@ protected:
std::priority_queue<MergeSortBlockCursor> _block_priority_queue;
bool _materialize_sort_exprs;
+ HybridSorter _hybrid_sorter;
};
class FullSorter final : public Sorter {
diff --git a/be/src/vec/common/sort/topn_sorter.cpp
b/be/src/vec/common/sort/topn_sorter.cpp
index daacd064118..bc314217840 100644
--- a/be/src/vec/common/sort/topn_sorter.cpp
+++ b/be/src/vec/common/sort/topn_sorter.cpp
@@ -43,7 +43,7 @@ TopNSorter::TopNSorter(VSortExecExprs& vsort_exec_exprs,
int64_t limit, int64_t
ObjectPool* pool, std::vector<bool>& is_asc_order,
std::vector<bool>& nulls_first, const RowDescriptor&
row_desc,
RuntimeState* state, RuntimeProfile* profile)
- : Sorter(vsort_exec_exprs, limit, offset, pool, is_asc_order,
nulls_first),
+ : Sorter(vsort_exec_exprs, state, limit, offset, pool, is_asc_order,
nulls_first),
_state(MergeSorterState::create_unique(row_desc, offset)),
_row_desc(row_desc) {}
diff --git a/be/src/vec/core/hybrid_sorter.h b/be/src/vec/core/hybrid_sorter.h
new file mode 100644
index 00000000000..15034569edd
--- /dev/null
+++ b/be/src/vec/core/hybrid_sorter.h
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This file is copied from
+//
https://github.com/bytedance/bolt/blob/b68480b45bbf8086add7bd1e64ad0ee6557198dd/bolt/exec/HybridSorter.h
+// and modified by Doris
+//
+// Original Copyright:
+// Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
+// Licensed under the Apache License, Version 2.0
+
+#pragma once
+#include <pdqsort.h>
+
+#include <gfx/timsort.hpp>
+#include <iterator>
+
+#include "common/compiler_util.h"
+#include "common/logging.h"
+namespace doris::vectorized {
+
+// HybridSorter samples during runtime and selects an appropriate sort
algorithm.
+// It compares PdqSort and TimSort performance dynamically and chooses the
most efficient one
+// based on the characteristics of the input data.
+class HybridSorter {
+public:
+ // Sorting algorithm options: Auto (auto-select), TimSort, or PdqSort
+ enum class SortAlgo { Auto, TimSort, PdqSort };
+
+ HybridSorter() : _enable_use_hybrid_sort(false) {};
+
+ HybridSorter(bool enable_use_hybrid_sort) :
_enable_use_hybrid_sort(enable_use_hybrid_sort) {};
+
+ // Main sorting function: sorts elements in the range [first, last) using
the comparator cmp.
+ // Automatically selects between PdqSort and TimSort based on runtime
profiling.
+ template <std::random_access_iterator Iter, typename Compare>
+ void sort(Iter first, Iter last, Compare cmp) {
+ // If hybrid sorting is disabled, directly use PdqSort
+ if (!_enable_use_hybrid_sort) {
+ pdqsort(first, last, cmp);
+ return;
+ }
+
+ auto rec_cnt = std::distance(first, last);
+
+ // Periodically reset and re-probe to adapt to changing data patterns
+ if (UNLIKELY(block_cnt % PROBE_AGAIN_BATCH_NUMBER == 0)) {
+ // Prepare to probe: reset metrics and algorithm selection
+ reset();
+ }
+
+ // If algo has been determined, or very small batch, no need to probe
+ if (algo == SortAlgo::PdqSort ||
+ (rec_cnt < SMALL_BATCH_THRESHOLD && algo == SortAlgo::Auto)) {
+ pdqsort(first, last, cmp);
+ } else if (algo == SortAlgo::TimSort) {
+ gfx::timsort(first, last, cmp);
+ } else {
+ // At auto-probe stage: alternately test PdqSort and TimSort
+ // to collect performance metrics for algorithm selection
+ size_t cmp_cnt = 0;
+ // Wrapper comparator that counts comparison operations
+ auto cmpWithCnt = [&cmp_cnt, &cmp](auto lhs, auto rhs) -> bool {
+ ++cmp_cnt;
+ return static_cast<bool>(std::invoke(cmp, lhs, rhs));
+ };
+
+ if (is_pdq_sort_turn()) {
+ pdqsort(first, last, cmpWithCnt);
+ } else {
+ gfx::timsort(first, last, cmpWithCnt);
+ }
+
+ update_metrics(cmp_cnt, std::distance(first, last));
+
+ // If a probe round is finished, each algorithm has been run
+ // NUM_PROBE_PER_ROUND times.
+ // Now, check if the performance difference is significant
+ if (probe_rounds_end()) {
+ check_if_significant();
+ }
+ }
+
+ ++block_cnt;
+ }
+
+ // Returns the currently selected sorting algorithm
+ SortAlgo get_sort_algo() const noexcept { return algo; }
+
+private:
+ // Checks if the performance difference between PdqSort and TimSort is
significant.
+ // If one algorithm is significantly better (by significantThreshold_), it
is selected.
+ // Otherwise, the threshold is decayed to make selection easier in
subsequent rounds.
+ void check_if_significant() {
+ // Calculate efficiency: higher ratio means fewer comparisons per
record (better)
+ // efficiency = record_cnt / compare_cnt
+ auto pdqEfficiency = pdq_metric.compare_cnt > 0
+ ? ((double)pdq_metric.intput_cnt) /
pdq_metric.compare_cnt
+ : -1.0;
+ auto timEfficiency = tim_metric.compare_cnt > 0
+ ? ((double)tim_metric.intput_cnt) /
tim_metric.compare_cnt
+ : -1.0;
+
+ if (pdqEfficiency > timEfficiency * significantThreshold_) {
+ algo = SortAlgo::PdqSort;
+ VLOG_ROW << "PdqSort is chosen: pdqEfficiency / timEfficiency = "
+ << pdqEfficiency / timEfficiency << std::endl;
+ return;
+ }
+ if (timEfficiency > pdqEfficiency * significantThreshold_) {
+ algo = SortAlgo::TimSort;
+ VLOG_ROW << "TimSort is chosen: timEfficiency / pdqEfficiency = "
+ << timEfficiency / pdqEfficiency << std::endl;
+ return;
+ }
+
+ // Decay the threshold to make algorithm selection easier over time
+ significantThreshold_ *= DECAY_FACTOR;
+
+ // If after multiple rounds the difference is still not significant
enough,
+ // stop probing and simply choose the algorithm with better efficiency
+ if (significantThreshold_ <= 1.0) {
+ algo = pdqEfficiency >= timEfficiency ? SortAlgo::PdqSort :
SortAlgo::TimSort;
+ }
+ }
+
+ // Updates performance metrics for the algorithm that was just used.
+ // Records the number of comparisons and records processed.
+ void update_metrics(size_t cmp_cnt, size_t record_cnt) {
+ if (is_pdq_sort_turn()) {
+ pdq_metric.compare_cnt += cmp_cnt;
+ pdq_metric.intput_cnt += record_cnt;
+ } else {
+ tim_metric.compare_cnt += cmp_cnt;
+ tim_metric.intput_cnt += record_cnt;
+ }
+ }
+
+ // Resets the sorter state to begin a new probing cycle.
+ // Clears all metrics and resets the algorithm to Auto mode.
+ void reset() {
+ algo = SortAlgo::Auto;
+ significantThreshold_ = SIGNIFICANT_THRESHOLD;
+ pdq_metric.compare_cnt = 0;
+ pdq_metric.intput_cnt = 0;
+ tim_metric.compare_cnt = 0;
+ tim_metric.intput_cnt = 0;
+ }
+
+ // Returns true if it's PdqSort's turn in the alternating probe pattern
+ inline bool is_pdq_sort_turn() const noexcept { return block_cnt % 2 == 0;
}
+
+ // Returns true if a complete probe round has finished
+ // (both algorithms have been tested NUM_PROBE_PER_ROUND times)
+ inline bool probe_rounds_end() const noexcept {
+ return (block_cnt + 1) % (NUM_PROBE_PER_ROUND * 2) == 0;
+ }
+
+private:
+ // Configuration constants
+ constexpr static size_t NUM_PROBE_PER_ROUND =
+ 4; // Number of times to test each algorithm per round
+ constexpr static size_t PROBE_AGAIN_BATCH_NUMBER =
+ NUM_PROBE_PER_ROUND * 128; // Batches before re-probing
+ constexpr static double DECAY_FACTOR = 0.9; // Threshold decay rate per
round
+ constexpr static double SIGNIFICANT_THRESHOLD =
+ 1.4; // Initial threshold for significant difference (40%)
+ constexpr static size_t SMALL_BATCH_THRESHOLD =
+ 128; // Skip probing for batches smaller than this
+
+ // Currently selected algorithm (Auto, TimSort, or PdqSort)
+ SortAlgo algo {SortAlgo::Auto};
+
+ // Stateful statistics info:
+ double significantThreshold_ {
+ SIGNIFICANT_THRESHOLD}; // Current threshold for algorithm
selection
+
+ // Metrics structure for tracking algorithm performance
+ struct CompareMetric {
+ size_t compare_cnt {0}; // Total number of comparisons performed
+ size_t intput_cnt {0}; // Total number of records processed
+ };
+ CompareMetric tim_metric; // Performance metrics for TimSort
+ CompareMetric pdq_metric; // Performance metrics for PdqSort
+
+ size_t block_cnt {0}; // Counter for sorting batches processed
+
+ const bool _enable_use_hybrid_sort; // Flag to enable/disable hybrid
sorting
+};
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/core/sort_block.cpp b/be/src/vec/core/sort_block.cpp
index 75aa9d85a12..f556d94db07 100644
--- a/be/src/vec/core/sort_block.cpp
+++ b/be/src/vec/core/sort_block.cpp
@@ -40,7 +40,7 @@ ColumnsWithSortDescriptions
get_columns_with_sort_description(const Block& block
}
void sort_block(Block& src_block, Block& dest_block, const SortDescription&
description,
- UInt64 limit) {
+ HybridSorter& hybrid_sorter, UInt64 limit) {
if (!src_block.columns()) {
return;
}
@@ -53,7 +53,8 @@ void sort_block(Block& src_block, Block& dest_block, const
SortDescription& desc
src_block.safe_get_by_position(description[0].column_number).column.get();
IColumn::Permutation perm;
- column->get_permutation(reverse, limit,
description[0].nulls_direction, perm);
+ column->get_permutation(reverse, limit,
description[0].nulls_direction, hybrid_sorter,
+ perm);
size_t columns = src_block.columns();
for (size_t i = 0; i < columns; ++i) {
@@ -79,7 +80,7 @@ void sort_block(Block& src_block, Block& dest_block, const
SortDescription& desc
// TODO: ColumnSorter should be constructed only once.
for (size_t i = 0; i < columns_with_sort_desc.size(); i++) {
- ColumnSorter sorter(columns_with_sort_desc[i], limit);
+ ColumnSorter sorter(columns_with_sort_desc[i], hybrid_sorter,
limit);
sorter.operator()(flags, perm, range, i ==
columns_with_sort_desc.size() - 1);
}
}
diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h
index b65f5b715df..e3491536c6b 100644
--- a/be/src/vec/core/sort_block.h
+++ b/be/src/vec/core/sort_block.h
@@ -42,6 +42,7 @@
#include "vec/common/memcmp_small.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
+#include "vec/core/hybrid_sorter.h"
#include "vec/core/sort_description.h"
#include "vec/core/types.h"
@@ -56,7 +57,7 @@ namespace doris::vectorized {
#include "common/compile_check_begin.h"
/// Sort one block by `description`. If limit != 0, then the partial sort of
the first `limit` rows is produced.
void sort_block(Block& src_block, Block& dest_block, const SortDescription&
description,
- UInt64 limit = 0);
+ HybridSorter& hybrid_sorter, UInt64 limit = 0);
using ColumnWithSortDescription = std::pair<const IColumn*,
SortColumnDescription>;
@@ -148,11 +149,13 @@ using PermutationForColumn =
std::vector<PermutationWithInlineValue<T>>;
class ColumnSorter {
public:
- explicit ColumnSorter(const ColumnWithSortDescription& column, const
size_t limit)
+ explicit ColumnSorter(const ColumnWithSortDescription& column,
HybridSorter& hybrid_sorter,
+ const size_t limit)
: _column_with_sort_desc(column),
_limit(limit),
_nulls_direction(column.second.nulls_direction),
- _direction(column.second.direction) {}
+ _direction(column.second.direction),
+ _hybrid_sorter(hybrid_sorter) {}
void operator()(EqualFlags& flags, IColumn::Permutation& perms,
EqualRange& range,
bool last_column) const {
@@ -184,7 +187,7 @@ public:
}
new_limit = _limit + equal_count;
} else {
- pdqsort(begin, end, less);
+ _hybrid_sorter.sort(begin, end, less);
}
};
@@ -415,7 +418,7 @@ private:
}
new_limit = _limit + equal_count;
} else {
- pdqsort(begin, end, sort_comparator);
+ _hybrid_sorter.sort(begin, end, sort_comparator);
}
};
@@ -481,7 +484,7 @@ private:
}
new_limit = _limit + equal_count;
} else {
- pdqsort(begin, end, sort_comparator);
+ _hybrid_sorter.sort(begin, end, sort_comparator);
}
};
@@ -521,6 +524,7 @@ private:
mutable size_t _limit;
const int _nulls_direction;
const int _direction;
+ HybridSorter& _hybrid_sorter;
};
#include "common/compile_check_end.h"
} // namespace doris::vectorized
diff --git a/be/test/pipeline/operator/hashjoin_probe_operator_test.cpp
b/be/test/pipeline/operator/hashjoin_probe_operator_test.cpp
index 927eeddea76..5d765978ac3 100644
--- a/be/test/pipeline/operator/hashjoin_probe_operator_test.cpp
+++ b/be/test/pipeline/operator/hashjoin_probe_operator_test.cpp
@@ -105,7 +105,8 @@ public:
}
auto sorted_block = block.clone_empty();
- sort_block(block, sorted_block, sort_description);
+ HybridSorter hybrid_sorter;
+ sort_block(block, sorted_block, sort_description, hybrid_sorter);
return sorted_block;
}
diff --git a/be/test/testutil/mock/mock_runtime_state.h
b/be/test/testutil/mock/mock_runtime_state.h
index 00c77519d4c..0ca3127a5cc 100644
--- a/be/test/testutil/mock/mock_runtime_state.h
+++ b/be/test/testutil/mock/mock_runtime_state.h
@@ -72,6 +72,8 @@ public:
bool enable_local_exchange() const override { return true; }
WorkloadGroupPtr workload_group() override { return _workload_group; }
+ bool enable_use_hybrid_sort() const override { return false; }
+
// default batch size
int batsh_size = 4096;
bool _enable_shared_exchange_sink_buffer = true;
diff --git a/be/test/vec/columns/column_const_test.cpp
b/be/test/vec/columns/column_const_test.cpp
index a3645c5dc44..3dd239961d9 100644
--- a/be/test/vec/columns/column_const_test.cpp
+++ b/be/test/vec/columns/column_const_test.cpp
@@ -87,7 +87,7 @@ TEST(ColumnConstTest, Testget_permutation) {
auto column_data = ColumnHelper::create_column<DataTypeInt64>({7});
auto column_const = ColumnConst::create(column_data, 3);
IColumn::Permutation res;
- column_const->get_permutation(false, 0, 0, res);
+ column_const->get_permutation_default(false, 0, 0, res);
EXPECT_EQ(res.size(), 3);
for (size_t i = 0; i < res.size(); ++i) {
EXPECT_EQ(res[i], i);
diff --git a/be/test/vec/columns/column_varbinary_test.cpp
b/be/test/vec/columns/column_varbinary_test.cpp
index d903d3abeff..d450730de95 100644
--- a/be/test/vec/columns/column_varbinary_test.cpp
+++ b/be/test/vec/columns/column_varbinary_test.cpp
@@ -609,8 +609,8 @@ TEST_F(ColumnVarbinaryTest,
GetPermutationAscDescIgnoreLimit) {
}
IColumn::Permutation perm_asc;
- col->get_permutation(/*reverse=*/false, /*limit=*/3, /*nan_hint=*/0,
- perm_asc); // limit ignored by impl
+ col->get_permutation_default(/*reverse=*/false, /*limit=*/3,
/*nan_hint=*/0,
+ perm_asc); // limit ignored by impl
ASSERT_EQ(perm_asc.size(), vals.size());
// check ascending ordering
for (size_t i = 1; i < perm_asc.size(); ++i) {
@@ -619,7 +619,8 @@ TEST_F(ColumnVarbinaryTest,
GetPermutationAscDescIgnoreLimit) {
}
IColumn::Permutation perm_desc;
- col->get_permutation(/*reverse=*/true, /*limit=*/vals.size(),
/*nan_hint=*/0, perm_desc);
+ col->get_permutation_default(/*reverse=*/true, /*limit=*/vals.size(),
/*nan_hint=*/0,
+ perm_desc);
ASSERT_EQ(perm_desc.size(), vals.size());
for (size_t i = 1; i < perm_desc.size(); ++i) {
int c = col->compare_at(perm_desc[i - 1], perm_desc[i], *col, 0);
diff --git a/be/test/vec/columns/common_column_test.h
b/be/test/vec/columns/common_column_test.h
index 79a461757ff..90c29e88329 100644
--- a/be/test/vec/columns/common_column_test.h
+++ b/be/test/vec/columns/common_column_test.h
@@ -34,6 +34,7 @@
#include "vec/columns/column_map.h"
#include "vec/common/cow.h"
#include "vec/core/field.h"
+#include "vec/core/hybrid_sorter.h"
#include "vec/core/sort_block.h"
#include "vec/core/sort_description.h"
#include "vec/core/types.h"
@@ -1996,7 +1997,7 @@ public:
LOG(INFO) << "expected_permutation size: " <<
expected_permutation.size() << ", "
<< join_ints(expected_permutation);
// step2. get permutation by column
- column.get_permutation(!ascending, limit, nan_direction_hint,
actual_permutation);
+ column.get_permutation_default(!ascending, limit, nan_direction_hint,
actual_permutation);
LOG(INFO) << "actual_permutation size: " << actual_permutation.size()
<< ", "
<< join_ints(actual_permutation);
@@ -3279,7 +3280,7 @@ auto assert_column_vector_compare_internal_callback =
[](auto x,
auto col_cloned = source_column->clone();
size_t num_rows = col_cloned->size();
IColumn::Permutation permutation;
- col_cloned->get_permutation(false, 0, 1, permutation);
+ col_cloned->get_permutation_default(false, 0, 1, permutation);
auto col_clone_sorted = col_cloned->permute(permutation, 0);
auto test_func = [&](int direction) {
@@ -3550,7 +3551,9 @@ auto assert_sort_column_callback = [](auto x, const
MutableColumnPtr& source_col
for (size_t i = 0; i != cloned_columns.size(); ++i) {
ColumnWithSortDescription
column_with_sort_desc(cloned_columns[i].get(),
SortColumnDescription(i, 1, 0));
- ColumnSorter sorter(column_with_sort_desc, limit);
+
+ HybridSorter hybrid_sorter;
+ ColumnSorter sorter(column_with_sort_desc, hybrid_sorter, limit);
cloned_columns[i]->sort_column(&sorter, flags, perm, range,
i == cloned_columns.size() - 1);
}
diff --git a/be/test/vec/data_types/data_type_timestamptz_test.cpp
b/be/test/vec/data_types/data_type_timestamptz_test.cpp
index df17b5c638d..c46ae25076e 100644
--- a/be/test/vec/data_types/data_type_timestamptz_test.cpp
+++ b/be/test/vec/data_types/data_type_timestamptz_test.cpp
@@ -50,6 +50,7 @@ public:
std::shared_ptr<IDataType> type;
DataTypeSerDeSPtr serder;
+ MockRuntimeState _state;
};
TEST_F(DataTypeTimeStampTzTest, test_normal) {
@@ -129,7 +130,7 @@ TEST_F(DataTypeTimeStampTzTest, test_sort) {
MockSlotRef::create_mock_contexts(0,
std::make_shared<DataTypeTimeStampTz>());
sorter = FullSorter::create_unique(sort_exec_exprs, 3, 3, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
sorter->init_profile(&_profile);
{
Block block = ColumnHelper::create_block<DataTypeTimeStampTz>(
diff --git a/be/test/vec/exec/sort/full_sort_test.cpp
b/be/test/vec/exec/sort/full_sort_test.cpp
index 00abfb4e9c0..8add8b7e9f5 100644
--- a/be/test/vec/exec/sort/full_sort_test.cpp
+++ b/be/test/vec/exec/sort/full_sort_test.cpp
@@ -73,7 +73,7 @@ struct FullSorterTest : public testing::Test {
TEST_F(FullSorterTest, test_full_sorter1) {
sorter = FullSorter::create_unique(sort_exec_exprs, -1, 0, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
Block block1 = ColumnHelper::create_block<DataTypeInt64>({1, 2, 3, 4, 5,
6, 7, 8, 9, 10});
Block block2 = ColumnHelper::create_block<DataTypeInt64>({10, 9, 8, 7, 6,
5, 4, 3, 2, 1});
@@ -85,7 +85,7 @@ TEST_F(FullSorterTest, test_full_sorter1) {
TEST_F(FullSorterTest, test_full_sorter2) {
sorter = FullSorter::create_unique(sort_exec_exprs, -1, 0, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
{
Block block = ColumnHelper::create_block<DataTypeInt64>({1, 2, 3, 4,
5, 6, 7, 8, 9, 10});
EXPECT_TRUE(sorter->append_block(&block).ok());
@@ -104,7 +104,7 @@ TEST_F(FullSorterTest, test_full_sorter2) {
TEST_F(FullSorterTest, test_full_sorter3) {
sorter = FullSorter::create_unique(sort_exec_exprs, 3, 3, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
sorter->init_profile(&_profile);
{
Block block = ColumnHelper::create_block<DataTypeInt64>({1, 2, 3, 4,
5, 6, 7, 8, 9, 10});
diff --git a/be/test/vec/exec/sort/heap_sorter_test.cpp
b/be/test/vec/exec/sort/heap_sorter_test.cpp
index 14be58b1618..b2ac3dfe6cf 100644
--- a/be/test/vec/exec/sort/heap_sorter_test.cpp
+++ b/be/test/vec/exec/sort/heap_sorter_test.cpp
@@ -84,8 +84,8 @@ TEST_F(HeapSorterTest, test_topn_sorter1) {
sort_exec_exprs._sort_tuple_slot_expr_ctxs =
MockSlotRef::create_mock_contexts(data_types);
- sorter = HeapSorter::create_unique(sort_exec_exprs, 6, 0, &pool,
is_asc_order, nulls_first,
- *row_desc);
+ sorter = HeapSorter::create_unique(sort_exec_exprs, &_state, 6, 0, &pool,
is_asc_order,
+ nulls_first, *row_desc);
sorter->init_profile(&_profile);
diff --git a/be/test/vec/exec/sort/partition_sorter_test.cpp
b/be/test/vec/exec/sort/partition_sorter_test.cpp
index 9c8fad5ff47..d1c42450ef3 100644
--- a/be/test/vec/exec/sort/partition_sorter_test.cpp
+++ b/be/test/vec/exec/sort/partition_sorter_test.cpp
@@ -75,7 +75,7 @@ struct PartitionSorterTest : public testing::Test {
TEST_F(PartitionSorterTest, test_partition_sorter_read_row_num) {
sorter = PartitionSorter::create_unique(sort_exec_exprs, -1, 0, &pool,
is_asc_order,
- nulls_first, *row_desc, nullptr,
nullptr, false, 20,
+ nulls_first, *row_desc, &_state,
nullptr, false, 20,
TopNAlgorithm::ROW_NUMBER,
nullptr);
sorter->init_profile(&_profile);
{
@@ -121,7 +121,7 @@ TEST_F(PartitionSorterTest,
test_partition_sorter_DENSE_RANK) {
SortCursorCmp previous_row;
sorter = PartitionSorter::create_unique(sort_exec_exprs, -1, 0, &pool,
is_asc_order,
- nulls_first, *row_desc, nullptr,
nullptr, false, 20,
+ nulls_first, *row_desc, &_state,
nullptr, false, 20,
TopNAlgorithm::DENSE_RANK,
&previous_row);
sorter->init_profile(&_profile);
{
@@ -160,7 +160,7 @@ TEST_F(PartitionSorterTest, test_partition_sorter_RANK) {
SortCursorCmp previous_row;
sorter = PartitionSorter::create_unique(sort_exec_exprs, -1, 0, &pool,
is_asc_order,
- nulls_first, *row_desc, nullptr,
nullptr, false, 20,
+ nulls_first, *row_desc, &_state,
nullptr, false, 20,
TopNAlgorithm::RANK,
&previous_row);
sorter->init_profile(&_profile);
{
diff --git a/be/test/vec/exec/sort/sort_test.cpp
b/be/test/vec/exec/sort/sort_test.cpp
index c4388a7365a..bb00880969f 100644
--- a/be/test/vec/exec/sort/sort_test.cpp
+++ b/be/test/vec/exec/sort/sort_test.cpp
@@ -65,14 +65,15 @@ public:
switch (sort_type) {
case SortType::FULL_SORT:
sorter = FullSorter::create_unique(sort_exec_exprs, limit, offset,
&pool, is_asc_order,
- nulls_first, *row_desc,
nullptr, nullptr);
+ nulls_first, *row_desc,
&_state, nullptr);
break;
case SortType::TOPN_SORT:
sorter = TopNSorter::create_unique(sort_exec_exprs, limit, offset,
&pool, is_asc_order,
- nulls_first, *row_desc,
nullptr, nullptr);
+ nulls_first, *row_desc,
&_state, nullptr);
+ break;
case SortType::HEAP_SORT:
- sorter = HeapSorter::create_unique(sort_exec_exprs, limit, offset,
&pool, is_asc_order,
- nulls_first, *row_desc);
+ sorter = HeapSorter::create_unique(sort_exec_exprs, &_state,
limit, offset, &pool,
+ is_asc_order, nulls_first,
*row_desc);
break;
default:
break;
@@ -121,6 +122,8 @@ public:
std::vector<bool> is_asc_order {true};
std::vector<bool> nulls_first {false};
+ MockRuntimeState _state;
+
std::unique_ptr<vectorized::Sorter> sorter;
}; // class SortTestParam
@@ -196,8 +199,9 @@ TEST_F(SortTest, test_sorter) {
sort_exec_exprs._sort_tuple_slot_expr_ctxs =
MockSlotRef::create_mock_contexts(data_types);
+ MockRuntimeState _state;
sorter = FullSorter::create_unique(sort_exec_exprs, -1, 0, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
{
Block src_block = ColumnHelper::create_block<DataTypeInt64>({4, 1, 2},
{10, 1, 3});
diff --git a/be/test/vec/exec/sort/topn_sort_test.cpp
b/be/test/vec/exec/sort/topn_sort_test.cpp
index 97e59b8962c..97c0b745afd 100644
--- a/be/test/vec/exec/sort/topn_sort_test.cpp
+++ b/be/test/vec/exec/sort/topn_sort_test.cpp
@@ -73,7 +73,7 @@ struct TopNSorterTest : public testing::Test {
TEST_F(TopNSorterTest, test_topn_sorter1) {
sorter = TopNSorter::create_unique(sort_exec_exprs, 3, 3, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
sorter->init_profile(&_profile);
{
Block block = ColumnHelper::create_block<DataTypeInt64>({10, 9, 8, 7,
6, 5, 4, 3, 2, 1});
@@ -94,7 +94,7 @@ TEST_F(TopNSorterTest, test_topn_sorter1) {
TEST_F(TopNSorterTest, test_topn_sorter2) {
sorter = TopNSorter::create_unique(sort_exec_exprs, -1, 3, &pool,
is_asc_order, nulls_first,
- *row_desc, nullptr, nullptr);
+ *row_desc, &_state, nullptr);
sorter->init_profile(&_profile);
{
Block block = ColumnHelper::create_block<DataTypeInt64>({10, 9, 8, 7,
6, 5, 4, 3, 2, 1});
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 5ea2da385e6..11df0e53eb1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -649,6 +649,8 @@ public class SessionVariable implements Serializable,
Writable {
public static final String ENABLE_FUZZY_BLOCKABLE_TASK =
"enable_fuzzy_blockable_task";
+ public static final String ENABLE_USE_HYBRID_SORT =
"enable_use_hybrid_sort";
+
public static final String GENERATE_STATS_FACTOR = "generate_stats_factor";
public static final String HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS
@@ -2943,6 +2945,15 @@ public class SessionVariable implements Serializable,
Writable {
name = ENABLE_FUZZY_BLOCKABLE_TASK, fuzzy = true)
public boolean enableFuzzyBlockableTask = false;
+ @VariableMgr.VarAttr(
+ name = ENABLE_USE_HYBRID_SORT,
+ description = {"是否启用混合排序,动态选择 PdqSort 和 TimSort 以适应数据模式。默认为 true。",
+ "Enable hybrid sorting: dynamically selects between
PdqSort and TimSort "
+ + "based on runtime profiling to choose the most
efficient algorithm "
+ + "for the data pattern. The default value is
true."},
+ needForward = true, fuzzy = true)
+ public boolean enableUseHybridSort = true;
+
@VariableMgr.VarAttr(name = USE_MAX_LENGTH_OF_VARCHAR_IN_CTAS, needForward
= true, description = {
"在 CTAS 中,如果 CHAR / VARCHAR 列不来自于源表,是否是将这一列的长度设置为 MAX,即 65533。默认为
true。",
"In CTAS (Create Table As Select), if CHAR/VARCHAR columns do not
originate from the source table,"
@@ -3337,6 +3348,9 @@ public class SessionVariable implements Serializable,
Writable {
this.enableSpill = randomInt % 4 != 0;
this.enableForceSpill = randomInt % 3 == 0;
this.enableReserveMemory = randomInt % 5 != 0;
+
+ randomInt = random.nextInt(99);
+ this.enableUseHybridSort = randomInt % 3 != 0;
}
setFuzzyForCatalog(random);
@@ -4918,6 +4932,7 @@ public class SessionVariable implements Serializable,
Writable {
tResult.setDumpHeapProfileWhenMemLimitExceeded(dumpHeapProfileWhenMemLimitExceeded);
tResult.setDataQueueMaxBlocks(dataQueueMaxBlocks);
+ tResult.setEnableUseHybridSort(enableUseHybridSort);
tResult.setLowMemoryModeBufferLimit(lowMemoryModeBufferLimit);
tResult.setEnableSharedExchangeSinkBuffer(enableSharedExchangeSinkBuffer);
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index 4d9a7dcde2d..5622929166e 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -422,6 +422,10 @@ struct TQueryOptions {
182: optional i32 ivf_nprobe = 1;
+ // Enable hybrid sorting: dynamically selects between PdqSort and TimSort
based on
+ // runtime profiling to choose the most efficient algorithm for the data
pattern
+ 183: optional bool enable_use_hybrid_sort = false;
+
// For cloud, to control if the content would be written into file cache
// In write path, to control if the content would be written into file cache.
// In read path, read from file cache or remote storage when execute query.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]