This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 0fe67a8e763 [Performance](opt) opt the order by performance in
permutation (#39092)
0fe67a8e763 is described below
commit 0fe67a8e76397b120427f8105f19da23f387d724
Author: HappenLee <[email protected]>
AuthorDate: Sat Aug 10 19:37:30 2024 +0800
[Performance](opt) opt the order by performance in permutation (#39092)
Issue Number: cherry pick #38985
---
be/src/vec/columns/column_decimal.h | 25 +++++++++++++++++--------
be/src/vec/columns/column_string.cpp | 9 ++++-----
be/src/vec/columns/column_vector.cpp | 3 ++-
3 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index 8d10fb806e4..26ec505e426 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -21,6 +21,7 @@
#pragma once
#include <glog/logging.h>
+#include <pdqsort.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
@@ -294,14 +295,22 @@ protected:
for (U i = 0; i < s; ++i) res[i] = i;
auto sort_end = res.end();
- if (limit && limit < s) sort_end = res.begin() + limit;
-
- if (reverse)
- std::partial_sort(res.begin(), sort_end, res.end(),
- [this](size_t a, size_t b) { return data[a] >
data[b]; });
- else
- std::partial_sort(res.begin(), sort_end, res.end(),
- [this](size_t a, size_t b) { return data[a] <
data[b]; });
+ if (limit && limit < s / 8.0) {
+ sort_end = res.begin() + limit;
+ if (reverse)
+ std::partial_sort(res.begin(), sort_end, res.end(),
+ [this](size_t a, size_t b) { return data[a]
> data[b]; });
+ else
+ std::partial_sort(res.begin(), sort_end, res.end(),
+ [this](size_t a, size_t b) { return data[a]
< data[b]; });
+ } else {
+ if (reverse)
+ pdqsort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] > data[b];
});
+ else
+ pdqsort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] < data[b];
});
+ }
}
void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint64_t& hash) const {
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 5d2670acb78..e5f900f62a0 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -381,9 +381,8 @@ void ColumnString::get_permutation(bool reverse, size_t
limit, int /*nan_directi
res[i] = i;
}
- if (limit >= s) {
- limit = 0;
- }
+ // std::partial_sort only pays off when limit is much smaller than s (limit << s); otherwise clear limit and do a full sort
+ if (limit > (s / 8.0)) limit = 0;
if (limit) {
if (reverse) {
@@ -393,9 +392,9 @@ void ColumnString::get_permutation(bool reverse, size_t
limit, int /*nan_directi
}
} else {
if (reverse) {
- std::sort(res.begin(), res.end(), less<false>(*this));
+ pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
- std::sort(res.begin(), res.end(), less<true>(*this));
+ pdqsort(res.begin(), res.end(), less<true>(*this));
}
}
}
diff --git a/be/src/vec/columns/column_vector.cpp
b/be/src/vec/columns/column_vector.cpp
index 1c96f4f2e6c..c12b14dd57e 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -245,7 +245,8 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t
limit, int nan_direct
if (s == 0) return;
- if (limit >= s) limit = 0;
+ // std::partial_sort only pays off when limit is much smaller than s (limit << s), so clear limit once it exceeds s / 8
+ if (limit > (s / 8.0)) limit = 0;
if (limit) {
for (size_t i = 0; i < s; ++i) res[i] = i;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]