This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 564d3cd647f [Performance](opt) opt the order by performance in
permutation (#39089)
564d3cd647f is described below
commit 564d3cd647fda28001d258ad06372d88ab2b3d1d
Author: HappenLee <[email protected]>
AuthorDate: Sat Aug 24 16:05:46 2024 +0800
[Performance](opt) opt the order by performance in permutation (#39089)
## Proposed changes
Issue Number: cherry pick #38985
<!--Describe your changes.-->
---
be/src/vec/columns/column_decimal.h | 25 +++++++++++++++++--------
be/src/vec/columns/column_string.cpp | 9 ++++-----
be/src/vec/columns/column_vector.cpp | 3 ++-
3 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index 7f286699ab8..0922c6a0bf1 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -21,6 +21,7 @@
#pragma once
#include <glog/logging.h>
+#include <pdqsort.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
@@ -286,14 +287,22 @@ protected:
for (U i = 0; i < s; ++i) res[i] = i;
auto sort_end = res.end();
- if (limit && limit < s) sort_end = res.begin() + limit;
-
- if (reverse)
- std::partial_sort(res.begin(), sort_end, res.end(),
- [this](size_t a, size_t b) { return data[a] >
data[b]; });
- else
- std::partial_sort(res.begin(), sort_end, res.end(),
- [this](size_t a, size_t b) { return data[a] <
data[b]; });
+ if (limit && limit < s / 8.0) {
+ sort_end = res.begin() + limit;
+ if (reverse)
+ std::partial_sort(res.begin(), sort_end, res.end(),
+ [this](size_t a, size_t b) { return data[a]
> data[b]; });
+ else
+ std::partial_sort(res.begin(), sort_end, res.end(),
+ [this](size_t a, size_t b) { return data[a]
< data[b]; });
+ } else {
+ if (reverse)
+ pdqsort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] > data[b];
});
+ else
+ pdqsort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] < data[b];
});
+ }
}
void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint32_t& hash) const {
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 614425dfa13..3799bab34f6 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -494,9 +494,8 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t
limit, int /*nan_directi
res[i] = i;
}
- if (limit >= s) {
- limit = 0;
- }
+ // std::partial_sort only gives a performance benefit when limit is much smaller than s
+ if (limit > (s / 8.0)) limit = 0;
if (limit) {
if (reverse) {
@@ -506,9 +505,9 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t
limit, int /*nan_directi
}
} else {
if (reverse) {
- std::sort(res.begin(), res.end(), less<false>(*this));
+ pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
- std::sort(res.begin(), res.end(), less<true>(*this));
+ pdqsort(res.begin(), res.end(), less<true>(*this));
}
}
}
diff --git a/be/src/vec/columns/column_vector.cpp
b/be/src/vec/columns/column_vector.cpp
index 388b73dcfb9..879b750de1d 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -240,7 +240,8 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t
limit, int nan_direct
if (s == 0) return;
- if (limit >= s) limit = 0;
+ // std::partial_sort only gives a performance benefit when limit is much smaller than s
+ if (limit > (s / 8.0)) limit = 0;
if (limit) {
for (size_t i = 0; i < s; ++i) res[i] = i;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]