This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new df5563971bc [Performance](opt) opt the order by performance in
permutation (#38985)
df5563971bc is described below
commit df5563971bcc9476bcab97501c325027d6729f16
Author: HappenLee <[email protected]>
AuthorDate: Thu Aug 8 10:48:53 2024 +0800
[Performance](opt) opt the order by performance in permutation (#38985)
## Proposed changes
Before:
```
select l_quantity from lineitem order by l_quantity limit 10000020;
+--------------+
| ReturnedRows |
+--------------+
| 10000020 |
+--------------+
1 row in set (2 min 24.42 sec)
```
After:
```
mysql [tpch]>select l_quantity from lineitem order by l_quantity limit
10000020;
+--------------+
| ReturnedRows |
+--------------+
| 10000020 |
+--------------+
1 row in set (28.42 sec)
```
<!--Describe your changes.-->
---
be/src/vec/columns/column_decimal.h | 25 +++++++++++++++++--------
be/src/vec/columns/column_string.cpp | 9 ++++-----
be/src/vec/columns/column_vector.cpp | 3 ++-
3 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index 24982b7504c..cc1661312a8 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -21,6 +21,7 @@
#pragma once
#include <glog/logging.h>
+#include <pdqsort.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
@@ -269,14 +270,22 @@ protected:
for (U i = 0; i < s; ++i) res[i] = i;
auto sort_end = res.end();
- if (limit && limit < s) sort_end = res.begin() + limit;
-
- if (reverse)
- std::partial_sort(res.begin(), sort_end, res.end(),
- [this](size_t a, size_t b) { return data[a] >
data[b]; });
- else
- std::partial_sort(res.begin(), sort_end, res.end(),
- [this](size_t a, size_t b) { return data[a] <
data[b]; });
+ if (limit && limit < s / 8.0) {
+ sort_end = res.begin() + limit;
+ if (reverse)
+ std::partial_sort(res.begin(), sort_end, res.end(),
+ [this](size_t a, size_t b) { return data[a]
> data[b]; });
+ else
+ std::partial_sort(res.begin(), sort_end, res.end(),
+ [this](size_t a, size_t b) { return data[a]
< data[b]; });
+ } else {
+ if (reverse)
+ pdqsort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] > data[b];
});
+ else
+ pdqsort(res.begin(), res.end(),
+ [this](size_t a, size_t b) { return data[a] < data[b];
});
+ }
}
void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint32_t& hash) const {
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index db0902d15a1..952a1a97915 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -483,9 +483,8 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t
limit, int /*nan_directi
res[i] = i;
}
- if (limit >= s) {
- limit = 0;
- }
+ // std::partial_sort need limit << s can get performance benefit
+ if (limit > (s / 8.0)) limit = 0;
if (limit) {
if (reverse) {
@@ -495,9 +494,9 @@ void ColumnStr<T>::get_permutation(bool reverse, size_t
limit, int /*nan_directi
}
} else {
if (reverse) {
- std::sort(res.begin(), res.end(), less<false>(*this));
+ pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
- std::sort(res.begin(), res.end(), less<true>(*this));
+ pdqsort(res.begin(), res.end(), less<true>(*this));
}
}
}
diff --git a/be/src/vec/columns/column_vector.cpp
b/be/src/vec/columns/column_vector.cpp
index ff7ab99d5de..f8d05c3d492 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -255,7 +255,8 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t
limit, int nan_direct
if (s == 0) return;
- if (limit >= s) limit = 0;
+ // std::partial_sort need limit << s can get performance benefit
+ if (limit > (s / 8.0)) limit = 0;
if (limit) {
for (size_t i = 0; i < s; ++i) res[i] = i;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]