This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b627088e8c [Optimization](String) Optimize q20 q21 q22 q23
LIKE_SUBSTRING (like '%xxx%') (#18309)
b627088e8c is described below
commit b627088e8c89da2e8cdacb439f0608df4ee734df
Author: ZhangYu0123 <[email protected]>
AuthorDate: Mon Apr 3 18:09:15 2023 +0800
[Optimization](String) Optimize q20 q21 q22 q23 LIKE_SUBSTRING (like
'%xxx%') (#18309)
Optimize q20, q21, q22, q23 LIKE_SUBSTRING (like '%xxxx%'). Idea is from
clickhouse stringsearcher:
Stringsearcher is about 10%~20% faster than volnitsky algorithm when needle
size is less than 10 using two chars at beginning search in SIMD .
Stringsearcher is faster than volnitsky algorithm, when needle size is less
than 21.
The changes are as follows:
Using first two chars of needle at beginning search. We can compare two
chars of needle and [n:n+17) chars in haystack in SIMD in one loop. Filter
efficiency will be higher.
When env support SIMD, we use stringsearcher.
Test result in clickbench:
q20 is about 15% up.
q20: SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
q21, q22 is about 1%~5% up.
q21: SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE
'%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT
10;
q22: SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c,
COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE
'%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT
10;
q23 is about 30%~40% up and not stable.
q23: SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT
10;
---
be/src/vec/common/string_searcher.h | 68 +++++++++++++++++++++++++++++++------
be/src/vec/common/volnitsky.h | 4 +++
2 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/be/src/vec/common/string_searcher.h
b/be/src/vec/common/string_searcher.h
index 895ba538a9..19fd3d2c86 100644
--- a/be/src/vec/common/string_searcher.h
+++ b/be/src/vec/common/string_searcher.h
@@ -77,8 +77,10 @@ private:
uint8_t first {};
#ifdef __SSE4_1__
- /// vector filled `first` for determining leftmost position of the first
symbol
- __m128i pattern;
+ uint8_t second {};
+ /// vector filled `first` or `second` for determining leftmost position of
the first and second symbols
+ __m128i first_pattern;
+ __m128i second_pattern;
/// vector of first 16 characters of `needle`
__m128i cache = _mm_setzero_si128();
int cachemask {};
@@ -95,8 +97,11 @@ public:
first = *needle;
#ifdef __SSE4_1__
- pattern = _mm_set1_epi8(first);
-
+ first_pattern = _mm_set1_epi8(first);
+ if (needle + 1 < needle_end) {
+ second = *(needle + 1);
+ second_pattern = _mm_set1_epi8(second);
+ }
const auto* needle_pos = needle;
//for (const auto i : collections::range(0, n))
@@ -155,16 +160,57 @@ public:
const CharT* search(const CharT* haystack, const CharT* const
haystack_end) const {
if (needle == needle_end) return haystack;
- while (haystack < haystack_end) {
+ const auto needle_size = needle_end - needle;
#ifdef __SSE4_1__
- if (haystack + n <= haystack_end && page_safe(haystack)) {
- /// find first character
- const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const
__m128i*>(haystack));
- const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack,
pattern);
+ /// Here is the quick path when needle_size is 1.
+ if (needle_size == 1) {
+ while (haystack < haystack_end) {
+ if (haystack + n <= haystack_end && page_safe(haystack)) {
+ const auto v_haystack =
+ _mm_loadu_si128(reinterpret_cast<const
__m128i*>(haystack));
+ const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack,
first_pattern);
+ const auto mask = _mm_movemask_epi8(v_against_pattern);
+ if (mask == 0) {
+ haystack += n;
+ continue;
+ }
+
+ const auto offset = __builtin_ctz(mask);
+ haystack += offset;
+
+ return haystack;
+ }
- const auto mask = _mm_movemask_epi8(v_against_pattern);
+ if (haystack == haystack_end) {
+ return haystack_end;
+ }
- /// first character not present in 16 octets starting at
`haystack`
+ if (*haystack == first) {
+ return haystack;
+ }
+ ++haystack;
+ }
+ return haystack_end;
+ }
+#endif
+
+ while (haystack < haystack_end && haystack_end - haystack >=
needle_size) {
+#ifdef __SSE4_1__
+ if ((haystack + 1 + n) <= haystack_end && page_safe(haystack)) {
+ /// find first and second characters
+ const auto v_haystack_block_first =
+ _mm_loadu_si128(reinterpret_cast<const
__m128i*>(haystack));
+ const auto v_haystack_block_second =
+ _mm_loadu_si128(reinterpret_cast<const
__m128i*>(haystack + 1));
+
+ const auto v_against_pattern_first =
+ _mm_cmpeq_epi8(v_haystack_block_first, first_pattern);
+ const auto v_against_pattern_second =
+ _mm_cmpeq_epi8(v_haystack_block_second,
second_pattern);
+
+ const auto mask = _mm_movemask_epi8(
+ _mm_and_si128(v_against_pattern_first,
v_against_pattern_second));
+ /// first and second characters not present in 16 octets
starting at `haystack`
if (mask == 0) {
haystack += n;
continue;
diff --git a/be/src/vec/common/volnitsky.h b/be/src/vec/common/volnitsky.h
index 91daeffc83..c3d762e277 100644
--- a/be/src/vec/common/volnitsky.h
+++ b/be/src/vec/common/volnitsky.h
@@ -203,6 +203,10 @@ public:
const auto* haystack_end = haystack + haystack_size;
+#ifdef __SSE4_1__
+ return fallback_searcher.search(haystack, haystack_end);
+#endif
+
if (fallback || haystack_size <= needle_size ||
fallback_searcher.force_fallback)
return fallback_searcher.search(haystack, haystack_end);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]