This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new a4e7c76 [Enhancement] use std::search to replace custom search (#7999)
a4e7c76 is described below
commit a4e7c76336e7f7ebfa5449dc26de461b601c5a49
Author: Pxl <[email protected]>
AuthorDate: Fri Feb 11 10:47:58 2022 +0800
[Enhancement] use std::search to replace custom search (#7999)
---
be/src/runtime/string_search.hpp | 95 ++++------------------------------------
1 file changed, 9 insertions(+), 86 deletions(-)
diff --git a/be/src/runtime/string_search.hpp b/be/src/runtime/string_search.hpp
index 2525321..e35905b 100644
--- a/be/src/runtime/string_search.hpp
+++ b/be/src/runtime/string_search.hpp
@@ -51,10 +51,10 @@
// agrees to be bound by the terms and conditions of this License
// Agreement.
-#ifndef DORIS_BE_SRC_QUERY_BE_RUNTIME_STRING_SEARCH_H
-#define DORIS_BE_SRC_QUERY_BE_RUNTIME_STRING_SEARCH_H
+#pragma once
#include <cstring>
+#include <functional>
#include <vector>
#include "common/logging.h"
@@ -62,109 +62,32 @@
namespace doris {
-// TODO: This can be sped up with SIDD_CMP_EQUAL_ORDERED or at the very least
rewritten
-// from published algorithms.
class StringSearch {
public:
virtual ~StringSearch() {}
- StringSearch() : _pattern(NULL), _mask(0) {}
+ StringSearch() : _pattern(nullptr) {}
- // Initialize/Precompute a StringSearch object from the pattern
- StringSearch(const StringValue* pattern) : _pattern(pattern), _mask(0),
_skip(0) {
- // Special cases
- if (_pattern->len <= 1) {
- return;
- }
-
- // Build compressed lookup table
- int mlast = _pattern->len - 1;
- _skip = mlast - 1;
-
- for (int i = 0; i < mlast; ++i) {
- bloom_add(_pattern->ptr[i]);
-
- if (_pattern->ptr[i] == _pattern->ptr[mlast]) {
- _skip = mlast - i - 1;
- }
- }
-
- bloom_add(_pattern->ptr[mlast]);
- }
+ StringSearch(const StringValue* pattern) : _pattern(pattern) {}
// search for this pattern in str.
// Returns the offset into str if the pattern exists
// Returns -1 if the pattern is not found
int search(const StringValue* str) const {
- // Special cases
if (!str || !_pattern || _pattern->len == 0) {
return -1;
}
- int mlast = _pattern->len - 1;
- int w = str->len - _pattern->len;
- int n = str->len;
- int m = _pattern->len;
- const char* s = str->ptr;
- const char* p = _pattern->ptr;
-
- // Special case if pattern->len == 1
- if (m == 1) {
- const char* result = reinterpret_cast<const char*>(memchr(s, p[0],
n));
-
- if (result != NULL) {
- return result - s;
- }
-
+ auto it = std::search(str->ptr, str->ptr + str->len,
+ std::default_searcher(_pattern->ptr,
_pattern->ptr + _pattern->len));
+ if (it == str->ptr + str->len) {
return -1;
+ } else {
+ return it - str->ptr;
}
-
- // General case.
- int j;
- // TODO: the original code seems to have an off by one error. It is
possible
- // to index at w + m which is the length of the input string. Checks
have
- // been added to make sure that w + m < str->len.
- for (int i = 0; i <= w; i++) {
- // note: using mlast in the skip path slows things down on x86
- if (s[i + m - 1] == p[m - 1]) {
- // candidate match
- for (j = 0; j < mlast; j++)
- if (s[i + j] != p[j]) {
- break;
- }
-
- if (j == mlast) {
- return i;
- }
-
- // miss: check if next character is part of pattern
- if (i + m < n && !bloom_query(s[i + m])) {
- i = i + m;
- } else {
- i = i + _skip;
- }
- } else {
- // skip: check if next character is part of pattern
- if (i + m < n && !bloom_query(s[i + m])) {
- i = i + m;
- }
- }
- }
-
- return -1;
}
private:
- static const int BLOOM_WIDTH = 64;
-
- void bloom_add(char c) { _mask |= (1UL << (c & (BLOOM_WIDTH - 1))); }
-
- bool bloom_query(char c) const { return _mask & (1UL << (c & (BLOOM_WIDTH
- 1))); }
-
const StringValue* _pattern;
- int64_t _mask;
- int64_t _skip;
};
} // namespace doris
-
-#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]