This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene-2.0 by this push:
new e914a906 [opt](write)use more efficient string_compare from doris (#155) (#156)
e914a906 is described below
commit e914a90639d7abb8ccf619122b76c6436112291a
Author: qiye <[email protected]>
AuthorDate: Tue Dec 19 19:49:42 2023 +0800
[opt](write)use more efficient string_compare from doris (#155) (#156)
pick from #155
---
src/core/CLucene/index/SDocumentWriter.cpp | 28 +++++--------
src/core/CLucene/util/SSEUtil.h | 63 ++++++++++++++++++++++++++++++
src/core/CLucene/util/stringUtil.h | 39 ++++++++++++++++++
3 files changed, 112 insertions(+), 18 deletions(-)
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index 446c0dc3..3c33a30b 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -736,25 +736,17 @@ void SDocumentsWriter<T>::ThreadState::resetPostings() {
template<typename T>
int32_t SDocumentsWriter<T>::ThreadState::comparePostings(Posting *p1, Posting *p2) {
- const T *pos1 = scharPool->buffers[p1->textStart >> CHAR_BLOCK_SHIFT] + (p1->textStart & CHAR_BLOCK_MASK);
- const T *pos2 = scharPool->buffers[p2->textStart >> CHAR_BLOCK_SHIFT] + (p2->textStart & CHAR_BLOCK_MASK);
- while (true) {
- const auto c1 = static_cast<typename std::make_unsigned<T>::type>(*pos1++);
- const auto c2 = static_cast<typename std::make_unsigned<T>::type>(*pos2++);
- if (c1 < c2)
- if (CLUCENE_END_OF_WORD == c2)
- return 1;
- else
- return -1;
- else if (c2 < c1)
- if (CLUCENE_END_OF_WORD == c1)
- return -1;
- else
- return 1;
- else if (CLUCENE_END_OF_WORD == c1)
- return 0;
- }
+ if constexpr (std::is_same_v<T, char>) { // char terms: length-aware byte compare of the two postings' term_ via StringUtil::string_compare
+ auto n1 = p1->term_.size();
+ auto n2 = p2->term_.size();
+ auto min = std::min(n1, n2); // string_compare expects the caller to pass min(n1, n2) as its last argument
+ auto s1 = p1->term_.data();
+ auto s2 = p2->term_.data();
+ return StringUtil::string_compare(s1, n1, s2, n2, min); // <0, 0 or >0; bytes are compared as unsigned char
+ } else {
+ return p1->term_.compare(p2->term_); // other character types use term_'s own compare(); presumably a std::basic_string<T> — TODO confirm
}
+}
template<typename T>
void SDocumentsWriter<T>::ThreadState::quickSort(Posting **postings, int32_t
lo, int32_t hi) {
diff --git a/src/core/CLucene/util/SSEUtil.h b/src/core/CLucene/util/SSEUtil.h
new file mode 100644
index 00000000..fd3109af
--- /dev/null
+++ b/src/core/CLucene/util/SSEUtil.h
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/sse-util.hpp
+// and modified by Doris
+
+#pragma once
+
+#if defined(__aarch64__)
+#include <sse2neon.h> // IWYU pragma: export
+#elif defined(__x86_64__)
+#include <emmintrin.h> // IWYU pragma: export
+#include <immintrin.h> // IWYU pragma: export
+#include <mm_malloc.h> // IWYU pragma: export
+#include <smmintrin.h> // IWYU pragma: export
+#endif
+
+// This namespace contains constants useful for text processing with SSE4.2
+// intrinsics.
+namespace sse_util {
+// Number of characters that fit in a 64/128 bit register.
+// SSE provides instructions for loading 64 or 128 bits into a register
+// at a time.
+static const int CHARS_PER_64_BIT_REGISTER = 8;
+static const int CHARS_PER_128_BIT_REGISTER = 16;
+
+// SSE4.2 adds instructions for text processing. The instructions accept
+// a flag to control what text operation to do.
+// - SIDD_CMP_EQUAL_ANY ~ strchr
+// - SIDD_CMP_EQUAL_EACH ~ strcmp
+// - SIDD_UBYTE_OPS - 8 bit chars (as opposed to 16 bit)
+// - SIDD_NEGATIVE_POLARITY - toggles whether to set result to 1 or 0 when a
+// match is found.
+
+// In this mode, sse text processing functions will return a mask of all the characters that
+// matched.
+static const int STRCHR_MODE = _SIDD_CMP_EQUAL_ANY | _SIDD_UBYTE_OPS;
+
+// In this mode, sse text processing functions will return the number of bytes that match
+// consecutively from the beginning, i.e. the index of the first mismatching byte.
+static const int STRCMP_MODE = _SIDD_CMP_EQUAL_EACH | _SIDD_UBYTE_OPS | _SIDD_NEGATIVE_POLARITY;
+
+// Precomputed single-bit masks: SSE_BITMASK[i] == 1 << i for i in [0, 16).
+static const int SSE_BITMASK[CHARS_PER_128_BIT_REGISTER] = {
+ 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7,
+ 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15,
+};
+
+} // namespace sse_util
diff --git a/src/core/CLucene/util/stringUtil.h b/src/core/CLucene/util/stringUtil.h
index 8e8ca1e9..7d97e735 100644
--- a/src/core/CLucene/util/stringUtil.h
+++ b/src/core/CLucene/util/stringUtil.h
@@ -12,6 +12,7 @@
#endif
#include <cstring>
+#include "SSEUtil.h"
template <typename T>
const T* LUCENE_BLANK_SSTRING();
@@ -204,6 +205,44 @@ public:
return compareSSE2(p1 + size - 16, p2 + size - 16);
}
+    // Lexicographically compares two byte strings as unsigned char; empty inputs are fine (len == 0).
+    // Compares 16 bytes per step with SSE4.2 intrinsics (sse2neon on aarch64) when available.
+    // Returns:
+    //   < 0 if s1 < s2
+    //   0 if s1 == s2
+    //   > 0 if s1 > s2
+    // The SSE code path is just under 2x faster than the non-sse code path.
+    //   - s1/n1: ptr/len for the first string
+    //   - s2/n2: ptr/len for the second string
+    //   - len: min(n1, n2) - this can be more cheaply passed in by the caller
+    static inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) {
+        assert(len == std::min(n1, n2));
+    #if defined(__SSE4_2__) || defined(__aarch64__)
+        while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
+            __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
+            __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
+            // STRCMP_MODE: result is the index of the first differing byte, or 16 when none differ.
+            int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
+                                           sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE);
+            if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
+                return static_cast<unsigned char>(s1[chars_match]) - static_cast<unsigned char>(s2[chars_match]);
+            }
+            len -= sse_util::CHARS_PER_128_BIT_REGISTER;
+            s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
+            s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
+        }
+    #endif
+        while (len-- > 0) {
+            const unsigned char u1 = static_cast<unsigned char>(*s1++);
+            const unsigned char u2 = static_cast<unsigned char>(*s2++);
+            if (u1 != u2) {
+                return u1 - u2;
+            }
+        }
+        // Common prefix is equal: order by length. Return the sign only; narrowing n1 - n2 to int can corrupt it.
+        return n1 < n2 ? -1 : (n1 > n2 ? 1 : 0);
+    }
+
#endif
static inline int32_t utf8_byte_count(uint8_t c) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]