This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene-2.0 by this push:
     new e914a906 [opt](write)use more efficient string_compare from doris (#155) (#156)
e914a906 is described below

commit e914a90639d7abb8ccf619122b76c6436112291a
Author: qiye <[email protected]>
AuthorDate: Tue Dec 19 19:49:42 2023 +0800

    [opt](write)use more efficient string_compare from doris (#155) (#156)
    
    pick from #155
---
 src/core/CLucene/index/SDocumentWriter.cpp | 28 +++++--------
 src/core/CLucene/util/SSEUtil.h            | 63 ++++++++++++++++++++++++++++++
 src/core/CLucene/util/stringUtil.h         | 39 ++++++++++++++++++
 3 files changed, 112 insertions(+), 18 deletions(-)

diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index 446c0dc3..3c33a30b 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -736,25 +736,17 @@ void SDocumentsWriter<T>::ThreadState::resetPostings() {
 
 template<typename T>
 int32_t SDocumentsWriter<T>::ThreadState::comparePostings(Posting *p1, Posting *p2) {
-    const T *pos1 = scharPool->buffers[p1->textStart >> CHAR_BLOCK_SHIFT] + (p1->textStart & CHAR_BLOCK_MASK);
-    const T *pos2 = scharPool->buffers[p2->textStart >> CHAR_BLOCK_SHIFT] + (p2->textStart & CHAR_BLOCK_MASK);
-    while (true) {
-        const auto c1 = static_cast<typename std::make_unsigned<T>::type>(*pos1++);
-        const auto c2 = static_cast<typename std::make_unsigned<T>::type>(*pos2++);
-        if (c1 < c2)
-            if (CLUCENE_END_OF_WORD == c2)
-                return 1;
-            else
-                return -1;
-        else if (c2 < c1)
-            if (CLUCENE_END_OF_WORD == c1)
-                return -1;
-            else
-                return 1;
-        else if (CLUCENE_END_OF_WORD == c1)
-            return 0;
-        }
+    if constexpr (std::is_same_v<T, char>) {
+        auto n1 = p1->term_.size();
+        auto n2 = p2->term_.size();
+        auto min = std::min(n1, n2);
+        auto s1 = p1->term_.data();
+        auto s2 = p2->term_.data();
+        return StringUtil::string_compare(s1, n1, s2, n2, min);
+    } else {
+        return p1->term_.compare(p2->term_);
     }
+}
 
 template<typename T>
 void SDocumentsWriter<T>::ThreadState::quickSort(Posting **postings, int32_t lo, int32_t hi) {
diff --git a/src/core/CLucene/util/SSEUtil.h b/src/core/CLucene/util/SSEUtil.h
new file mode 100644
index 00000000..fd3109af
--- /dev/null
+++ b/src/core/CLucene/util/SSEUtil.h
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/sse-util.hpp
+// and modified by Doris
+
+#pragma once
+
+#if defined(__aarch64__)
+#include <sse2neon.h> // IWYU pragma: export
+#elif defined(__x86_64__)
+#include <emmintrin.h> // IWYU pragma: export
+#include <immintrin.h> // IWYU pragma: export
+#include <mm_malloc.h> // IWYU pragma: export
+#include <smmintrin.h> // IWYU pragma: export
+#endif
+
+// This namespace contains constants useful for text processing with SSE4.2
+// intrinsics.
+namespace sse_util {
+// Number of characters that fit in 64/128 bit register.
+// SSE provides instructions for loading 64 or 128 bits into a register
+// at a time.
+static const int CHARS_PER_64_BIT_REGISTER = 8;
+static const int CHARS_PER_128_BIT_REGISTER = 16;
+
+// SSE4.2 adds instructions for text processing.  The instructions accept
+// a flag to control what text operation to do.
+//   - SIDD_CMP_EQUAL_ANY ~ strchr
+//   - SIDD_CMP_EQUAL_EACH ~ strcmp
+//   - SIDD_UBYTE_OPS - 8 bit chars (as opposed to 16 bit)
+//   - SIDD_NEGATIVE_POLARITY - toggles whether to set result to 1 or 0 when a
+//     match is found.
+
+// In this mode, sse text processing functions will return a mask of all the characters that
+// matched
+static const int STRCHR_MODE = _SIDD_CMP_EQUAL_ANY | _SIDD_UBYTE_OPS;
+
+// In this mode, sse text processing functions will return the number of bytes that match
+// consecutively from the beginning.
+static const int STRCMP_MODE = _SIDD_CMP_EQUAL_EACH | _SIDD_UBYTE_OPS | _SIDD_NEGATIVE_POLARITY;
+
+// Precomputed mask values up to 16 bits.
+static const int SSE_BITMASK[CHARS_PER_128_BIT_REGISTER] = {
+        1 << 0, 1 << 1, 1 << 2,  1 << 3,  1 << 4,  1 << 5,  1 << 6,  1 << 7,
+        1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15,
+};
+
+} // namespace sse_util
diff --git a/src/core/CLucene/util/stringUtil.h b/src/core/CLucene/util/stringUtil.h
index 8e8ca1e9..7d97e735 100644
--- a/src/core/CLucene/util/stringUtil.h
+++ b/src/core/CLucene/util/stringUtil.h
@@ -12,6 +12,7 @@
 #endif
 
 #include <cstring>
+#include "SSEUtil.h"
 
 template <typename T>
 const T* LUCENE_BLANK_SSTRING();
@@ -204,6 +205,44 @@ public:
         return compareSSE2(p1 + size - 16, p2 + size - 16);
     }
 
+    // Compare two strings using sse4.2 intrinsics if they are available. This code assumes
+    // that the trivial cases are already handled (i.e. one string is empty).
+    // Returns:
+    //   < 0 if s1 < s2
+    //   0 if s1 == s2
+    //   > 0 if s1 > s2
+    // The SSE code path is just under 2x faster than the non-sse code path.
+    //   - s1/n1: ptr/len for the first string
+    //   - s2/n2: ptr/len for the second string
+    //   - len: min(n1, n2) - this can be more cheaply passed in by the caller
+    static inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) {
+        assert(len == std::min(n1, n2));
+    #if defined(__SSE4_2__) || defined(__aarch64__)
+        while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
+            __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
+            __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
+            int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
+                                        sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE);
+            if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
+                return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match];
+            }
+            len -= sse_util::CHARS_PER_128_BIT_REGISTER;
+            s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
+            s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
+        }
+    #endif
+        unsigned char u1, u2;
+        while (len-- > 0) {
+            u1 = (unsigned char)*s1++;
+            u2 = (unsigned char)*s2++;
+            if (u1 != u2) {
+                return u1 - u2;
+            }
+        }
+
+        return n1 - n2;
+    }
+
 #endif
 
     static inline int32_t utf8_byte_count(uint8_t c) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to