(doris) branch master updated: [opt](reader) Use memchr to speed up BaseTextLineReaderContext.find_lf_crlf_line_sep function (#54957)

zclll Sat, 23 Aug 2025 12:21:35 -0700

This is an automated email from the ASF dual-hosted git repository.

zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new d01dcaa6bd5 [opt](reader) Use memchr to speed up BaseTextLineReaderContext.find_lf_crlf_line_sep function (#54957)
d01dcaa6bd5 is described below

commit d01dcaa6bd5e955fbd84dadca4b4e720f27d2f31
Author: Zhiguo Wu <[email protected]>
AuthorDate: Sun Aug 24 03:20:19 2025 +0800

    [opt](reader) Use memchr to speed up BaseTextLineReaderContext.find_lf_crlf_line_sep function (#54957)
    
    ### What problem does this PR solve?
    
    Use memchr to speed up BaseTextLineReaderContext.find_lf_crlf_line_sep
    function.
    
    ---------
    
    Co-authored-by: Zhiguo Wu <[email protected]>
---
 be/benchmark/benchmark_plain_text_line_reader.hpp  | 103 +++++++++++++++++++++
 .../file_reader/new_plain_text_line_reader.h       |  59 ++----------
 run-be-ut.sh                                       |   5 +
 3 files changed, 115 insertions(+), 52 deletions(-)

diff --git a/be/benchmark/benchmark_plain_text_line_reader.hpp b/be/benchmark/benchmark_plain_text_line_reader.hpp
new file mode 100644
index 00000000000..e70778d11f7
--- /dev/null
+++ b/be/benchmark/benchmark_plain_text_line_reader.hpp
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <benchmark/benchmark.h>
+#include <string>
+#include <vector>
+
+#include "vec/exec/format/file_reader/new_plain_text_line_reader.h"
+
+namespace doris {
+
+static std::string create_test_data(size_t length, const std::string& delimiter = "", char fill_char = 'a') {
+    return std::string(length, fill_char) + delimiter;
+}
+
+static void BM_FindLfCrlfLineSep(benchmark::State& state) {
+    size_t data_size = state.range(0);
+    size_t delimiter_type = state.range(1);
+    
+    std::string test_data;
+    switch(delimiter_type) {
+        case 0: // No delimiter
+            test_data = create_test_data(data_size);
+            break;
+        case 1: // Delimiter is \n
+            test_data = create_test_data(data_size, "\n");
+            break;
+        case 2: // Delimiter is \r\n
+            test_data = create_test_data(data_size, "\r\n");
+            break;
+        default:
+            test_data = create_test_data(data_size);
+            break;
+    }
+
+    PlainTextLineReaderCtx ctx("\n", 1, false);
+    const auto* data = reinterpret_cast<const uint8_t*>(test_data.c_str());
+    const size_t size = test_data.size();
+    
+    for (auto _ : state) {
+        const auto* result = ctx.find_lf_crlf_line_sep(data, size);
+        benchmark::DoNotOptimize(result);
+    }
+    
+    state.SetBytesProcessed(state.iterations() * test_data.size());
+    
+    std::string label = "size_" + std::to_string(data_size);
+    switch (delimiter_type) {
+        case 0: label += "_delim_no"; break;
+        case 1: label += "_delim_lf"; break;
+        case 2: label += "_delim_crlf"; break;
+        default: label += "_delim_no"; break;
+    }
+    state.SetLabel(label);
+}
+
+BENCHMARK(BM_FindLfCrlfLineSep)
+    ->Unit(benchmark::kNanosecond)
+    ->Args({16, 0}) // 16 bytes, no delimiter
+    ->Args({16, 1}) // 16 bytes, delimiter is \n
+    ->Args({16, 2}) // 16 bytes, delimiter is \r\n
+    ->Args({32, 0}) // 32 bytes, no delimiter
+    ->Args({32, 1}) // 32 bytes, delimiter is \n
+    ->Args({32, 2}) // 32 bytes, delimiter is \r\n
+    ->Args({64, 0}) // 64 bytes, no delimiter
+    ->Args({64, 1}) // 64 bytes, delimiter is \n
+    ->Args({64, 2}) // 64 bytes, delimiter is \r\n
+    ->Args({128, 0}) // 128 bytes, no delimiter
+    ->Args({128, 1}) // 128 bytes, delimiter is \n
+    ->Args({128, 2}) // 128 bytes, delimiter is \r\n
+    ->Args({256, 0}) // 256 bytes, no delimiter
+    ->Args({256, 1}) // 256 bytes, delimiter is \n
+    ->Args({256, 2}) // 256 bytes, delimiter is \r\n
+    ->Args({512, 0}) // 512 bytes, no delimiter
+    ->Args({512, 1}) // 512 bytes, delimiter is \n
+    ->Args({512, 2}) // 512 bytes, delimiter is \r\n
+    ->Args({1024, 0}) // 1KB, no delimiter
+    ->Args({1024, 1}) // 1KB, delimiter is \n
+    ->Args({1024, 2}) // 1KB, delimiter is \r\n
+    ->Args({64 * 1024, 0}) // 64KB, no delimiter
+    ->Args({64 * 1024, 1}) // 64KB, delimiter is \n
+    ->Args({64 * 1024, 2}) // 64KB, delimiter is \r\n
+    ->Args({1024 * 1024, 0}) // 1MB, no delimiter
+    ->Args({1024 * 1024, 1}) // 1MB, delimiter is \n
+    ->Args({1024 * 1024, 2}) // 1MB, delimiter is \r\n
+    ->Repetitions(5)
+    ->DisplayAggregatesOnly();
+
+} // namespace doris
diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
index 730dc2e9cd9..39b7aea456b 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.h
@@ -91,64 +91,19 @@ public:
         if (start == nullptr || length == 0) {
             return nullptr;
         }
-        size_t i = 0;
-#ifdef __AVX2__
-        // const uint8_t* end = start + length;
-        const __m256i newline = _mm256_set1_epi8('\n');
-        const __m256i carriage_return = _mm256_set1_epi8('\r');
-
-        const size_t simd_width = 32;
-        // Process 32 bytes at a time using AVX2
-        for (; i + simd_width <= length; i += simd_width) {
-            __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(start + i));
-
-            // Compare with '\n' and '\r'
-            __m256i cmp_newline = _mm256_cmpeq_epi8(data, newline);
-            __m256i cmp_carriage_return = _mm256_cmpeq_epi8(data, carriage_return);
-
-            // Check if there is a match
-            int mask_newline = _mm256_movemask_epi8(cmp_newline);
-            int mask_carriage_return = _mm256_movemask_epi8(cmp_carriage_return);
-
-            if (mask_newline != 0 || mask_carriage_return != 0) {
-                size_t pos_lf = (mask_newline != 0) ? i + __builtin_ctz(mask_newline) : INT32_MAX;
-                size_t pos_cr = (mask_carriage_return != 0)
-                                        ? i + __builtin_ctz(mask_carriage_return)
-                                        : INT32_MAX;
-                if (pos_lf < pos_cr) {
-                    return start + pos_lf;
-                } else if (pos_cr < pos_lf) {
-                    if (pos_lf != INT32_MAX) {
-                        if (pos_lf >= 1 && start[pos_lf - 1] == '\r') {
-                            //check   xxx\r\r\r\nxxx
-                            line_crlf = true;
-                            return start + pos_lf - 1;
-                        }
-                        // xxx\rxxxx\nxx
-                        return start + pos_lf;
-                    } else if (i + simd_width < length && start[i + simd_width - 1] == '\r' &&
-                               start[i + simd_width] == '\n') {
-                        //check [/r/r/r/r/r/r/rxxx/r]  [\nxxxx]
-                        line_crlf = true;
-                        return start + i + simd_width - 1;
-                    }
-                }
-            }
-        }
 
-        // Process remaining bytes
-#endif
-        for (; i < length; ++i) {
-            if (start[i] == '\n') {
-                return &start[i];
-            }
-            if (start[i] == '\r' && (i + 1 < length) && start[i + 1] == '\n') {
+        const auto* p = (const uint8_t*)memchr(start, '\n', length);
+        if (p) {
+            size_t i = p - start;
+            if (i > 0 && start[i - 1] == '\r') {
                 line_crlf = true;
-                return &start[i];
+                return p - 1;
             }
+            return p;
         }
         return nullptr;
     }
+
     const uint8_t* call_find_line_sep(const uint8_t* start, const size_t length) {
         return (this->*find_line_delimiter_func)(start, length);
     }
diff --git a/run-be-ut.sh b/run-be-ut.sh
index 62e62e7d406..f6f6a09d9cb 100755
--- a/run-be-ut.sh
+++ b/run-be-ut.sh
@@ -211,6 +211,10 @@ if [[ -z "${USE_AVX2}" ]]; then
     USE_AVX2='ON'
 fi
 
+if [[ -z "${ARM_MARCH}" ]]; then
+    ARM_MARCH='armv8-a+crc'
+fi
+
 if [[ -z "${USE_UNWIND}" ]]; then
     if [[ "$(uname -s)" != 'Darwin' ]]; then
         USE_UNWIND='ON'
@@ -242,6 +246,7 @@ cd "${CMAKE_BUILD_DIR}"
     -DUSE_UNWIND="${USE_UNWIND}" \
     -DUSE_JEMALLOC=OFF \
     -DUSE_AVX2="${USE_AVX2}" \
+    -DARM_MARCH="${ARM_MARCH}" \
     -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
     -DENABLE_CLANG_COVERAGE="${DENABLE_CLANG_COVERAGE}" \
     ${CMAKE_USE_CCACHE:+${CMAKE_USE_CCACHE}} \


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [opt](reader) Use memchr to speed up BaseTextLineReaderContext.find_lf_crlf_line_sep function (#54957)

Reply via email to