This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new c09f5b93 feat(RUST): String detection is performed using SIMD 
techniques (#1752)
c09f5b93 is described below

commit c09f5b930aa26f3433deb6302b7635dc7c06bf69
Author: kitty <[email protected]>
AuthorDate: Mon Jul 22 23:56:50 2024 +0800

    feat(RUST): String detection is performed using SIMD techniques (#1752)
    
    
    
    ## What does this PR do?
    
    Using SIMD technology to speed up string detection
    
    ## Related issues
    
    
    
    ## Does this PR introduce any user-facing change?
    
    - [x] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    
    ## Benchmark
    In fury/rust, run
    ```bash
    cargo bench
    ```
    result:
    ```bash
    SIMD sse short          time:   [992.07 ps 993.91 ps 995.81 ps]
                            change: [-1.1907% -0.7021% -0.2276%] (p = 0.00 < 
0.05)
                            Change within noise threshold.
    Found 5 outliers among 100 measurements (5.00%)
      2 (2.00%) high mild
      3 (3.00%) high severe
    
    SIMD sse long           time:   [368.88 ns 369.70 ns 370.59 ns]
                            change: [-1.1407% -0.4415% +0.1537%] (p = 0.20 > 
0.05)
                            No change in performance detected.
    Found 11 outliers among 100 measurements (11.00%)
      9 (9.00%) high mild
      2 (2.00%) high severe
    
    SIMD avx short          time:   [4.4313 ns 4.4425 ns 4.4566 ns]
                            change: [-0.4239% -0.0440% +0.3277%] (p = 0.82 > 
0.05)
                            No change in performance detected.
    Found 9 outliers among 100 measurements (9.00%)
      5 (5.00%) high mild
      4 (4.00%) high severe
    
    SIMD avx long           time:   [18.215 ns 18.277 ns 18.351 ns]
                            change: [+0.6658% +1.0962% +1.6058%] (p = 0.00 < 
0.05)
                            Change within noise threshold.
    Found 9 outliers among 100 measurements (9.00%)
      5 (5.00%) high mild
      4 (4.00%) high severe
    
    Standard short          time:   [5.1115 ns 5.1670 ns 5.2491 ns]
                            change: [+0.5348% +1.5193% +2.5623%] (p = 0.00 < 
0.05)
                            Change within noise threshold.
    Found 3 outliers among 100 measurements (3.00%)
      2 (2.00%) high mild
      1 (1.00%) high severe
    
    Standard long           time:   [3.6904 µs 3.7205 µs 3.7606 µs]
                            change: [+1.5445% +2.5638% +3.9167%] (p = 0.00 < 
0.05)
                            Performance has regressed.
    Found 13 outliers among 100 measurements (13.00%)
      7 (7.00%) high mild
      6 (6.00%) high severe
      ```
    
    ---------
    
    Co-authored-by: hezz <[email protected]>
---
 rust/fury/Cargo.toml              |  10 ++
 rust/fury/benches/simd_bench.rs   | 112 ++++++++++++++++++++++
 rust/fury/src/meta/meta_string.rs |   4 +-
 rust/fury/src/meta/mod.rs         |   1 +
 rust/fury/src/meta/string_util.rs | 189 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 315 insertions(+), 1 deletion(-)

diff --git a/rust/fury/Cargo.toml b/rust/fury/Cargo.toml
index d39d5375..bba04afb 100644
--- a/rust/fury/Cargo.toml
+++ b/rust/fury/Cargo.toml
@@ -30,3 +30,13 @@ lazy_static = { version = "1.4" }
 byteorder = { version = "1.4" }
 chrono = "0.4"
 thiserror = { default-features = false, version = "1.0" }
+
+
+[[bench]]
+name = "simd_bench"
+harness = false
+
+
+[dev-dependencies]
+criterion = "0.5.1"
+rand = "0.8.5"
\ No newline at end of file
diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury/benches/simd_bench.rs
new file mode 100644
index 00000000..7649533f
--- /dev/null
+++ b/rust/fury/benches/simd_bench.rs
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+#[cfg(target_feature = "avx2")]
+use std::arch::x86_64::*;
+
+#[cfg(target_feature = "sse2")]
+use std::arch::x86_64::*;
+
+#[cfg(target_feature = "avx2")]
+pub(crate) const MIN_DIM_SIZE_AVX: usize = 32;
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16;
+
+#[cfg(target_feature = "sse2")]
+unsafe fn is_latin_sse(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    let len = s.len();
+    let ascii_mask = _mm_set1_epi8(0x80u8 as i8);
+    let remaining = len % MIN_DIM_SIZE_SIMD;
+    let range_end = len - remaining;
+    for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) {
+        let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i);
+        let masked = _mm_and_si128(chunk, ascii_mask);
+        let cmp = _mm_cmpeq_epi8(masked, _mm_setzero_si128());
+        if _mm_movemask_epi8(cmp) != 0xFFFF {
+            return false;
+        }
+    }
+    for item in bytes.iter().take(range_end).skip(range_end) {
+        if !item.is_ascii() {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(target_feature = "avx2")]
+unsafe fn is_latin_avx(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    let len = s.len();
+    let ascii_mask = _mm256_set1_epi8(0x80u8 as i8);
+    let remaining = len % MIN_DIM_SIZE_AVX;
+    let range_end = len - remaining;
+    for i in (0..(len - remaining)).step_by(MIN_DIM_SIZE_AVX) {
+        let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const 
__m256i);
+        let masked = _mm256_and_si256(chunk, ascii_mask);
+        let cmp = _mm256_cmpeq_epi8(masked, _mm256_setzero_si256());
+        if _mm256_movemask_epi8(cmp) != 0xFFFF {
+            return false;
+        }
+    }
+    for item in bytes.iter().take(range_end).skip(range_end) {
+        if !item.is_ascii() {
+            return false;
+        }
+    }
+    true
+}
+
+fn is_latin_std(s: &str) -> bool {
+    s.bytes().all(|b| b.is_ascii())
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let test_str_short = "Hello, World!";
+    let test_str_long = "Hello, World! ".repeat(1000);
+
+    #[cfg(target_feature = "sse2")]
+    c.bench_function("SIMD sse short", |b| {
+        b.iter(|| unsafe { is_latin_sse(black_box(test_str_short)) })
+    });
+    #[cfg(target_feature = "sse2")]
+    c.bench_function("SIMD sse long", |b| {
+        b.iter(|| unsafe { is_latin_sse(black_box(&test_str_long)) })
+    });
+    #[cfg(target_feature = "avx2")]
+    c.bench_function("SIMD avx short", |b| {
+        b.iter(|| unsafe { is_latin_avx(black_box(test_str_short)) })
+    });
+    #[cfg(target_feature = "avx2")]
+    c.bench_function("SIMD avx long", |b| {
+        b.iter(|| unsafe { is_latin_avx(black_box(&test_str_long)) })
+    });
+
+    c.bench_function("Standard short", |b| {
+        b.iter(|| is_latin_std(black_box(test_str_short)))
+    });
+
+    c.bench_function("Standard long", |b| {
+        b.iter(|| is_latin_std(black_box(&test_str_long)))
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/rust/fury/src/meta/meta_string.rs 
b/rust/fury/src/meta/meta_string.rs
index 3b46265e..627433c9 100644
--- a/rust/fury/src/meta/meta_string.rs
+++ b/rust/fury/src/meta/meta_string.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::meta::string_util;
+
 #[derive(Debug, PartialEq)]
 pub enum Encoding {
     Utf8 = 0x00,
@@ -102,7 +104,7 @@ impl MetaStringEncoder {
     }
 
     fn is_latin(&self, s: &str) -> bool {
-        s.bytes().all(|b| b.is_ascii())
+        string_util::is_latin(s)
     }
 
     pub fn encode(&self, input: &str) -> Result<MetaString, Error> {
diff --git a/rust/fury/src/meta/mod.rs b/rust/fury/src/meta/mod.rs
index 4e4d40b2..02871e82 100644
--- a/rust/fury/src/meta/mod.rs
+++ b/rust/fury/src/meta/mod.rs
@@ -16,4 +16,5 @@
 // under the License.
 
 mod meta_string;
+mod string_util;
 pub use meta_string::{Encoding, MetaStringDecoder, MetaStringEncoder};
diff --git a/rust/fury/src/meta/string_util.rs 
b/rust/fury/src/meta/string_util.rs
new file mode 100644
index 00000000..ea865911
--- /dev/null
+++ b/rust/fury/src/meta/string_util.rs
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::*;
+
+#[cfg(target_feature = "avx2")]
+use std::arch::x86_64::*;
+
+#[cfg(target_feature = "sse2")]
+use std::arch::x86_64::*;
+
+#[cfg(target_arch = "x86_64")]
+pub(crate) const MIN_DIM_SIZE_AVX: usize = 32;
+
+#[cfg(any(
+    target_arch = "x86",
+    target_arch = "x86_64",
+    all(target_arch = "aarch64", target_feature = "neon")
+))]
+pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16;
+
+#[cfg(target_arch = "x86_64")]
+unsafe fn is_latin_avx(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    let len = bytes.len();
+    let ascii_mask = _mm256_set1_epi8(0x80u8 as i8);
+    let remaining = len % MIN_DIM_SIZE_AVX;
+    let range_end = len - remaining;
+
+    for i in (0..range_end).step_by(MIN_DIM_SIZE_AVX) {
+        let chunk = _mm256_loadu_si256(bytes.as_ptr().add(i) as *const 
__m256i);
+        let masked = _mm256_and_si256(chunk, ascii_mask);
+        let cmp = _mm256_cmpeq_epi8(masked, _mm256_setzero_si256());
+        if _mm256_movemask_epi8(cmp) != -1 {
+            return false;
+        }
+    }
+    for item in bytes.iter().take(len).skip(range_end) {
+        if !item.is_ascii() {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(target_feature = "sse2")]
+unsafe fn is_latin_sse(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    let len = bytes.len();
+    let ascii_mask = _mm_set1_epi8(0x80u8 as i8);
+    let remaining = len % MIN_DIM_SIZE_SIMD;
+    let range_end = len - remaining;
+    for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) {
+        let chunk = _mm_loadu_si128(bytes.as_ptr().add(i) as *const __m128i);
+        let masked = _mm_and_si128(chunk, ascii_mask);
+        let cmp = _mm_cmpeq_epi8(masked, _mm_setzero_si128());
+        if _mm_movemask_epi8(cmp) != 0xFFFF {
+            return false;
+        }
+    }
+    for item in bytes.iter().take(len).skip(range_end) {
+        if !item.is_ascii() {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(target_feature = "neon")]
+unsafe fn is_latin_neon(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    let len = bytes.len();
+    let ascii_mask = vdupq_n_u8(0x80);
+    let remaining = len % MIN_DIM_SIZE_SIMD;
+    let range_end = len - remaining;
+    for i in (0..range_end).step_by(MIN_DIM_SIZE_SIMD) {
+        let chunk = vld1q_u8(bytes.as_ptr().add(i));
+        let masked = vandq_u8(chunk, ascii_mask);
+        let cmp = vceqq_u8(masked, vdupq_n_u8(0));
+        if vminvq_u8(cmp) == 0 {
+            return false;
+        }
+    }
+    for item in bytes.iter().take(len).skip(range_end) {
+        if !item.is_ascii() {
+            return false;
+        }
+    }
+    true
+}
+
+fn is_latin_standard(s: &str) -> bool {
+    s.bytes().all(|b| b.is_ascii())
+}
+
+pub(crate) fn is_latin(s: &str) -> bool {
+    #[cfg(target_arch = "x86_64")]
+    {
+        if is_x86_feature_detected!("avx")
+            && is_x86_feature_detected!("fma")
+            && s.len() >= MIN_DIM_SIZE_AVX
+        {
+            return unsafe { is_latin_avx(s) };
+        }
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    {
+        if is_x86_feature_detected!("sse") && s.len() >= MIN_DIM_SIZE_SIMD {
+            return unsafe { is_latin_sse(s) };
+        }
+    }
+
+    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
+    {
+        if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= 
MIN_DIM_SIZE_SIMD {
+            return unsafe { is_latin_neon(s) };
+        }
+    }
+    is_latin_standard(s)
+}
+
+#[cfg(test)]
+mod tests {
+    // 导入外部模块中的内容
+    use super::*;
+    use rand::Rng;
+
+    fn generate_random_string(length: usize) -> String {
+        const CHARSET: &[u8] = 
b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+        let mut rng = rand::thread_rng();
+
+        let result: String = (0..length)
+            .map(|_| {
+                let idx = rng.gen_range(0..CHARSET.len());
+                CHARSET[idx] as char
+            })
+            .collect();
+
+        result
+    }
+
+    #[test]
+    fn test_is_latin() {
+        let s = generate_random_string(1000);
+        let not_latin_str = generate_random_string(1000) + "abc\u{1234}";
+
+        #[cfg(target_arch = "x86_64")]
+        {
+            if is_x86_feature_detected!("avx") && 
is_x86_feature_detected!("fma") {
+                assert!(unsafe { is_latin_avx(&s) });
+                assert!(!unsafe { is_latin_avx(&not_latin_str) });
+            }
+        }
+
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        {
+            if is_x86_feature_detected!("sse") && s.len() >= MIN_DIM_SIZE_SIMD 
{
+                assert!(unsafe { is_latin_sse(&s) });
+                assert!(!unsafe { is_latin_sse(&not_latin_str) });
+            }
+        }
+
+        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
+        {
+            if std::arch::is_aarch64_feature_detected!("neon") && s.len() >= 
MIN_DIM_SIZE_SIMD {
+                assert!(unsafe { is_latin_neon(&s) });
+                assert!(!unsafe { is_latin_neon(&not_latin_str) });
+            }
+        }
+        assert!(is_latin_standard(&s));
+        assert!(!is_latin_standard(&not_latin_str));
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to