The branch stable/15 has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=f56c68448d429687a81ce4d97b16e7b9d7c57808

commit f56c68448d429687a81ce4d97b16e7b9d7c57808
Author:     Strahinja Stanišić <[email protected]>
AuthorDate: 2024-07-19 17:58:04 +0000
Commit:     Robert Clausecker <[email protected]>
CommitDate: 2025-11-30 00:43:06 +0000

    libc: scalar strchrnul() in RISC-V assembly
    
    Scalar implementation of strchrnul() in RISC-V assembly and changes to the
    corresponding manpage.
    
    Performance was benchmarked on a HiFive Unmatched (SiFive HF105-001) board
    using: https://github.com/clausecker/strperf
    
    os: FreeBSD
    arch: riscv
            │ strchrnul_baseline │          strchrnul_scalar           │
            │       sec/op       │   sec/op     vs base                │
    Short            680.2µ ± 5%   435.3µ ± 0%  -36.01% (p=0.000 n=20)
    Mid              314.7µ ± 3%   221.4µ ± 0%  -29.63% (p=0.000 n=20)
    Long             152.3µ ± 0%   138.5µ ± 0%   -9.08% (p=0.000 n=20)
    geomean          319.5µ        237.2µ       -25.75%
    
            │ strchrnul_baseline │          strchrnul_scalar          │
            │       MiB/s        │   MiB/s     vs base                │
    Short             183.8 ± 5%   287.2 ± 0%  +56.27% (p=0.000 n=20)
    Mid               397.3 ± 3%   564.6 ± 0%  +42.12% (p=0.000 n=20)
    Long              820.5 ± 0%   902.5 ± 0%   +9.99% (p=0.000 n=20)
    geomean           391.3        527.0       +34.68%
    
    MFC after:      1 month
    MFC to:         stable/15
    Approved by:    markj (mentor)
    Reviewed by:    fuz
    Sponsored by:   Google LLC (GSoC 2024)
    Differential Revision:  https://reviews.freebsd.org/D46047
    
    (cherry picked from commit 08af0bbc9c7d71bbaadb31ad31f8492f40537c5c)
---
 lib/libc/riscv/string/Makefile.inc |   1 +
 lib/libc/riscv/string/strchrnul.S  | 116 +++++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)

diff --git a/lib/libc/riscv/string/Makefile.inc b/lib/libc/riscv/string/Makefile.inc
index 4b97490a5494..719f22f6077f 100644
--- a/lib/libc/riscv/string/Makefile.inc
+++ b/lib/libc/riscv/string/Makefile.inc
@@ -4,4 +4,5 @@ MDSRCS+= \
        memset.S \
        strlen.S \
        strnlen.S \
+       strchrnul.S \
        strrchr.S
diff --git a/lib/libc/riscv/string/strchrnul.S b/lib/libc/riscv/string/strchrnul.S
new file mode 100644
index 000000000000..8abba71c4199
--- /dev/null
+++ b/lib/libc/riscv/string/strchrnul.S
@@ -0,0 +1,116 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Strahinja Stanisic <[email protected]>
+ */
+
+#include <machine/asm.h>
+
+        .weak   strchrnul
+        .set    strchrnul, __strchrnul
+
+/*
+ * a0 - const char *str; on return, pointer to the first c or to the \0
+ * a1 - int c; only the low 8 bits are used
+ */
+ENTRY(__strchrnul)
+       /*
+        * a0 - const char *ptr; 8-byte aligned read cursor
+        * a1 - char cccccccc[8]; c replicated into every byte
+        * a2 - char iter[8]; current word, then the combined match mask
+        * a3 - scratch has_zero mask, merged into a2 before the branch
+        */
+
+       /* int to char */
+       andi a1, a1, 0xFF
+
+       /* t0 = 0x0101010101010101 */
+       li t0, 0x01010101
+       slli t1, t0, 32
+       or t0, t0, t1
+
+       /* t1 = 0x8080808080808080 */
+       slli t1, t0, 7
+
+       /* spread char across bytes */
+       mul a1, a1, t0
+
+       /* align_offset = ptr & 7 */
+       andi t2, a0, 0b111
+
+       /* align pointer down to 8 bytes */
+       andi a0, a0, ~0b111
+
+       /* if pointer is aligned skip to loop */
+       beqz t2, .Lloop
+
+       ld a2, (a0)
+
+       /* mask_start: 0x01 in every byte before the string start */
+       slli t2, t2, 3
+       neg t2, t2
+       srl t2, t0, t2
+
+       /* fill bytes before start with non-zero so they cannot match */
+       or a3, a2, t2
+
+       xor a2, a2, a1
+       or a2, a2, t2
+
+       /* has_zero for both \0 (a3) and c (a2) */
+       not t3, a3
+       not t2, a2
+       sub a3, a3, t0
+       sub a2, a2, t0
+       and a3, a3, t3
+       and a2, a2, t2
+       and a3, a3, t1
+       and a2, a2, t1
+
+
+       /* if \0 or c was found, exit */
+       or a2, a2, a3
+       addi a0, a0, 8
+       bnez a2, .Lfind_char
+
+
+.Lloop:
+       ld a2, (a0)
+
+       /* has_zero for both \0 (a2) and c (a3) */
+       xor a3, a2, a1
+
+       not t2, a2
+       not t3, a3
+       sub a2, a2, t0
+       sub a3, a3, t0
+       and a2, a2, t2
+       and a3, a3, t3
+       and a2, a2, t1
+       and a3, a3, t1
+
+       /* if \0 or c was found, exit */
+       or a2, a2, a3
+       addi a0, a0, 8
+       beqz a2, .Lloop
+
+.Lfind_char:
+       addi a0, a0, -8
+
+       /* isolate lowest set bit, it marks the first matching byte */
+       neg t0, a2
+       and a2, a2, t0
+
+       li t0, 0x0001020304050607
+       srli a2, a2, 7
+
+       /* lowest set bit is 2^(8*k)
+        * multiplying by it shifts the idx array in t0 by k bytes to the left */
+       mul     a2, a2, t0
+
+       /* highest byte contains idx of first \0 or c */
+       srli a2, a2, 56
+
+       add a0, a0, a2
+       ret
+END(__strchrnul)

Reply via email to