From: Milan Tripkovic <[email protected]>

Add an assembly implementation of memchr() for RISC-V. It uses
word-at-a-time comparisons with the Zbb extension to accelerate character
searching, and falls back to a byte-by-byte loop on systems without
Zbb support.

Benchmark results (QEMU TCG, rv64; "Wozbb" = without Zbb; the % columns
give the difference of each variant relative to "default"):

| len  | zbb    | Wozbb | default | % Wozbb | % zbb    |
|------|--------|-------|---------|---------|----------|
| 1    | 22.9   | 26    | 22.1    | 17.64 % | 3.62 %   |
| 7    | 115.5  | 130.6 | 100.1   | 30.46 % | 15.38 %  |
| 8    | 184.3  | 143.3 | 117.2   | 22.26 % | 57.25 %  |
| 16   | 322    | 205.1 | 179.7   | 14.13 % | 79.19 %  |
| 31   | 361    | 247.4 | 221.7   | 11.59 % | 62.83 %  |
| 64   | 880    | 280.6 | 233.2   | 20.32 % | 277.35 % |
| 127  | 961.3  | 307.9 | 271.4   | 13.44 % | 254.20 % |
| 512  | 1812.9 | 325.6 | 294.1   | 10.71 % | 516.42 % |
| 1024 | 1973.2 | 335.4 | 273.8   | 22.49 % | 620.67 % |
| 3173 | 2245.9 | 338.7 | 288.6   | 17.35 % | 678.20 % |
| 4096 | 2327.5 | 345   | 317.5   | 8.66 %  | 633.07 % |

Signed-off-by: Milan Tripkovic <[email protected]>
---
 arch/riscv/include/asm/string.h |   3 +-
 arch/riscv/lib/Makefile         |   1 +
 arch/riscv/lib/memchr.S         | 126 ++++++++++++++++++++++++++++++++
 arch/riscv/purgatory/Makefile   |   5 +-
 4 files changed, 133 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/lib/memchr.S

diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 764ffe8f6479..cfcf1193b446 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -18,7 +18,8 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
 #define __HAVE_ARCH_MEMMOVE
 extern asmlinkage void *memmove(void *, const void *, size_t);
 extern asmlinkage void *__memmove(void *, const void *, size_t);
-
+#define __HAVE_ARCH_MEMCHR
+extern asmlinkage void *memchr(const void *s, int c, size_t n);
 #if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
 #define __HAVE_ARCH_STRCMP
 extern asmlinkage int strcmp(const char *cs, const char *ct);
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 6f767b2a349d..bbc54dabbb81 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,7 @@ lib-y                   += delay.o
 lib-y                  += memcpy.o
 lib-y                  += memset.o
 lib-y                  += memmove.o
+lib-y                  += memchr.o
 ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
 lib-y                  += strcmp.o
 lib-y                  += strlen.o
diff --git a/arch/riscv/lib/memchr.S b/arch/riscv/lib/memchr.S
new file mode 100644
index 000000000000..0e971cf51410
--- /dev/null
+++ b/arch/riscv/lib/memchr.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+/*  void *memchr(const void *s, int c, size_t n) */
+SYM_FUNC_START(memchr)
+
+       __ALTERNATIVE_CFG("nop", "j memchr_zbb", 0, RISCV_ISA_EXT_ZBB,
+               IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
+/*
+ * Reached when the alternative above is left as a nop: byte-by-byte search.
+ * Parameters
+ *     a0 - Pointer to memory area (s), also return value
+ *     a1 - Character to search for (c), truncated to a byte in place
+ *     a2 - Number of bytes to search (n)
+ *
+ * Returns
+ *     a0 - Pointer to the matched character, or NULL (0) if not found
+ *
+ * Clobbers
+ *     a1, t0, t1
+ */
+       beqz    a2, 3f                  /* n == 0: nothing to search */
+       andi    a1, a1, 0xff            /* compare c as an unsigned byte */
+       add     t1, a0, a2              /* t1 = s + n */
+
+1:
+       lbu     t0, 0(a0)
+       beq     t0, a1, 4f              /* match: a0 already points at it */
+       addi    a0, a0, 1
+       bne     a0, t1, 1b              /* exact end test, safe if s + n wraps */
+
+3:
+       li      a0, 0                   /* not found: return NULL */
+4:
+       ret
+
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+.balign 4
+memchr_zbb:
+.option push
+.option arch, +zbb
+/*
+ * Zbb path: byte loop up to SZREG alignment, then one register-sized
+ * word per iteration using orc.b, then a byte loop for the tail.
+ * Same arguments and return value as the byte-by-byte path above.
+ * End tests use bne so they stay correct when s + n wraps around.
+ *
+ * Register use
+ *     t3 - c truncated to a byte     a1 - c replicated into every byte
+ *     t5 - end of the whole words    t6 - s + n
+ * Clobbers
+ *     a1, t0, t1, t2, t3, t4, t5, t6
+ */
+       beqz    a2, 3b                  /* n == 0: return NULL */
+       add     t6, a0, a2              /* t6 = s + n */
+       andi    t3, a1, 0xff            /* t3 = c as an unsigned byte */
+
+       li      t0, SZREG
+       bltu    a2, t0, 8f              /* n < SZREG: byte loop only */
+
+#if __riscv_xlen == 64
+       li      t0, 0x0101010101010101
+#else
+       li      t0, 0x01010101
+#endif
+       mul     a1, t3, t0              /* a1 = c copied into every byte */
+
+       andi    t1, a0, SZREG - 1
+       beqz    t1, 5f                  /* already SZREG-aligned */
+
+1:
+       lbu     t2, 0(a0)               /* n >= SZREG keeps this in range */
+       beq     t2, t3, 4b
+       addi    a0, a0, 1
+       andi    t1, a0, SZREG - 1
+       bnez    t1, 1b
+
+5:
+       sub     t5, t6, a0              /* bytes remaining */
+       andi    t5, t5, -SZREG          /* rounded down to whole words */
+       add     t5, a0, t5              /* t5 = end of the word loop */
+
+       li      t4, -1                  /* orc.b result when no byte matched */
+
+       beq     a0, t5, 8f              /* no whole word left */
+
+6:
+       REG_L   t0, 0(a0)
+       xor     t1, t0, a1              /* matching bytes become 0x00 */
+       orc.b   t1, t1                  /* 0x00 stays, nonzero bytes -> 0xff */
+       bne     t1, t4, 7f              /* some byte matched */
+       addi    a0, a0, SZREG
+       bne     a0, t5, 6b              /* t5 - a0 is a multiple of SZREG */
+
+8:
+       beq     a0, t6, 3b              /* no tail bytes: not found */
+
+2:
+       lbu     t0, 0(a0)
+       beq     t0, t3, 4b
+       addi    a0, a0, 1
+       bne     a0, t6, 2b              /* exact end test, as in the path above */
+
+       j       3b
+
+7:
+       not     t1, t1                  /* 0xff now marks the matching bytes */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+       ctz     t1, t1                  /* lowest address = least significant */
+#else
+       clz     t1, t1                  /* lowest address = most significant */
+#endif
+       srli    t1, t1, 3               /* bit index -> byte index */
+       add     a0, a0, t1
+       ret
+
+.option pop
+#endif
+
+SYM_FUNC_END(memchr)
+SYM_FUNC_ALIAS(__pi_memchr, memchr)
+EXPORT_SYMBOL(memchr)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index b0358a78f11a..42e840fad6df 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memchr.o
 ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
 purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
 endif
@@ -17,6 +17,9 @@ $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
 $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
        $(call if_changed_rule,as_o_S)
 
+$(obj)/memchr.o: $(srctree)/arch/riscv/lib/memchr.S FORCE
+	$(call if_changed_rule,as_o_S)
+
 $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
        $(call if_changed_rule,as_o_S)
 
-- 
2.43.0


Reply via email to