From: Milan Tripkovic <[email protected]> This memchr() implementation leverages word-at-a-time comparisons and the RISC-V Zbb extension to accelerate character searching, falling back to a generic byte-by-byte loop on systems without Zbb support.
Benchmark results (QEMU TCG, rv64): | len | zbb | Wozbb | default | % Wozbb | % zbb | |------|--------|-------|---------|---------|---------| | 1 | 22.9 | 26 | 22.1 | 17.64 % | 3.62 %| | 7 | 115.5 | 130.6 | 100.1 | 30.46 % | 15.38 %| | 8 | 184.3 | 143.3 | 117.2 | 22.26 % | 57.25 %| | 16 | 322 | 205.1 | 179.7 | 14.13 % | 79.19 %| | 31 | 361 | 247.4 | 221.7 | 11.59 % | 62.83 %| | 64 | 880 | 280.6 | 233.2 | 20.32 % | 277.35 %| | 127 | 961.3 | 307.9 | 271.4 | 13.44 % | 254.20 %| | 512 | 1812.9 | 325.6 | 294.1 | 10.71 % | 516.42 %| | 1024 | 1973.2 | 335.4 | 273.8 | 22.49 % | 620.67 %| | 3173 | 2245.9 | 338.7 | 288.6 | 17.35 % | 678.20 %| | 4096 | 2327.5 | 345 | 317.5 | 8.661 % | 633.07 %| Signed-off-by: Milan Tripkovic <[email protected]> --- arch/riscv/include/asm/string.h | 3 +- arch/riscv/lib/Makefile | 1 + arch/riscv/lib/memchr.S | 126 ++++++++++++++++++++++++++++++++ arch/riscv/purgatory/Makefile | 5 +- 4 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/lib/memchr.S diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 764ffe8f6479..cfcf1193b446 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -18,7 +18,8 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); #define __HAVE_ARCH_MEMMOVE extern asmlinkage void *memmove(void *, const void *, size_t); extern asmlinkage void *__memmove(void *, const void *, size_t); - +#define __HAVE_ARCH_MEMCHR +extern asmlinkage void *memchr(const void *s, int c, size_t n); #if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) #define __HAVE_ARCH_STRCMP extern asmlinkage int strcmp(const char *cs, const char *ct); diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 6f767b2a349d..bbc54dabbb81 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,7 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o lib-y += memmove.o +lib-y += memchr.o ifeq 
($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
 lib-y += strcmp.o
 lib-y += strlen.o
diff --git a/arch/riscv/lib/memchr.S b/arch/riscv/lib/memchr.S
new file mode 100644
index 000000000000..0e971cf51410
--- /dev/null
+++ b/arch/riscv/lib/memchr.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+/* void *memchr(const void *s, int c, size_t n) */
+SYM_FUNC_START(memchr)
+
+	__ALTERNATIVE_CFG("nop", "j memchr_zbb", 0, RISCV_ISA_EXT_ZBB,
+		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
+/*
+ * Generic byte-at-a-time search, reached when the alternative above stays a "nop".
+ * Parameters
+ *   a0 - Pointer to memory area (s), also return value
+ *   a1 - Character to search for (c); only its low 8 bits are compared
+ *   a2 - Number of bytes to search (n)
+ *
+ * Returns
+ *   a0 - Pointer to the first matching byte, or NULL (0) if not found
+ *
+ * Clobbers
+ *   t0, t1 (a1 is also overwritten with c & 0xff)
+ */
+	beqz a2, 3f		/* n == 0: nothing to search, return NULL */
+	andi a1, a1, 0xff	/* reduce c to the byte value that lbu can produce */
+	add t1, a0, a2		/* t1 = s + n, one past the last byte */
+
+1:
+	lbu t0, 0(a0)		/* load the current byte, zero-extended */
+	beq t0, a1, 4f		/* match: a0 already holds the result */
+	addi a0, a0, 1
+	bne a0, t1, 1b		/* keep going until the end pointer is reached */
+
+3:
+	li a0, 0		/* not found: return NULL */
+4:
+	ret
+
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+.balign 4
+memchr_zbb:
+.option push
+.option arch, +zbb	/* let the assembler accept orc.b, ctz and clz below */
+/*
+ * Zbb variant: byte loops for the head and tail, orc.b word compare in between.
+ *   a0 - Pointer to memory area (s), also return value
+ *   a1 - Character to search for (c)
+ *   a2 - Number of bytes to search (n)
+ *
+ * Returns
+ *   a0 - Pointer to the first matching byte, or NULL (0) if not found
+ *
+ * Clobbers
+ *   t0, t1, t2, t3, t4, t5, t6 (a1 is also overwritten with the replicated byte)
+ */
+	beqz a2, 3b		/* n == 0: reuse the generic path's "return NULL" exit */
+	add t6, a0, a2		/* t6 = s + n, one past the last byte */
+	andi t3, a1, 0xff	/* t3 = byte value to look for */
+	/* NOTE(review): unlike the bne above, the bltu bounds checks below assume s + n does not wrap - confirm */
+	li t0, SZREG
+	bltu a2, t0, 8f		/* fewer than SZREG bytes: byte loop only */
+
+#if __riscv_xlen == 64
+	li t0, 0x0101010101010101
+#else
+	li t0, 0x01010101
+#endif
+	mul a1, t3, t0		/* a1 = target byte replicated into every byte lane */
+
+	andi t1, a0, SZREG - 1	/* low address bits: non-zero means s is not SZREG-aligned */
+	beqz t1, 5f		/* already aligned: skip the head loop */
+	/* Head: at most SZREG - 1 bytes, so the n >= SZREG check above keeps it in bounds. */
+1:
+	lbu t2, 0(a0)
+	beq t2, t3, 4b		/* match: return a0 through the shared "ret" */
+	addi a0, a0, 1
+	andi t1, a0, SZREG - 1
+	bnez t1, 1b		/* repeat until a0 is SZREG-aligned */
+
+5:
+	sub t5, t6, a0		/* bytes remaining */
+	andi t5, t5, -SZREG	/* rounded down to a multiple of SZREG */
+	add t5, a0, t5		/* t5 = end of the region scanned a word at a time */
+
+	li t4, -1		/* all-ones: what orc.b yields when no byte matched */
+
+	beq a0, t5, 8f		/* no whole word left: go straight to the tail */
+	/* Body: compare SZREG bytes per iteration. */
+6:
+	REG_L t0, 0(a0)		/* load the next aligned word */
+	xor t1, t0, a1		/* bytes equal to the target become 0x00 */
+	orc.b t1, t1		/* 0x00 bytes stay 0x00, every other byte becomes 0xff */
+	bne t1, t4, 7f		/* not all-ones: at least one byte matched */
+	addi a0, a0, SZREG
+	bltu a0, t5, 6b
+
+8:
+	beq a0, t6, 3b		/* nothing left to scan: not found */
+	/* Tail: remaining bytes, or the whole buffer when n < SZREG. */
+2:
+	lbu t0, 0(a0)
+	beq t0, t3, 4b		/* match: return a0 */
+	addi a0, a0, 1
+	bltu a0, t6, 2b
+
+	j 3b			/* end reached without a match: return NULL */
+	/* A word contained a match: find the lowest-addressed matching byte. */
+7:
+	not t1, t1		/* matching bytes are now 0xff, all others 0x00 */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	ctz t1, t1		/* little-endian: lowest address is the least significant byte */
+#else
+	clz t1, t1		/* big-endian: lowest address is the most significant byte */
+#endif
+	srli t1, t1, 3		/* bit index -> byte index */
+	add a0, a0, t1		/* a0 = address of the matching byte */
+	ret
+
+.option pop
+#endif
+
+SYM_FUNC_END(memchr)
+SYM_FUNC_ALIAS(__pi_memchr, memchr)
+EXPORT_SYMBOL(memchr)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index b0358a78f11a..42e840fad6df 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memchr.o
 ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
 purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
 endif
 
@@ -17,6 +17,9 @@ $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
 $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
 	$(call if_changed_rule,as_o_S)
 
+$(obj)/memchr.o: $(srctree)/arch/riscv/lib/memchr.S FORCE
+	$(call if_changed_rule,as_o_S)
+
 $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
 	$(call if_changed_rule,as_o_S)
 
-- 
2.43.0

