This is an automated email from the ASF dual-hosted git repository.

pkarashchenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

The following commit(s) were added to refs/heads/master by this push:
     new 57df1ddcbb  Add armv7m assembly strcpy.
57df1ddcbb is described below

commit 57df1ddcbb22c1fe25caf1b1c4c7d8556a4df885
Author: XinStellaris <tianx...@xiaomi.com>
AuthorDate: Tue Mar 14 21:02:03 2023 +0800

    Add armv7m assembly strcpy.

    Signed-off-by: XinStellaris <tianx...@xiaomi.com>
---
 libs/libc/machine/arm/armv7-m/Kconfig           |   8 +
 libs/libc/machine/arm/armv7-m/Make.defs         |   4 +
 libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S | 308 ++++++++++++++++++++++++
 3 files changed, 320 insertions(+)

diff --git a/libs/libc/machine/arm/armv7-m/Kconfig b/libs/libc/machine/arm/armv7-m/Kconfig
index 60ac9d4b34..10ab6ee02a 100644
--- a/libs/libc/machine/arm/armv7-m/Kconfig
+++ b/libs/libc/machine/arm/armv7-m/Kconfig
@@ -45,6 +45,14 @@ config ARMV7M_STRCMP
 	---help---
 		Enable optimized ARMv7-M specific strcmp() library function
 
+config ARMV7M_STRCPY
+	bool "Enable optimized strcpy() for ARMv7-M"
+	default n
+	select LIBC_ARCH_STRCPY
+	depends on ARCH_TOOLCHAIN_GNU
+	---help---
+		Enable optimized ARMv7-M specific strcpy() library function
+
 config ARMV7M_STRLEN
 	bool "Enable optimized strlen() for ARMv7-M"
 	default n
diff --git a/libs/libc/machine/arm/armv7-m/Make.defs b/libs/libc/machine/arm/armv7-m/Make.defs
index 75c9109cd8..e86b896453 100644
--- a/libs/libc/machine/arm/armv7-m/Make.defs
+++ b/libs/libc/machine/arm/armv7-m/Make.defs
@@ -38,6 +38,10 @@ ifeq ($(CONFIG_ARMV7M_STRCMP),y)
 ASRCS += arch_strcmp.S
 endif
 
+ifeq ($(CONFIG_ARMV7M_STRCPY),y)
+ASRCS += arch_strcpy.S
+endif
+
 ifeq ($(CONFIG_ARMV7M_STRLEN),y)
 ASRCS += arch_strlen.S
 endif
diff --git a/libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S b/libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
new file mode 100644
index 0000000000..873279e16a
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
@@ -0,0 +1,308 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.  The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrows some ideas from arch_strcmp.S. */
+
+/* Parameters and result.
+ */
+
+#define dst        r0
+#define src        r1
+#define result     r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1       r4
+#define tmp2       r5
+#define tmp3       r6
+#define src_offset r7
+
+#ifdef __ARM_BIG_ENDIAN
+#  define MASK_0       0xff000000
+#  define MASK_1       0xff0000
+#  define MASK_2       0xff00
+#  define MASK_3       0xff
+#  define BYTE_0_SHIFT 24
+#  define BYTE_1_SHIFT 16
+#  define BYTE_2_SHIFT 8
+#  define BYTE_3_SHIFT 0
+#else
+#  define MASK_0       0xff
+#  define MASK_1       0xff00
+#  define MASK_2       0xff0000
+#  define MASK_3       0xff000000
+#  define BYTE_0_SHIFT 0
+#  define BYTE_1_SHIFT 8
+#  define BYTE_2_SHIFT 16
+#  define BYTE_3_SHIFT 24
+#endif
+
+    .syntax unified
+    .text
+    .align 2
+    .global strcpy
+    .thumb
+    .type strcpy, %function
+
+strcpy:
+    push    {result, tmp1, tmp2, tmp3, src_offset}
+    eor     tmp1, dst, src
+    tst     tmp1, #3
+    /* If dst and src are not at the same byte offset from a word boundary */
+    bne     .Lstrs_diff_offset
+    /* dst and src share the same byte offset; get that offset */
+    ands    tmp1, src, #3
+    beq     .Ldst_src_aligned
+    /* Get the number of unaligned bytes */
+    rsb     tmp1, #4
+
+.Lbyte_copy_until_dst_src_aligned:
+    ldrb    tmp2, [src], #1
+    cmp     tmp2, #0
+    beq     .Lcopy_done
+    strb    tmp2, [dst], #1
+    subs    tmp1, #1
+    bne     .Lbyte_copy_until_dst_src_aligned
+
+.Ldst_src_aligned:
+    /* Now dst and src are word aligned */
+    ldr     tmp1, [src], #4
+    sub     tmp2, tmp1, #0x01010101
+    bic     tmp2, tmp1
+    tst     tmp2, #0x80808080
+    /* All zero means no zero byte was detected */
+    it      eq
+    streq   tmp1, [dst], #4
+    beq     .Ldst_src_aligned
+
+    /* There is a zero byte in the word; copy byte by byte until the zero */
+    sub     src, #4
+.Lbyte_copy_until_zero:
+    ldrb    tmp2, [src], #1
+    cmp     tmp2, #0
+    beq     .Lcopy_done
+    strb    tmp2, [dst], #1
+    b       .Lbyte_copy_until_zero
+
+/* Make dst aligned so that we never write before dst.  If we attempted to
+ * write before dst, an atomic read-modify-write would have to be ensured,
+ * and atomic operations complicate things.  So the solution here is to
+ * copy byte by byte until dst is aligned.
+ */ +.Lstrs_diff_offset: + ands tmp1, dst, #3 + beq .Ldiff_offset_loop_begin + /* get number of dst bytes unaligned */ + rsb tmp1, #4 + +.Lbyte_copy_until_dst_aligned: + ldrb tmp2, [src], #1 + cmp tmp2, #0 + beq .Lcopy_done + strb tmp2, [dst], #1 + subs tmp1, #1 + bne .Lbyte_copy_until_dst_aligned + +.Ldiff_offset_loop_begin: + /* src_offset mustn't be 0 here */ + and src_offset, src, 3 + lsls src_offset, #3 + bic src, #3 +/* first word logic + * prepend 0xff to make the algorithm simpler + * only the first word needs to be prepended + */ + ldr tmp1, [src], #4 + mov tmp2, #0xffffffff + rsb tmp3, src_offset, #32 + +#ifdef __ARM_BIG_ENDIAN + lsls tmp2, tmp3 +#else + lsrs tmp2, tmp3 +#endif + orr tmp1, tmp1, tmp2 + /* Test if the first word contains zero */ + sub tmp3, tmp1, #0x01010101 + bic tmp3, tmp1 + tst tmp3, #0x80808080 + /* non-zero means zero byte is detected */ + bne .Ltail_copy + + /* before loop, set tmp2=tmp1 to simplify the logic in the loop */ + mov tmp2, tmp1 +.Ldiff_offset_loop: + mov tmp1, tmp2 + ldr tmp2, [src], #4 + /* Test if contains zero */ + sub tmp3, tmp2, #0x01010101 + bic tmp3, tmp2 + tst tmp3, #0x80808080 + /* non-zero means zero byte is detected */ + bne .Ltail_copy + /* Now let's fill dst */ +#ifdef __ARM_BIG_ENDIAN + lsls tmp1, src_offset + rsb tmp3, src_offset, #32 + lsrs tmp3, tmp2, tmp3 + orr tmp1, tmp1, tmp3 +#else + lsrs tmp1, src_offset + rsb tmp3, src_offset, #32 + lsls tmp3, tmp2, tmp3 + orr tmp1, tmp1, tmp3 +#endif + str tmp1, [dst], #4 + b .Ldiff_offset_loop + +.Ltail_copy: + cmp src_offset, #24 + beq .Loffset_3 + cmp src_offset, #16 + beq .Loffset_2 + /* src_offset == 8 here */ + ands tmp3, tmp1, MASK_1 + beq .Lcopy_done + lsrs tmp3, BYTE_1_SHIFT + strb tmp3, [dst], #1 +.Loffset_2: + ands tmp3, tmp1, MASK_2 + beq .Lcopy_done + lsrs tmp3, BYTE_2_SHIFT + strb tmp3, [dst], #1 +.Loffset_3: + ands tmp3, tmp1, MASK_3 + beq .Lcopy_done + lsrs tmp3, BYTE_3_SHIFT + strb tmp3, [dst], #1 + ands tmp3, tmp2, MASK_0 + beq .Lcopy_done + lsrs tmp3, BYTE_0_SHIFT + strb tmp3, [dst], #1 + ands tmp3, tmp2, MASK_1 + beq .Lcopy_done + lsrs tmp3, BYTE_1_SHIFT + strb tmp3, [dst], #1 + ands tmp3, tmp2, MASK_2 + beq .Lcopy_done + lsrs tmp3, BYTE_2_SHIFT + strb tmp3, [dst], #1 +.Lcopy_done: + mov tmp3, #0 + strb tmp3, [dst] + pop {result, tmp1, tmp2, tmp3, src_offset} + bx lr + +#if 0 +/* Pseudo Code of strcpy when dst/src not at same byte offset */ + +/* Make dst aligned, so we won't write anything before dst. + * If we attempt to write before dst, atomic read-write must + * be ensured. Atomic operation complicates things. + * So the solution here is byte by byte copy until dst aligned. 
+ */ + if (dst & 3 == 0) + goto diff_offset_loop_begin; + ByteCopyUntilDstAligned(); + +.diff_offset_loop_begin: +/* src_offset mustn't be 0 here */ + src_offset = src & 3; + src_offset = src_offset * 8; + src = src & 0xfffffffc; + tmp1 = *src; + src +=4; +/* first word logic + * prepend 0xff to make the algorithm simpler + * only the first word needs to be prepended + */ + if (src_offset != 0) + { + tmp2 = 0xffffffff +#if big endian + tmp2 = tmp2 << (32 - src_offset) +#else + tmp2 = tmp2 >> (32 - src_offset) +#endif + tmp1 |= tmp2 + } + if (HasZeroByte(tmp1)) + { + goto .tail_copy; + } + +/* before loop, set tmp2=tmp1 to simplify the logic in the loop */ + tmp2 = tmp1 +.diff_offset_loop: + tmp1 = tmp2; + tmp2 = *src; + src += 4; + + /* double word tail means we have to copy from tmp1 and tmp2 to dst */ + if (HasZeroByte(tmp2)) + { + goto .tail_copy; + } +/* Now let's fill dst */ +#if big endian + tmp1 = tmp1 << (src_offset); + tmp1 |= tmp2 >> (32 - src_offset); + *dst = tmp1; +#else + tmp1 = tmp1 >> (src_offset); + tmp1 |= tmp2 << (32 - src_offset); + *dst = tmp1; +#endif + dst +=4; + goto .diff_offset_loop; + +/* byte by byte copy at the tail */ +.tail_copy: + if (src_offset == 3) + goto offset_3; + if (src_offset == 2) + goto offset_2; + +/* src_offset mustn't be 0 here */ +/* default src_offset == 1 */ + if (tmp1 & MASK_1 == 0) + goto cpy_done; + *dst++ = tmp1 & MASK_1; +offset_2: + if (tmp1 & MASK_2 == 0) + goto cpy_done; + *dst++ = tmp1 & MASK_2; +offset_3: + if (tmp1 & MASK_3 == 0) + goto cpy_done; + *dst++ = tmp1 & MASK_3; + if (tmp2 & MASK_0 == 0) + goto cpy_done; + *dst++ = tmp2 & MASK_0; + if (tmp2 & MASK_1 == 0) + goto cpy_done; + *dst++ = tmp2 & MASK_1; + if (tmp2 & MASK_2 == 0) + goto cpy_done; + *dst++ = tmp2 & MASK_2; +/* tmp2 BYTE3 must be zero here */ + +.cpy_done: + *dst++ = 0; +#endif /* Pseudo code end */ +