This is an automated email from the ASF dual-hosted git repository.
pkarashchenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git
The following commit(s) were added to refs/heads/master by this push:
new 57df1ddcbb Add armv7m assembly strcpy.
57df1ddcbb is described below
commit 57df1ddcbb22c1fe25caf1b1c4c7d8556a4df885
Author: XinStellaris <[email protected]>
AuthorDate: Tue Mar 14 21:02:03 2023 +0800
Add armv7m assembly strcpy.
Signed-off-by: XinStellaris <[email protected]>
---
libs/libc/machine/arm/armv7-m/Kconfig | 8 +
libs/libc/machine/arm/armv7-m/Make.defs | 4 +
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S | 308 ++++++++++++++++++++++++
3 files changed, 320 insertions(+)
diff --git a/libs/libc/machine/arm/armv7-m/Kconfig
b/libs/libc/machine/arm/armv7-m/Kconfig
index 60ac9d4b34..10ab6ee02a 100644
--- a/libs/libc/machine/arm/armv7-m/Kconfig
+++ b/libs/libc/machine/arm/armv7-m/Kconfig
@@ -45,6 +45,14 @@ config ARMV7M_STRCMP
---help---
Enable optimized ARMv7-M specific strcmp() library function
+config ARMV7M_STRCPY
+ bool "Enable optimized strcpy() for ARMv7-M"
+ default n
+ select LIBC_ARCH_STRCPY
+ depends on ARCH_TOOLCHAIN_GNU
+ ---help---
+ Enable optimized ARMv7-M specific strcpy() library function
+
config ARMV7M_STRLEN
bool "Enable optimized strlen() for ARMv7-M"
default n
diff --git a/libs/libc/machine/arm/armv7-m/Make.defs
b/libs/libc/machine/arm/armv7-m/Make.defs
index 75c9109cd8..e86b896453 100644
--- a/libs/libc/machine/arm/armv7-m/Make.defs
+++ b/libs/libc/machine/arm/armv7-m/Make.defs
@@ -38,6 +38,10 @@ ifeq ($(CONFIG_ARMV7M_STRCMP),y)
ASRCS += arch_strcmp.S
endif
+ifeq ($(CONFIG_ARMV7M_STRCPY),y)
+ASRCS += arch_strcpy.S
+endif
+
ifeq ($(CONFIG_ARMV7M_STRLEN),y)
ASRCS += arch_strlen.S
endif
diff --git a/libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
b/libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
new file mode 100644
index 0000000000..873279e16a
--- /dev/null
+++ b/libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
@@ -0,0 +1,308 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrows some ideas from arch_strcmp.S. */
+
+/* Parameters and result. */
+#define dst r0
+#define src r1
+#define result r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1 r4
+#define tmp2 r5
+#define tmp3 r6
+#define src_offset r7
+
+/* MASK_n / BYTE_n_SHIFT select byte n of a 32-bit word counted in
+ * memory order (byte 0 is at the lowest address), hence the
+ * endian-dependent definitions below.
+ */
+#ifdef __ARM_BIG_ENDIAN
+# define MASK_0 0xff000000
+# define MASK_1 0xff0000
+# define MASK_2 0xff00
+# define MASK_3 0xff
+# define BYTE_0_SHIFT 24
+# define BYTE_1_SHIFT 16
+# define BYTE_2_SHIFT 8
+# define BYTE_3_SHIFT 0
+#else
+# define MASK_0 0xff
+# define MASK_1 0xff00
+# define MASK_2 0xff0000
+# define MASK_3 0xff000000
+# define BYTE_0_SHIFT 0
+# define BYTE_1_SHIFT 8
+# define BYTE_2_SHIFT 16
+# define BYTE_3_SHIFT 24
+#endif
+
+
+ .syntax unified
+ .text
+ .align 2
+ .global strcpy
+ .thumb
+ .type strcpy, %function
+
+/* char *strcpy(char *dst, const char *src)
+ *
+ * In:   r0 = dst, r1 = src
+ * Out:  r0 = dst (r0 is pushed at entry and popped back into r0
+ *       before returning, so the original destination is returned)
+ * Uses: r4-r7 (tmp1-tmp3, src_offset), saved and restored here.
+ *
+ * Copies word-at-a-time where possible.  Whole aligned words are read
+ * from src, so up to 3 bytes past the terminating NUL may be read,
+ * but never past the aligned word that contains it.  A zero byte in
+ * word x is detected with the classic test
+ * (x - 0x01010101) & ~x & 0x80808080, which is non-zero iff some
+ * byte of x is zero.
+ */
+strcpy:
+ push {result, tmp1, tmp2, tmp3, src_offset}
+ eor tmp1, dst, src
+ tst tmp1, #3
+ /* If dst and src not at same byte offset from a word boundary */
+ bne .Lstrs_diff_offset
+ /* Process same byte offset then, get the offset */
+ ands tmp1, src, #3
+ beq .Ldst_src_aligned
+ /* get number of bytes unaligned */
+ rsb tmp1, #4
+
+/* NOTE(review): "dsr" in this label name looks like a typo for "dst";
+ * both uses are consistent, so it is cosmetic only.
+ */
+.Lbyte_copy_until_dsr_src_aligned:
+ ldrb tmp2, [src], #1
+ cmp tmp2, #0
+ beq .Lcopy_done
+ strb tmp2, [dst], #1
+ subs tmp1, #1
+ bne .Lbyte_copy_until_dsr_src_aligned
+
+.Ldst_src_aligned:
+ /* Now dst and src are aligned */
+ ldr tmp1, [src], #4
+ sub tmp2, tmp1, #0x01010101
+ bic tmp2, tmp1
+ tst tmp2, #0x80808080
+ /* All zero means no zero byte is detected */
+ it eq
+ streq tmp1, [dst], #4
+ beq .Ldst_src_aligned
+
+ /* There is a zero in the word, copy until zero */
+ sub src, #4
+.Lbyte_copy_until_zero:
+ ldrb tmp2, [src], #1
+ cmp tmp2, #0
+ beq .Lcopy_done
+ strb tmp2, [dst], #1
+ b .Lbyte_copy_until_zero
+
+/* Make dst aligned, so we won't write anything before dst.
+ * If we attempt to write before dst, atomic read-write must
+ * be ensured. Atomic operation complicates things.
+ * So the solution here is byte by byte copy until dst aligned.
+ */
+.Lstrs_diff_offset:
+ ands tmp1, dst, #3
+ beq .Ldiff_offset_loop_begin
+ /* get number of dst bytes unaligned */
+ rsb tmp1, #4
+
+.Lbyte_copy_until_dst_aligned:
+ ldrb tmp2, [src], #1
+ cmp tmp2, #0
+ beq .Lcopy_done
+ strb tmp2, [dst], #1
+ subs tmp1, #1
+ bne .Lbyte_copy_until_dst_aligned
+
+.Ldiff_offset_loop_begin:
+ /* src_offset mustn't be 0 here */
+ /* src_offset = (src & 3) * 8: bit offset of src within its word.
+  * dst is word-aligned now, and the two pointers started at
+  * different offsets, so src_offset is 8, 16 or 24.
+  */
+ and src_offset, src, 3
+ lsls src_offset, #3
+ bic src, #3
+/* first word logic
+ * prepend 0xff to make the algorithm simpler
+ * only the first word needs to be prepended
+ */
+ ldr tmp1, [src], #4
+ mov tmp2, #0xffffffff
+ rsb tmp3, src_offset, #32
+
+#ifdef __ARM_BIG_ENDIAN
+ lsls tmp2, tmp3
+#else
+ lsrs tmp2, tmp3
+#endif
+ /* Force the bytes that precede src's offset to non-zero 0xff so
+  * the zero-byte test below only fires on real string bytes.
+  */
+ orr tmp1, tmp1, tmp2
+ /* Test if the first word contains zero */
+ sub tmp3, tmp1, #0x01010101
+ bic tmp3, tmp1
+ tst tmp3, #0x80808080
+ /* non-zero means zero byte is detected */
+ bne .Ltail_copy
+
+ /* before loop, set tmp2=tmp1 to simplify the logic in the loop */
+ mov tmp2, tmp1
+.Ldiff_offset_loop:
+ mov tmp1, tmp2
+ ldr tmp2, [src], #4
+ /* Test if contains zero */
+ sub tmp3, tmp2, #0x01010101
+ bic tmp3, tmp2
+ tst tmp3, #0x80808080
+ /* non-zero means zero byte is detected */
+ bne .Ltail_copy
+ /* Now let's fill dst */
+ /* Combine the remaining bytes of tmp1 with the leading bytes of
+  * tmp2 into one full destination word.
+  */
+#ifdef __ARM_BIG_ENDIAN
+ lsls tmp1, src_offset
+ rsb tmp3, src_offset, #32
+ lsrs tmp3, tmp2, tmp3
+ orr tmp1, tmp1, tmp3
+#else
+ lsrs tmp1, src_offset
+ rsb tmp3, src_offset, #32
+ lsls tmp3, tmp2, tmp3
+ orr tmp1, tmp1, tmp3
+#endif
+ str tmp1, [dst], #4
+ b .Ldiff_offset_loop
+
+/* Byte-by-byte tail: tmp1 is the last pending source word, tmp2 the
+ * word after it.  When entered from the first-word test above, tmp2
+ * holds no string data yet, but the NUL is then guaranteed to be
+ * among tmp1's checked bytes (the earlier ones were forced to 0xff),
+ * so the tmp2 stores below are not reached in that case.
+ */
+.Ltail_copy:
+ cmp src_offset, #24
+ beq .Loffset_3
+ cmp src_offset, #16
+ beq .Loffset_2
+ /* src_offset == 8 here */
+ ands tmp3, tmp1, MASK_1
+ beq .Lcopy_done
+ lsrs tmp3, BYTE_1_SHIFT
+ strb tmp3, [dst], #1
+.Loffset_2:
+ ands tmp3, tmp1, MASK_2
+ beq .Lcopy_done
+ lsrs tmp3, BYTE_2_SHIFT
+ strb tmp3, [dst], #1
+.Loffset_3:
+ ands tmp3, tmp1, MASK_3
+ beq .Lcopy_done
+ lsrs tmp3, BYTE_3_SHIFT
+ strb tmp3, [dst], #1
+ ands tmp3, tmp2, MASK_0
+ beq .Lcopy_done
+ lsrs tmp3, BYTE_0_SHIFT
+ strb tmp3, [dst], #1
+ ands tmp3, tmp2, MASK_1
+ beq .Lcopy_done
+ lsrs tmp3, BYTE_1_SHIFT
+ strb tmp3, [dst], #1
+ ands tmp3, tmp2, MASK_2
+ beq .Lcopy_done
+ lsrs tm3, BYTE_2_SHIFT
+ strb tmp3, [dst], #1
+.Lcopy_done:
+ /* Store the terminating NUL and return the original dst in r0 */
+ mov tmp3, #0
+ strb tmp3, [dst]
+ pop {result, tmp1, tmp2, tmp3, src_offset}
+ bx lr
+
+#if 0
+/* Pseudo Code of strcpy when dst/src not at same byte offset */
+
+/* Make dst aligned, so we won't write anything before dst.
+ * If we attempt to write before dst, atomic read-write must
+ * be ensured. Atomic operation complicates things.
+ * So the solution here is byte by byte copy until dst aligned.
+ */
+ if (dst & 3 == 0)
+ goto diff_offset_loop_begin;
+ ByteCopyUntilDstAligned();
+
+.diff_offset_loop_begin:
+/* src_offset mustn't be 0 here */
+ src_offset = src & 3;
+ src_offset = src_offset * 8;
+ src = src & 0xfffffffc;
+ tmp1 = *src;
+ src +=4;
+/* first word logic
+ * prepend 0xff to make the algorithm simpler
+ * only the first word needs to be prepended
+ */
+ if (src_offset != 0)
+ {
+ tmp2 = 0xffffffff
+#if big endian
+ tmp2 = tmp2 << (32 - src_offset)
+#else
+ tmp2 = tmp2 >> (32 - src_offset)
+#endif
+ tmp1 |= tmp2
+ }
+ if (HasZeroByte(tmp1))
+ {
+ goto .tail_copy;
+ }
+
+/* before loop, set tmp2=tmp1 to simplify the logic in the loop */
+ tmp2 = tmp1
+.diff_offset_loop:
+ tmp1 = tmp2;
+ tmp2 = *src;
+ src += 4;
+
+ /* double word tail means we have to copy from tmp1 and tmp2 to dst */
+ if (HasZeroByte(tmp2))
+ {
+ goto .tail_copy;
+ }
+/* Now let's fill dst */
+#if big endian
+ tmp1 = tmp1 << (src_offset);
+ tmp1 |= tmp2 >> (32 - src_offset);
+ *dst = tmp1;
+#else
+ tmp1 = tmp1 >> (src_offset);
+ tmp1 |= tmp2 << (32 - src_offset);
+ *dst = tmp1;
+#endif
+ dst +=4;
+ goto .diff_offset_loop;
+
+/* byte by byte copy at the tail */
+.tail_copy:
+ if (src_offset == 24)
+ goto offset_3;
+ if (src_offset == 16)
+ goto offset_2;
+
+/* src_offset mustn't be 0 here */
+/* default src_offset == 8 */
+ if (tmp1 & MASK_1 == 0)
+ goto cpy_done;
+ *dst++ = (tmp1 & MASK_1) >> BYTE_1_SHIFT;
+offset_2:
+ if (tmp1 & MASK_2 == 0)
+ goto cpy_done;
+ *dst++ = (tmp1 & MASK_2) >> BYTE_2_SHIFT;
+offset_3:
+ if (tmp1 & MASK_3 == 0)
+ goto cpy_done;
+ *dst++ = (tmp1 & MASK_3) >> BYTE_3_SHIFT;
+ if (tmp2 & MASK_0 == 0)
+ goto cpy_done;
+ *dst++ = (tmp2 & MASK_0) >> BYTE_0_SHIFT;
+ if (tmp2 & MASK_1 == 0)
+ goto cpy_done;
+ *dst++ = (tmp2 & MASK_1) >> BYTE_1_SHIFT;
+ if (tmp2 & MASK_2 == 0)
+ goto cpy_done;
+ *dst++ = (tmp2 & MASK_2) >> BYTE_2_SHIFT;
+/* tmp2 BYTE3 must be zero here */
+/* tmp2 BYTE3 must be zero here */
+
+.cpy_done:
+ *dst++ = 0;
+#endif /* Pseudo code end */
+