pkarashchenko commented on code in PR #8814:
URL: https://github.com/apache/nuttx/pull/8814#discussion_r1136305990


##########
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S:
##########
@@ -0,0 +1,307 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.  The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrowed some ideas from arch_strcmp.S(). */
+
+/* Parameters and result. */
+#define dst     r0
+#define src     r1
+#define result  r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1          r4
+#define tmp2          r5
+#define tmp3          r6
+#define src_offset    r7
+
+#ifdef __ARM_BIG_ENDIAN
+#define MASK_0          0xff000000
+#define MASK_1          0xff0000
+#define MASK_2          0xff00
+#define MASK_3          0xff
+#define BYTE_0_SHIFT    24
+#define BYTE_1_SHIFT    16
+#define BYTE_2_SHIFT    8
+#define BYTE_3_SHIFT    0
+#else
+#define MASK_0          0xff
+#define MASK_1          0xff00
+#define MASK_2          0xff0000
+#define MASK_3          0xff000000
+#define BYTE_0_SHIFT    0
+#define BYTE_1_SHIFT    8
+#define BYTE_2_SHIFT    16
+#define BYTE_3_SHIFT    24

Review Comment:
   ```suggestion
   #  define MASK_0          0xff
   #  define MASK_1          0xff00
   #  define MASK_2          0xff0000
   #  define MASK_3          0xff000000
   #  define BYTE_0_SHIFT    0
   #  define BYTE_1_SHIFT    8
   #  define BYTE_2_SHIFT    16
   #  define BYTE_3_SHIFT    24
   
   ```



##########
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S:
##########
@@ -0,0 +1,307 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.  The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrowed some ideas from arch_strcmp.S(). */
+
+/* Parameters and result. */
+#define dst     r0
+#define src     r1
+#define result  r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1          r4
+#define tmp2          r5
+#define tmp3          r6
+#define src_offset    r7
+
+#ifdef __ARM_BIG_ENDIAN
+#define MASK_0          0xff000000
+#define MASK_1          0xff0000
+#define MASK_2          0xff00
+#define MASK_3          0xff
+#define BYTE_0_SHIFT    24
+#define BYTE_1_SHIFT    16
+#define BYTE_2_SHIFT    8
+#define BYTE_3_SHIFT    0

Review Comment:
   ```suggestion
   #  define MASK_0          0xff000000
   #  define MASK_1          0xff0000
   #  define MASK_2          0xff00
   #  define MASK_3          0xff
   #  define BYTE_0_SHIFT    24
   #  define BYTE_1_SHIFT    16
   #  define BYTE_2_SHIFT    8
   #  define BYTE_3_SHIFT    0
   
   ```



##########
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S:
##########
@@ -0,0 +1,307 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.  The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrowed some ideas from arch_strcmp.S(). */
+
+/* Parameters and result. */
+#define dst     r0
+#define src     r1
+#define result  r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1          r4
+#define tmp2          r5
+#define tmp3          r6
+#define src_offset    r7
+
+#ifdef __ARM_BIG_ENDIAN
+#define MASK_0          0xff000000
+#define MASK_1          0xff0000
+#define MASK_2          0xff00
+#define MASK_3          0xff
+#define BYTE_0_SHIFT    24
+#define BYTE_1_SHIFT    16
+#define BYTE_2_SHIFT    8
+#define BYTE_3_SHIFT    0
+#else
+#define MASK_0          0xff
+#define MASK_1          0xff00
+#define MASK_2          0xff0000
+#define MASK_3          0xff000000
+#define BYTE_0_SHIFT    0
+#define BYTE_1_SHIFT    8
+#define BYTE_2_SHIFT    16
+#define BYTE_3_SHIFT    24
+#endif
+
+
+    .syntax unified
+    .text
+    .align  2
+    .global strcpy
+    .thumb
+    .type   strcpy, %function
+
+strcpy:
+    push    {result, tmp1, tmp2, tmp3, src_offset}
+    eor     tmp1, dst, src
+    tst     tmp1, #3 
+    /* If dst and src not at same byte offset from a word boundary */
+    bne     .Lstrs_diff_offset
+    /* Process same byte offset then, get the offset */
+    ands    tmp1, src, #3
+    beq     .Ldst_src_aligned
+    /* get number of bytes unaligned */
+    rsb     tmp1, #4
+
+.Lbyte_copy_until_dsr_src_aligned:
+    ldrb    tmp2, [src], #1
+    cmp     tmp2, #0
+    beq     .Lcopy_done
+    strb    tmp2, [dst], #1
+    subs    tmp1, #1
+    bne     .Lbyte_copy_until_dsr_src_aligned
+
+.Ldst_src_aligned:
+    /* Now dst and src are aligned */
+    ldr     tmp1, [src], #4
+    sub     tmp2, tmp1, #0x01010101
+    bic     tmp2, tmp1
+    tst     tmp2, #0x80808080
+    /* All zero means no zero byte is detected */
+    it      eq
+    streq   tmp1, [dst], #4
+    beq     .Ldst_src_aligned
+
+    /* There is a zero in the word, copy until zero */
+    sub     src, #4
+.Lbyte_copy_until_zero:
+    ldrb    tmp2, [src], #1
+    cmp     tmp2, #0
+    beq     .Lcopy_done
+    strb    tmp2, [dst], #1
+    b       .Lbyte_copy_until_zero
+
+/* Make dst aligned, so we won't write anything before dst.
+ * If we attempt to write before dst, atomic read-write must
+ * be ensured. Atomic operation complicates things.
+ * So the solution here is byte by byte copy until dst aligned.
+ */
+.Lstrs_diff_offset:
+    ands    tmp1, dst, #3
+    beq     .Ldiff_offset_loop_begin
+    /* get number of dst bytes unaligned */
+    rsb     tmp1, #4
+
+.Lbyte_copy_until_dst_aligned:
+    ldrb    tmp2, [src], #1
+    cmp     tmp2, #0
+    beq     .Lcopy_done
+    strb    tmp2, [dst], #1
+    subs    tmp1, #1
+    bne     .Lbyte_copy_until_dst_aligned
+
+.Ldiff_offset_loop_begin:
+    /* src_offset mustn't be 0 here */
+    and     src_offset, src, 3
+    lsls    src_offset, #3
+    bic     src, #3
+/* first word logic
+ * prepend 0xff to make the algorithm simpler
+ * only the first word needs to be prepended
+ */
+    ldr     tmp1, [src], #4
+    mov     tmp2, #0xffffffff
+    rsb     tmp3, src_offset, #32
+    
+#ifdef __ARM_BIG_ENDIAN
+    lsls    tmp2, tmp3
+#else
+    lsrs    tmp2, tmp3
+#endif
+    orr     tmp1, tmp1, tmp2
+    /* Test if the first word contains zero */
+    sub     tmp3, tmp1, #0x01010101
+    bic     tmp3, tmp1
+    tst     tmp3, #0x80808080
+    /* non-zero means zero byte is detected */
+    bne     .Ltail_copy
+
+    /* before loop, set tmp2=tmp1 to simplify the logic in the loop */
+    mov     tmp2, tmp1
+.Ldiff_offset_loop:
+    mov     tmp1, tmp2
+    ldr     tmp2, [src], #4
+    /* Test if  contains zero */
+    sub     tmp3, tmp2, #0x01010101
+    bic     tmp3, tmp2
+    tst     tmp3, #0x80808080
+    /* non-zero means zero byte is detected */
+    bne     .Ltail_copy
+    /* Now let's fill dst */
+#ifdef __ARM_BIG_ENDIAN
+    lsls    tmp1, src_offset
+    rsb     tmp3, src_offset, #32
+    lsrs    tmp3, tmp2, tmp3
+    orr     tmp1, tmp1, tmp3
+#else
+    lsrs    tmp1, src_offset
+    rsb     tmp3, src_offset, #32
+    lsls    tmp3, tmp2, tmp3
+    orr     tmp1, tmp1, tmp3
+#endif
+    str     tmp1, [dst], #4
+    b       .Ldiff_offset_loop
+
+.Ltail_copy:
+    cmp     src_offset, #24
+    beq     .Loffset_3
+    cmp     src_offset, #16
+    beq     .Loffset_2
+    /*  src_offset == 8 here */
+    ands    tmp3, tmp1, MASK_1
+    beq     .Lcopy_done
+    lsrs    tmp3, BYTE_1_SHIFT
+    strb    tmp3, [dst], #1
+.Loffset_2:
+    ands    tmp3, tmp1, MASK_2
+    beq     .Lcopy_done
+    lsrs    tmp3, BYTE_2_SHIFT
+    strb    tmp3, [dst], #1
+.Loffset_3:
+    ands    tmp3, tmp1, MASK_3
+    beq     .Lcopy_done
+    lsrs    tmp3, BYTE_3_SHIFT
+    strb    tmp3, [dst], #1
+    ands    tmp3, tmp2, MASK_0
+    beq     .Lcopy_done
+    lsrs    tmp3, BYTE_0_SHIFT
+    strb    tmp3, [dst], #1
+    ands    tmp3, tmp2, MASK_1
+    beq     .Lcopy_done
+    lsrs    tmp3, BYTE_1_SHIFT
+    strb    tmp3, [dst], #1
+    ands    tmp3, tmp2, MASK_2
+    beq     .Lcopy_done
+    lsrs    tmp3, BYTE_2_SHIFT
+    strb    tmp3, [dst], #1
+.Lcopy_done:
+    mov     tmp3, #0
+    strb    tmp3, [dst]
+    pop     {result, tmp1, tmp2, tmp3, src_offset}
+    bx      lr
+
+#if 0
+/* Pseudo Code of strcpy when dst/src not at same byte offset */
+
+/* Make dst aligned, so we won't write anything before dst.
+ * If we attempt to write before dst, atomic read-write must
+ * be ensured. Atomic operation complicates things.
+ * So the solution here is byte by byte copy until dst aligned.
+ */
+    if (dst & 3 == 0)
+        goto diff_offset_loop_begin;
+    ByteCopyUntilDstAligned();
+
+.diff_offset_loop_begin:
+/* src_offset mustn't be 0 here */
+    src_offset = src & 3;
+    src_offset = src_offset * 8;
+    src = src & 0xfffffffc;
+    tmp1 = *src;
+    src +=4;
+/* first word logic
+ * prepend 0xff to make the algorithm simpler
+ * only the first word needs to be prepended
+ */
+    if (src_offset != 0)
+    {
+        tmp2 = 0xffffffff
+#if  big endian
+        tmp2 = tmp2 << (32 - src_offset)
+#else
+        tmp2 = tmp2 >> (32 - src_offset)
+#endif
+        tmp1 |= tmp2
+    }
+    if (HasZeroByte(tmp1)) 
+    {
+        goto .tail_copy;
+    }
+
+/* before loop, set tmp2=tmp1 to simplify the logic in the loop */
+    tmp2 = tmp1
+.diff_offset_loop:
+    tmp1 = tmp2;
+    tmp2 = *src;
+    src += 4;
+
+ /* double word tail means we have to copy from tmp1 and tmp2 to dst */
+    if (HasZeroByte(tmp2))
+    {
+        goto .tail_copy;
+    }
+/* Now let's fill dst */
+#if  big endian
+    tmp1 = tmp1 << (src_offset);
+    tmp1 |= tmp2 >> (32 - src_offset);
+    *dst = tmp1;
+#else
+    tmp1 = tmp1 >> (src_offset);
+    tmp1 |= tmp2 << (32 - src_offset);
+    *dst = tmp1;
+#endif
+    dst +=4;
+    goto .diff_offset_loop;
+
+/* byte by byte copy at the tail */
+.tail_copy:
+    if (src_offset == 3)
+        goto offset_3;
+    if (src_offset == 2)
+        goto offset_2;
+
+/* src_offset mustn't be 0 here */
+/* default src_offset == 1 */
+    if (tmp1 & MASK_1 == 0)
+        goto cpy_done;
+    *dst++ = tmp1 & MASK_1;
+offset_2:
+    if (tmp1 & MASK_2 == 0)
+        goto cpy_done;
+    *dst++ = tmp1 & MASK_2;
+offset_3:
+    if (tmp1 & MASK_3 == 0)
+        goto cpy_done;
+    *dst++ = tmp1 & MASK_3;
+    if (tmp2 & MASK_0 == 0)
+        goto cpy_done;
+    *dst++ = tmp2 & MASK_0;
+    if (tmp2 & MASK_1 == 0)
+        goto cpy_done;
+    *dst++ = tmp2 & MASK_1;
+    if (tmp2 & MASK_2 == 0)
+        goto cpy_done;
+    *dst++ = tmp2 & MASK_2;
+/* tmp2 BYTE3 must be zero here */
+
+.cpy_done:
+    *dst++ = 0;
+#endif  /* Pseudo code end */

Review Comment:
   let's add newline at the end of the file



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to