pkarashchenko commented on code in PR #8814:
URL: https://github.com/apache/nuttx/pull/8814#discussion_r1136305990
##########
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S:
##########
@@ -0,0 +1,307 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrowed some ideas from arch_strcmp.S(). */
+
+/* Parameters and result. */
+#define dst r0
+#define src r1
+#define result r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1 r4
+#define tmp2 r5
+#define tmp3 r6
+#define src_offset r7
+
+#ifdef __ARM_BIG_ENDIAN
+#define MASK_0 0xff000000
+#define MASK_1 0xff0000
+#define MASK_2 0xff00
+#define MASK_3 0xff
+#define BYTE_0_SHIFT 24
+#define BYTE_1_SHIFT 16
+#define BYTE_2_SHIFT 8
+#define BYTE_3_SHIFT 0
+#else
+#define MASK_0 0xff
+#define MASK_1 0xff00
+#define MASK_2 0xff0000
+#define MASK_3 0xff000000
+#define BYTE_0_SHIFT 0
+#define BYTE_1_SHIFT 8
+#define BYTE_2_SHIFT 16
+#define BYTE_3_SHIFT 24

Review Comment:
```suggestion
# define MASK_0 0xff
# define MASK_1 0xff00
# define MASK_2 0xff0000
# define MASK_3 0xff000000
# define BYTE_0_SHIFT 0
# define BYTE_1_SHIFT 8
# define BYTE_2_SHIFT 16
# define BYTE_3_SHIFT 24
```


##########
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S:
##########
@@ -0,0 +1,307 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrowed some ideas from arch_strcmp.S(). */
+
+/* Parameters and result. */
+#define dst r0
+#define src r1
+#define result r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1 r4
+#define tmp2 r5
+#define tmp3 r6
+#define src_offset r7
+
+#ifdef __ARM_BIG_ENDIAN
+#define MASK_0 0xff000000
+#define MASK_1 0xff0000
+#define MASK_2 0xff00
+#define MASK_3 0xff
+#define BYTE_0_SHIFT 24
+#define BYTE_1_SHIFT 16
+#define BYTE_2_SHIFT 8
+#define BYTE_3_SHIFT 0

Review Comment:
```suggestion
# define MASK_0 0xff000000
# define MASK_1 0xff0000
# define MASK_2 0xff00
# define MASK_3 0xff
# define BYTE_0_SHIFT 24
# define BYTE_1_SHIFT 16
# define BYTE_2_SHIFT 8
# define BYTE_3_SHIFT 0
```

##########
libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S:
##########
@@ -0,0 +1,307 @@
+/***************************************************************************
+ * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ***************************************************************************/
+
+/* This strcpy borrowed some ideas from arch_strcmp.S(). */
+
+/* Parameters and result. */
+#define dst r0
+#define src r1
+#define result r0
+
+/* Internal variables, or callee saved registers */
+#define tmp1 r4
+#define tmp2 r5
+#define tmp3 r6
+#define src_offset r7
+
+#ifdef __ARM_BIG_ENDIAN
+#define MASK_0 0xff000000
+#define MASK_1 0xff0000
+#define MASK_2 0xff00
+#define MASK_3 0xff
+#define BYTE_0_SHIFT 24
+#define BYTE_1_SHIFT 16
+#define BYTE_2_SHIFT 8
+#define BYTE_3_SHIFT 0
+#else
+#define MASK_0 0xff
+#define MASK_1 0xff00
+#define MASK_2 0xff0000
+#define MASK_3 0xff000000
+#define BYTE_0_SHIFT 0
+#define BYTE_1_SHIFT 8
+#define BYTE_2_SHIFT 16
+#define BYTE_3_SHIFT 24
+#endif
+
+
+  .syntax unified
+  .text
+  .align 2
+  .global strcpy
+  .thumb
+  .type strcpy, %function
+
+strcpy:
+  push {result, tmp1, tmp2, tmp3, src_offset}
+  eor tmp1, dst, src
+  tst tmp1, #3
+  /* If dst and src not at same byte offset from a word boundary */
+  bne .Lstrs_diff_offset
+  /* Process same byte offset then, get the offset */
+  ands tmp1, src, #3
+  beq .Ldst_src_aligned
+  /* get number of bytes unaligned */
+  rsb tmp1, #4
+
+.Lbyte_copy_until_dsr_src_aligned:
+  ldrb tmp2, [src], #1
+  cmp tmp2, #0
+  beq .Lcopy_done
+  strb tmp2, [dst], #1
+  subs tmp1, #1
+  bne .Lbyte_copy_until_dsr_src_aligned
+
+.Ldst_src_aligned:
+  /* Now dst and src are aligned */
+  ldr tmp1, [src], #4
+  sub tmp2, tmp1, #0x01010101
+  bic tmp2, tmp1
+  tst tmp2, #0x80808080
+  /* All zero means no zero byte is detected */
+  it eq
+  streq tmp1, [dst], #4
+  beq .Ldst_src_aligned
+
+  /* There is a zero in the word, copy until zero */
+  sub src, #4
+.Lbyte_copy_until_zero:
+  ldrb tmp2, [src], #1
+  cmp tmp2, #0
+  beq .Lcopy_done
+  strb tmp2, [dst], #1
+  b .Lbyte_copy_until_zero
+
+/* Make dst aligned, so we won't write anything before dst.
+ * If we attempt to write before dst, atomic read-write must
+ * be ensured. Atomic operation complicates things.
+ * So the solution here is byte by byte copy until dst aligned.
+ */
+.Lstrs_diff_offset:
+  ands tmp1, dst, #3
+  beq .Ldiff_offset_loop_begin
+  /* get number of dst bytes unaligned */
+  rsb tmp1, #4
+
+.Lbyte_copy_until_dst_aligned:
+  ldrb tmp2, [src], #1
+  cmp tmp2, #0
+  beq .Lcopy_done
+  strb tmp2, [dst], #1
+  subs tmp1, #1
+  bne .Lbyte_copy_until_dst_aligned
+
+.Ldiff_offset_loop_begin:
+  /* src_offset mustn't be 0 here */
+  and src_offset, src, 3
+  lsls src_offset, #3
+  bic src, #3
+/* first word logic
+ * prepend 0xff to make the algorithm simpler
+ * only the first word needs to be prepended
+ */
+  ldr tmp1, [src], #4
+  mov tmp2, #0xffffffff
+  rsb tmp3, src_offset, #32
+
+#ifdef __ARM_BIG_ENDIAN
+  lsls tmp2, tmp3
+#else
+  lsrs tmp2, tmp3
+#endif
+  orr tmp1, tmp1, tmp2
+  /* Test if the first word contains zero */
+  sub tmp3, tmp1, #0x01010101
+  bic tmp3, tmp1
+  tst tmp3, #0x80808080
+  /* non-zero means zero byte is detected */
+  bne .Ltail_copy
+
+  /* before loop, set tmp2=tmp1 to simplify the logic in the loop */
+  mov tmp2, tmp1
+.Ldiff_offset_loop:
+  mov tmp1, tmp2
+  ldr tmp2, [src], #4
+  /* Test if contains zero */
+  sub tmp3, tmp2, #0x01010101
+  bic tmp3, tmp2
+  tst tmp3, #0x80808080
+  /* non-zero means zero byte is detected */
+  bne .Ltail_copy
+  /* Now let's fill dst */
+#ifdef __ARM_BIG_ENDIAN
+  lsls tmp1, src_offset
+  rsb tmp3, src_offset, #32
+  lsrs tmp3, tmp2, tmp3
+  orr tmp1, tmp1, tmp3
+#else
+  lsrs tmp1, src_offset
+  rsb tmp3, src_offset, #32
+  lsls tmp3, tmp2, tmp3
+  orr tmp1, tmp1, tmp3
+#endif
+  str tmp1, [dst], #4
+  b .Ldiff_offset_loop
+
+.Ltail_copy:
+  cmp src_offset, #24
+  beq .Loffset_3
+  cmp src_offset, #16
+  beq .Loffset_2
+  /* src_offset == 8 here */
+  ands tmp3, tmp1, MASK_1
+  beq .Lcopy_done
+  lsrs tmp3, BYTE_1_SHIFT
+  strb tmp3, [dst], #1
+.Loffset_2:
+  ands tmp3, tmp1, MASK_2
+  beq .Lcopy_done
+  lsrs tmp3, BYTE_2_SHIFT
+  strb tmp3, [dst], #1
+.Loffset_3:
+  ands tmp3, tmp1, MASK_3
+  beq .Lcopy_done
+  lsrs tmp3, BYTE_3_SHIFT
+  strb tmp3, [dst], #1
+  ands tmp3, tmp2, MASK_0
+  beq .Lcopy_done
+  lsrs tmp3, BYTE_0_SHIFT
+  strb tmp3, [dst], #1
+  ands tmp3, tmp2, MASK_1
+  beq .Lcopy_done
+  lsrs tmp3, BYTE_1_SHIFT
+  strb tmp3, [dst], #1
+  ands tmp3, tmp2, MASK_2
+  beq .Lcopy_done
+  lsrs tmp3, BYTE_2_SHIFT
+  strb tmp3, [dst], #1
+.Lcopy_done:
+  mov tmp3, #0
+  strb tmp3, [dst]
+  pop {result, tmp1, tmp2, tmp3, src_offset}
+  bx lr
+
+#if 0
+/* Pseudo Code of strcpy when dst/src not at same byte offset */
+
+/* Make dst aligned, so we won't write anything before dst.
+ * If we attempt to write before dst, atomic read-write must
+ * be ensured. Atomic operation complicates things.
+ * So the solution here is byte by byte copy until dst aligned.
+ */
+  if (dst & 3 == 0)
+    goto diff_offset_loop_begin;
+  ByteCopyUntilDstAligned();
+
+.diff_offset_loop_begin:
+/* src_offset mustn't be 0 here */
+  src_offset = src & 3;
+  src_offset = src_offset * 8;
+  src = src & 0xfffffffc;
+  tmp1 = *src;
+  src +=4;
+/* first word logic
+ * prepend 0xff to make the algorithm simpler
+ * only the first word needs to be prepended
+ */
+  if (src_offset != 0)
+    {
+      tmp2 = 0xffffffff
+#if big endian
+      tmp2 = tmp2 << (32 - src_offset)
+#else
+      tmp2 = tmp2 >> (32 - src_offset)
+#endif
+      tmp1 |= tmp2
+    }
+  if (HasZeroByte(tmp1))
+    {
+      goto .tail_copy;
+    }
+
+/* before loop, set tmp2=tmp1 to simplify the logic in the loop */
+  tmp2 = tmp1
+.diff_offset_loop:
+  tmp1 = tmp2;
+  tmp2 = *src;
+  src += 4;
+
+  /* double word tail means we have to copy from tmp1 and tmp2 to dst */
+  if (HasZeroByte(tmp2))
+    {
+      goto .tail_copy;
+    }
+/* Now let's fill dst */
+#if big endian
+  tmp1 = tmp1 << (src_offset);
+  tmp1 |= tmp2 >> (32 - src_offset);
+  *dst = tmp1;
+#else
+  tmp1 = tmp1 >> (src_offset);
+  tmp1 |= tmp2 << (32 - src_offset);
+  *dst = tmp1;
+#endif
+  dst +=4;
+  goto .diff_offset_loop;
+
+/* byte by byte copy at the tail */
+.tail_copy:
+  if (src_offset == 3)
+    goto offset_3;
+  if (src_offset == 2)
+    goto offset_2;
+
+/* src_offset mustn't be 0 here */
+/* default src_offset == 1 */
+  if (tmp1 & MASK_1 == 0)
+    goto cpy_done;
+  *dst++ = tmp1 & MASK_1;
+offset_2:
+  if (tmp1 & MASK_2 == 0)
+    goto cpy_done;
+  *dst++ = tmp1 & MASK_2;
+offset_3:
+  if (tmp1 & MASK_3 == 0)
+    goto cpy_done;
+  *dst++ = tmp1 & MASK_3;
+  if (tmp2 & MASK_0 == 0)
+    goto cpy_done;
+  *dst++ = tmp2 & MASK_0;
+  if (tmp2 & MASK_1 == 0)
+    goto cpy_done;
+  *dst++ = tmp2 & MASK_1;
+  if (tmp2 & MASK_2 == 0)
+    goto cpy_done;
+  *dst++ = tmp2 & MASK_2;
+/* tmp2 BYTE3 must be zero here */
+
+.cpy_done:
+  *dst++ = 0;
+#endif /* Pseudo code end */

Review Comment:
   let's add newline at the end of the file


--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
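
For readers following the review, the patch's word-at-a-time NUL scan (the `sub`/`bic`/`tst` sequence with `#0x01010101` and `#0x80808080` at `.Ldst_src_aligned`, and `HasZeroByte()` in the pseudo code) is the classic SWAR zero-byte test. Below is a minimal C sketch of that test; the helper name `has_zero_byte` and the `main()` driver are illustrative only and are not part of the patch.

```c
#include <stdint.h>
#include <stdio.h>

/* Classic SWAR zero-byte test (same trick as the sub/bic/tst sequence in
 * the patch): subtracting 0x01 from a zero byte borrows and sets its top
 * bit, and masking with ~x discards bytes whose top bit was already set,
 * so the result is non-zero exactly when some byte of x is 0x00.
 */
static int has_zero_byte(uint32_t x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

int main(void)
{
  printf("%d\n", has_zero_byte(0x64636261u)); /* no zero byte       -> 0 */
  printf("%d\n", has_zero_byte(0x64630061u)); /* one byte is 0x00   -> 1 */
  return 0;
}
```

On ARMv7-M the patch evaluates the same expression with `sub`, `bic`, and `tst`, branching on the resulting flags instead of returning a value.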
