Module Name: src
Committed By: martin
Date: Sat Jul 20 14:52:04 UTC 2024
Modified Files:
src/common/lib/libc/arch/arm/string [netbsd-10]: memcpy_arm.S
memcpy_neon.S memcpy_xscale.S memmove.S memset.S memset_naive.S
strlen_neon.S
Log Message:
Pull up following revision(s) (requested by rin in ticket #745):
common/lib/libc/arch/arm/string/memcpy_arm.S: revision 1.6
common/lib/libc/arch/arm/string/memcpy_arm.S: revision 1.7
common/lib/libc/arch/arm/string/memcpy_xscale.S: revision 1.6
common/lib/libc/arch/arm/string/memcpy_neon.S: revision 1.2
common/lib/libc/arch/arm/string/memset_naive.S: revision 1.2
common/lib/libc/arch/arm/string/memmove.S: revision 1.11
common/lib/libc/arch/arm/string/strlen_neon.S: revision 1.4
common/lib/libc/arch/arm/string/memset.S: revision 1.9
Use unsigned comparisons for pointers and size_t values.
Fix two signed comparisons that were missed in the last patch.
Found by rillig@.
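
Background (not part of the pulled-up diff): on ARM, a "subs" or "cmp" on a
size_t length sets the flags for both interpretations, but the signed
condition codes (lt, le, ge, gt) test N and V and so misread lengths of
2 GiB and above as negative, while the unsigned codes (lo, ls, hs, hi) test
C and Z and compare correctly.  The mapping applied throughout the patch is
lt->lo, le->ls, ge->hs, gt->hi.  A minimal, hypothetical sketch of the
difference follows; the function name and example value are illustrative
only, not taken from the tree:

	/* Illustrative sketch, not from the NetBSD tree.
	 * int is_short(size_t n) -- return 1 if n < 4, else 0. */
		.text
		.global	is_short
		.type	is_short, %function
	is_short:
		subs	r0, r0, #4	/* set flags from n - 4 */
		movlo	r0, #1		/* borrow (C==0): n really was < 4 */
		movhs	r0, #0		/* no borrow (C==1): n >= 4 */
		bx	lr
		.size	is_short, . - is_short

With n = 0x80000001, "subs" leaves N=0, V=1, C=1: a signed test such as
"blt" (taken when N != V) would wrongly treat the huge buffer as shorter
than 4 bytes, while the unsigned "blo" (taken when C == 0) correctly falls
through.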
To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.5.40.1 \
src/common/lib/libc/arch/arm/string/memcpy_arm.S \
src/common/lib/libc/arch/arm/string/memcpy_xscale.S
cvs rdiff -u -r1.1 -r1.1.52.1 \
src/common/lib/libc/arch/arm/string/memcpy_neon.S \
src/common/lib/libc/arch/arm/string/memset_naive.S
cvs rdiff -u -r1.10 -r1.10.26.1 src/common/lib/libc/arch/arm/string/memmove.S
cvs rdiff -u -r1.8 -r1.8.30.1 src/common/lib/libc/arch/arm/string/memset.S
cvs rdiff -u -r1.3 -r1.3.52.1 \
src/common/lib/libc/arch/arm/string/strlen_neon.S
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/common/lib/libc/arch/arm/string/memcpy_arm.S
diff -u src/common/lib/libc/arch/arm/string/memcpy_arm.S:1.5 src/common/lib/libc/arch/arm/string/memcpy_arm.S:1.5.40.1
--- src/common/lib/libc/arch/arm/string/memcpy_arm.S:1.5 Mon Dec 2 21:21:33 2013
+++ src/common/lib/libc/arch/arm/string/memcpy_arm.S Sat Jul 20 14:52:04 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: memcpy_arm.S,v 1.5 2013/12/02 21:21:33 joerg Exp $ */
+/* $NetBSD: memcpy_arm.S,v 1.5.40.1 2024/07/20 14:52:04 martin Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@ ENTRY(memcpy)
push {r0, lr} /* memcpy() returns dest addr */
subs r2, r2, #4
- blt .Lmemcpy_l4 /* less than 4 bytes */
+ blo .Lmemcpy_l4 /* less than 4 bytes */
ands r12, r0, #3
bne .Lmemcpy_destul /* oh unaligned destination addr */
ands r12, r1, #3
@@ -75,9 +75,9 @@ ENTRY(memcpy)
.Lmemcpy_t8:
/* We have aligned source and destination */
subs r2, r2, #8
- blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
+ blo .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
subs r2, r2, #0x14
- blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
+ blo .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
push {r4} /* borrow r4 */
/* blat 32 bytes at a time */
@@ -88,12 +88,12 @@ ENTRY(memcpy)
ldmia r1!, {r3, r4, r12, lr}
stmia r0!, {r3, r4, r12, lr}
subs r2, r2, #0x20
- bge .Lmemcpy_loop32
+ bhs .Lmemcpy_loop32
cmn r2, #0x10
- ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmiage r0!, {r3, r4, r12, lr}
- subge r2, r2, #0x10
+ ldmiahs r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
+ stmiahs r0!, {r3, r4, r12, lr}
+ subhs r2, r2, #0x10
pop {r4} /* return r4 */
.Lmemcpy_l32:
@@ -101,21 +101,21 @@ ENTRY(memcpy)
/* blat 12 bytes at a time */
.Lmemcpy_loop12:
- ldmiage r1!, {r3, r12, lr}
- stmiage r0!, {r3, r12, lr}
- subsge r2, r2, #0x0c
- bge .Lmemcpy_loop12
+ ldmiahs r1!, {r3, r12, lr}
+ stmiahs r0!, {r3, r12, lr}
+ subshs r2, r2, #0x0c
+ bhs .Lmemcpy_loop12
.Lmemcpy_l12:
adds r2, r2, #8
- blt .Lmemcpy_l4
+ blo .Lmemcpy_l4
subs r2, r2, #4
- ldrlt r3, [r1], #4
- strlt r3, [r0], #4
- ldmiage r1!, {r3, r12}
- stmiage r0!, {r3, r12}
- subge r2, r2, #4
+ ldrlo r3, [r1], #4
+ strlo r3, [r0], #4
+ ldmiahs r1!, {r3, r12}
+ stmiahs r0!, {r3, r12}
+ subhs r2, r2, #4
.Lmemcpy_l4:
/* less than 4 bytes to go */
@@ -129,10 +129,10 @@ ENTRY(memcpy)
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0], #1
- ldrbge r3, [r1], #1
- strbge r3, [r0], #1
- ldrbgt r3, [r1], #1
- strbgt r3, [r0], #1
+ ldrbhs r3, [r1], #1
+ strbhs r3, [r0], #1
+ ldrbhi r3, [r1], #1
+ strbhi r3, [r0], #1
pop {r0, pc}
/* erg - unaligned destination */
@@ -143,12 +143,12 @@ ENTRY(memcpy)
/* align destination with byte copies */
ldrb r3, [r1], #1
strb r3, [r0], #1
- ldrbge r3, [r1], #1
- strbge r3, [r0], #1
- ldrbgt r3, [r1], #1
- strbgt r3, [r0], #1
+ ldrbhs r3, [r1], #1
+ strbhs r3, [r0], #1
+ ldrbhi r3, [r1], #1
+ strbhi r3, [r0], #1
subs r2, r2, r12
- blt .Lmemcpy_l4 /* less the 4 bytes */
+ blo .Lmemcpy_l4 /* less the 4 bytes */
ands r12, r1, #3
beq .Lmemcpy_t8 /* we have an aligned source */
@@ -159,10 +159,10 @@ ENTRY(memcpy)
bic r1, r1, #3
ldr lr, [r1], #4
cmp r12, #2
- bgt .Lmemcpy_srcul3
+ bhi .Lmemcpy_srcul3
beq .Lmemcpy_srcul2
cmp r2, #0x0c
- blt .Lmemcpy_srcul1loop4
+ blo .Lmemcpy_srcul1loop4
sub r2, r2, #0x0c
push {r4, r5}
@@ -192,10 +192,10 @@ ENTRY(memcpy)
#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
- bge .Lmemcpy_srcul1loop16
+ bhs .Lmemcpy_srcul1loop16
pop {r4, r5}
adds r2, r2, #0x0c
- blt .Lmemcpy_srcul1l4
+ blo .Lmemcpy_srcul1l4
.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
@@ -211,7 +211,7 @@ ENTRY(memcpy)
#endif
str r12, [r0], #4
subs r2, r2, #4
- bge .Lmemcpy_srcul1loop4
+ bhs .Lmemcpy_srcul1loop4
.Lmemcpy_srcul1l4:
sub r1, r1, #3
@@ -219,7 +219,7 @@ ENTRY(memcpy)
.Lmemcpy_srcul2:
cmp r2, #0x0c
- blt .Lmemcpy_srcul2loop4
+ blo .Lmemcpy_srcul2loop4
sub r2, r2, #0x0c
push {r4, r5}
@@ -249,10 +249,10 @@ ENTRY(memcpy)
#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
- bge .Lmemcpy_srcul2loop16
+ bhs .Lmemcpy_srcul2loop16
pop {r4, r5}
adds r2, r2, #0x0c
- blt .Lmemcpy_srcul2l4
+ blo .Lmemcpy_srcul2l4
.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
@@ -268,7 +268,7 @@ ENTRY(memcpy)
#endif
str r12, [r0], #4
subs r2, r2, #4
- bge .Lmemcpy_srcul2loop4
+ bhs .Lmemcpy_srcul2loop4
.Lmemcpy_srcul2l4:
sub r1, r1, #2
@@ -276,7 +276,7 @@ ENTRY(memcpy)
.Lmemcpy_srcul3:
cmp r2, #0x0c
- blt .Lmemcpy_srcul3loop4
+ blo .Lmemcpy_srcul3loop4
sub r2, r2, #0x0c
push {r4, r5}
@@ -306,10 +306,10 @@ ENTRY(memcpy)
#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
- bge .Lmemcpy_srcul3loop16
+ bhs .Lmemcpy_srcul3loop16
pop {r4, r5}
adds r2, r2, #0x0c
- blt .Lmemcpy_srcul3l4
+ blo .Lmemcpy_srcul3l4
.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
@@ -325,7 +325,7 @@ ENTRY(memcpy)
#endif
str r12, [r0], #4
subs r2, r2, #4
- bge .Lmemcpy_srcul3loop4
+ bhs .Lmemcpy_srcul3loop4
.Lmemcpy_srcul3l4:
sub r1, r1, #1
Index: src/common/lib/libc/arch/arm/string/memcpy_xscale.S
diff -u src/common/lib/libc/arch/arm/string/memcpy_xscale.S:1.5 src/common/lib/libc/arch/arm/string/memcpy_xscale.S:1.5.40.1
--- src/common/lib/libc/arch/arm/string/memcpy_xscale.S:1.5 Tue Dec 17 01:27:21 2013
+++ src/common/lib/libc/arch/arm/string/memcpy_xscale.S Sat Jul 20 14:52:04 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: memcpy_xscale.S,v 1.5 2013/12/17 01:27:21 joerg Exp $ */
+/* $NetBSD: memcpy_xscale.S,v 1.5.40.1 2024/07/20 14:52:04 martin Exp $ */
/*
* Copyright 2003 Wasabi Systems, Inc.
@@ -41,7 +41,7 @@
ENTRY(memcpy)
pld [r1]
cmp r2, #0x0c
- ble .Lmemcpy_short /* <= 12 bytes */
+ bls .Lmemcpy_short /* <= 12 bytes */
mov r3, r0 /* We must not clobber r0 */
/* Word-align the destination buffer */
@@ -51,12 +51,12 @@ ENTRY(memcpy)
ldrb ip, [r1], #0x01
sub r2, r2, #0x01
strb ip, [r3], #0x01
- ldrble ip, [r1], #0x01
- suble r2, r2, #0x01
- strble ip, [r3], #0x01
- ldrblt ip, [r1], #0x01
- sublt r2, r2, #0x01
- strblt ip, [r3], #0x01
+ ldrbls ip, [r1], #0x01
+ subls r2, r2, #0x01
+ strbls ip, [r3], #0x01
+ ldrblo ip, [r1], #0x01
+ sublo r2, r2, #0x01
+ strblo ip, [r3], #0x01
/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
@@ -72,7 +72,7 @@ ENTRY(memcpy)
/* Destination buffer quad aligned, source is at least word aligned */
subs r2, r2, #0x80
- blt .Lmemcpy_w_lessthan128
+ blo .Lmemcpy_w_lessthan128
/* Copy 128 bytes at a time */
.Lmemcpy_w_loop128:
@@ -129,14 +129,14 @@ ENTRY(memcpy)
strd r8, r9, [r3], #0x08 /* ST:70-77 */
subs r2, r2, #0x80
strd r4, r5, [r3], #0x08 /* ST:78-7f */
- bge .Lmemcpy_w_loop128
+ bhs .Lmemcpy_w_loop128
.Lmemcpy_w_lessthan128:
adds r2, r2, #0x80 /* Adjust for extra sub */
popeq {r4-r9}
RETc(eq) /* Return now if done */
subs r2, r2, #0x20
- blt .Lmemcpy_w_lessthan32
+ blo .Lmemcpy_w_lessthan32
/* Copy 32 bytes at a time */
.Lmemcpy_w_loop32:
@@ -154,7 +154,7 @@ ENTRY(memcpy)
strd r8, r9, [r3], #0x08
subs r2, r2, #0x20
strd r4, r5, [r3], #0x08
- bge .Lmemcpy_w_loop32
+ bhs .Lmemcpy_w_loop32
.Lmemcpy_w_lessthan32:
adds r2, r2, #0x20 /* Adjust for extra sub */
@@ -188,17 +188,17 @@ ENTRY(memcpy)
pop {r4-r9}
RETc(eq) /* Return now if done */
subs r2, r2, #0x04
- ldrge ip, [r1], #0x04
- strge ip, [r3], #0x04
+ ldrhs ip, [r1], #0x04
+ strhs ip, [r3], #0x04
RETc(eq) /* Return now if done */
- addlt r2, r2, #0x04
+ addlo r2, r2, #0x04
ldrb ip, [r1], #0x01
cmp r2, #0x02
- ldrbge r2, [r1], #0x01
+ ldrbhs r2, [r1], #0x01
strb ip, [r3], #0x01
- ldrbgt ip, [r1]
- strbge r2, [r3], #0x01
- strbgt ip, [r3]
+ ldrbhi ip, [r1]
+ strbhs r2, [r3], #0x01
+ strbhi ip, [r3]
RET
@@ -211,7 +211,7 @@ ENTRY(memcpy)
bic r1, r1, #0x03
cmp ip, #2
ldr ip, [r1], #0x04
- bgt .Lmemcpy_bad3
+ bhi .Lmemcpy_bad3
beq .Lmemcpy_bad2
b .Lmemcpy_bad1
@@ -251,9 +251,9 @@ ENTRY(memcpy)
.Lmemcpy_bad1:
cmp r2, #0x20
- bge .Lmemcpy_bad1_loop16
+ bhs .Lmemcpy_bad1_loop16
cmp r2, #0x10
- blt .Lmemcpy_bad1_loop16_short
+ blo .Lmemcpy_bad1_loop16_short
/* copy last 16 bytes (without preload) */
#ifdef __ARMEB__
@@ -292,8 +292,8 @@ ENTRY(memcpy)
.Lmemcpy_bad1_loop16_short:
subs r2, r2, #0x04
- sublt r1, r1, #0x03
- blt .Lmemcpy_bad_done
+ sublo r1, r1, #0x03
+ blo .Lmemcpy_bad_done
.Lmemcpy_bad1_loop4:
#ifdef __ARMEB__
@@ -309,7 +309,7 @@ ENTRY(memcpy)
orr r4, r4, ip, lsl #24
#endif
str r4, [r3], #0x04
- bge .Lmemcpy_bad1_loop4
+ bhs .Lmemcpy_bad1_loop4
sub r1, r1, #0x03
b .Lmemcpy_bad_done
@@ -349,9 +349,9 @@ ENTRY(memcpy)
.Lmemcpy_bad2:
cmp r2, #0x20
- bge .Lmemcpy_bad2_loop16
+ bhs .Lmemcpy_bad2_loop16
cmp r2, #0x10
- blt .Lmemcpy_bad2_loop16_short
+ blo .Lmemcpy_bad2_loop16_short
/* copy last 16 bytes (without preload) */
#ifdef __ARMEB__
@@ -390,8 +390,8 @@ ENTRY(memcpy)
.Lmemcpy_bad2_loop16_short:
subs r2, r2, #0x04
- sublt r1, r1, #0x02
- blt .Lmemcpy_bad_done
+ sublo r1, r1, #0x02
+ blo .Lmemcpy_bad_done
.Lmemcpy_bad2_loop4:
#ifdef __ARMEB__
@@ -407,7 +407,7 @@ ENTRY(memcpy)
orr r4, r4, ip, lsl #16
#endif
str r4, [r3], #0x04
- bge .Lmemcpy_bad2_loop4
+ bhs .Lmemcpy_bad2_loop4
sub r1, r1, #0x02
b .Lmemcpy_bad_done
@@ -447,9 +447,9 @@ ENTRY(memcpy)
.Lmemcpy_bad3:
cmp r2, #0x20
- bge .Lmemcpy_bad3_loop16
+ bhs .Lmemcpy_bad3_loop16
cmp r2, #0x10
- blt .Lmemcpy_bad3_loop16_short
+ blo .Lmemcpy_bad3_loop16_short
/* copy last 16 bytes (without preload) */
#ifdef __ARMEB__
@@ -488,8 +488,8 @@ ENTRY(memcpy)
.Lmemcpy_bad3_loop16_short:
subs r2, r2, #0x04
- sublt r1, r1, #0x01
- blt .Lmemcpy_bad_done
+ sublo r1, r1, #0x01
+ blo .Lmemcpy_bad_done
.Lmemcpy_bad3_loop4:
#ifdef __ARMEB__
@@ -505,7 +505,7 @@ ENTRY(memcpy)
orr r4, r4, ip, lsl #8
#endif
str r4, [r3], #0x04
- bge .Lmemcpy_bad3_loop4
+ bhs .Lmemcpy_bad3_loop4
sub r1, r1, #0x01
.Lmemcpy_bad_done:
@@ -514,11 +514,11 @@ ENTRY(memcpy)
RETc(eq)
ldrb ip, [r1], #0x01
cmp r2, #0x02
- ldrbge r2, [r1], #0x01
+ ldrbhs r2, [r1], #0x01
strb ip, [r3], #0x01
- ldrbgt ip, [r1]
- strbge r2, [r3], #0x01
- strbgt ip, [r3]
+ ldrbhi ip, [r1]
+ strbhs r2, [r3], #0x01
+ strbhi ip, [r3]
RET
Index: src/common/lib/libc/arch/arm/string/memcpy_neon.S
diff -u src/common/lib/libc/arch/arm/string/memcpy_neon.S:1.1 src/common/lib/libc/arch/arm/string/memcpy_neon.S:1.1.52.1
--- src/common/lib/libc/arch/arm/string/memcpy_neon.S:1.1 Thu Jan 3 09:34:44 2013
+++ src/common/lib/libc/arch/arm/string/memcpy_neon.S Sat Jul 20 14:52:04 2024
@@ -29,7 +29,7 @@
#include <machine/asm.h>
-RCSID("$NetBSD: memcpy_neon.S,v 1.1 2013/01/03 09:34:44 matt Exp $")
+RCSID("$NetBSD: memcpy_neon.S,v 1.1.52.1 2024/07/20 14:52:04 martin Exp $")
.text
ENTRY(memcpy)
@@ -40,7 +40,7 @@ ENTRY(memcpy)
mov r3, r0 /* keep r0 unchanged */
#if 0
cmp r2, #16 /* copy less than 8 bytes? */
- bge .Ldst_aligner /* nope, do it the long way */
+ bhs .Ldst_aligner /* nope, do it the long way */
1: ldrb ip, [r1], #1 /* load a byte from src */
subs r2, r2, #1 /* and more to transfer? */
@@ -78,7 +78,7 @@ ENTRY(memcpy)
vld1.64 {d1}, [r1:64]! /* load a dword from src */
cmp r2, r5 /* do we already have enough? */
- bgt .Lincongruent /* no, so read more */
+ bhi .Lincongruent /* no, so read more */
.Lincongruent_finish:
vtbl.8 d0, {d1-d2}, d0 /* merge last dwords */
@@ -86,14 +86,14 @@ ENTRY(memcpy)
#ifdef __ARMEB__
vrev64.32 d0, d0 /* word swap to LE */
#endif
- blt .Lfinish /* no, write final partial dword */
+ blo .Lfinish /* no, write final partial dword */
vst1.32 {d0}, [r3:64] /* yes, write final full dword */
b .Ldone /* and we're done! */
.Lincongruent:
vld1.64 {d2}, [r1:64]! /* load a dword */
cmp r2, #8 /* can we write a full dword? */
- blt .Lincongruent_finish /* no, finish it. */
+ blo .Lincongruent_finish /* no, finish it. */
vtbl.8 d1, {d1-d2}, d0 /* reorder */
vst1.64 {d1}, [r3:64]! /* store a dword */
subs r2, r2, #8 /* have we written everything? */
@@ -109,10 +109,10 @@ ENTRY(memcpy)
* last byte).
*/
cmp r2, #32 /* can we write 4 more dwords? */
- blt .Lincongruent_dword /* no, handle dword by dword */
+ blo .Lincongruent_dword /* no, handle dword by dword */
vld1.64 {d2-d5}, [r1:64]! /* read 4 dwords */
cmp r2, #64 /* can we write 4 more dwords? */
- blt .Lincongruent_4dword /* no, handle it */
+ blo .Lincongruent_4dword /* no, handle it */
1: vld1.64 {d7-d10}, [r1:64]! /* read 4 dwords */
vtbl.8 d1, {d1-d2}, d0 /* reorder */
@@ -122,7 +122,7 @@ ENTRY(memcpy)
vst1.64 {d1-d4}, [r3:64]! /* write 4 dwords */
vmov d6, d5 /* move out of the way the load */
cmp r2, #96 /* have 8+4 dwords to write? */
- blt 2f /* no more data, skip the load */
+ blo 2f /* no more data, skip the load */
vld1.64 {d2-d5}, [r1:64]! /* more data, load 4 dwords */
2: vtbl.8 d6, {d6-d7}, d0 /* reorder */
vtbl.8 d7, {d7-d8}, d0 /* reorder */
@@ -133,14 +133,14 @@ ENTRY(memcpy)
beq .Ldone
vmov d1, d10
cmp r2, #64
- bge 1b
+ bhs 1b
/*
* we have leftovers in d1 and new untranslated date in d2-d5.
*/
.Lincongruent_4dword:
cmp r2, #32
- blt .Lincongruent_dword
+ blo .Lincongruent_dword
vtbl.8 d1, {d1-d2}, d0 /* reorder */
vtbl.8 d2, {d2-d3}, d0 /* reorder */
@@ -154,10 +154,10 @@ ENTRY(memcpy)
.Lincongruent_dword:
#if 0
cmp r2, r5 /* enough in leftovers? */
- ble .Lincongruent_finish /* yes, finish it. */
+ bls .Lincongruent_finish /* yes, finish it. */
vld1.64 {d2}, [r1:64]! /* load a dword */
cmp r2, #8 /* can we write a full dword? */
- blt .Lincongruent_finish /* no, finish it. */
+ blo .Lincongruent_finish /* no, finish it. */
vtbl.8 d1, {d1-d2}, d0 /* reorder */
vst1.64 {d1}, [r3:64]! /* store a dword */
subs r2, r2, #8 /* have we written everything? */
@@ -165,7 +165,7 @@ ENTRY(memcpy)
b .Lincongruent_dword /* and go get it */
#else
cmp r2, r5 /* are the bytes we have enough? */
- ble .Lincongruent_finish /* yes, finish it. */
+ bls .Lincongruent_finish /* yes, finish it. */
mov ip, r2 /* get remaining count */
bic ip, ip, #7 /* truncate to a dword */
rsb ip, ip, #32 /* subtract from 32 */
@@ -196,7 +196,7 @@ ENTRY(memcpy)
.Lcongruent_main:
vld1.32 {d0}, [r1:64]! /* load next dword */
cmp r2, #8 /* compare current ptr against end */
- blt .Lfinish /* greater so write final dword */
+ blo .Lfinish /* greater so write final dword */
vst1.32 {d0}, [r3:64]! /* store dword */
subs r2, r2, #8 /* compare current ptr against end */
beq .Ldone /* equal? we're done! */
@@ -204,10 +204,10 @@ ENTRY(memcpy)
bne .Lcongruent_main /* no, write next word */
cmp r2, #64 /* can we write 4 dwords? */
- blt .Lcongruent_loop /* no, this dword by dword */
+ blo .Lcongruent_loop /* no, this dword by dword */
vldm r1!, {d0-d7} /* load next 7 dwords */
cmp r2, #128 /* can we write 16 dwords */
- blt 3f /* no, then deal with 8 dwords */
+ blo 3f /* no, then deal with 8 dwords */
/*
* The following writes two 64-byte interleaving stores and loads.
@@ -215,15 +215,15 @@ ENTRY(memcpy)
1: vldm r1!, {d8-d15} /* load next 8 dwords */
vstm r3!, {d0-d7} /* store 8 more dwords */
cmp r2, #192 /* can we write 16+8 dwords? */
- blt 2f /* no, don't load the next 8 dwords */
+ blo 2f /* no, don't load the next 8 dwords */
vldm r1!, {d0-d7} /* yes, load next 8 dwords */
2: vstm r3!, {d8-d15} /* store 8 more dwords */
sub r2, r2, #128 /* we just stored 16 (8+8) dwords */
beq .Ldone /* if 0, we're done! */
cmp r2, #128 /* can we write 16 dwords */
- bge 1b /* yes, do it again */
+ bhs 1b /* yes, do it again */
cmp r2, #64 /* have we loaded 8 dwords? */
- blt .Lcongruent_loop /* no, proceed to do it dword */
+ blo .Lcongruent_loop /* no, proceed to do it dword */
/*
* We now have 8 dwords we can write in d0-d7.
@@ -235,14 +235,14 @@ ENTRY(memcpy)
.Lcongruent_loop:
vld1.32 {d0}, [r1]! /* load dword from src */
cmp r2, #8 /* can we write a full dword? */
- blt .Lfinish /* no, write last partial dword */
+ blo .Lfinish /* no, write last partial dword */
.Lcongruent_loop_start:
vst1.32 {d0}, [r3]! /* store dword into dst */
subs r2, r2, #8 /* subtract it from length */
beq .Ldone /* if 0, we're done! */
vld1.32 {d0}, [r1]! /* load dword from src */
cmp r2, #8 /* can we write a full dword? */
- bge .Lcongruent_loop_start /* yes, so do it */
+ bhs .Lcongruent_loop_start /* yes, so do it */
.Lfinish:
vmov r4, r5, d0 /* get last dword from NEON */
Index: src/common/lib/libc/arch/arm/string/memset_naive.S
diff -u src/common/lib/libc/arch/arm/string/memset_naive.S:1.1 src/common/lib/libc/arch/arm/string/memset_naive.S:1.1.52.1
--- src/common/lib/libc/arch/arm/string/memset_naive.S:1.1 Tue Jan 8 20:15:00 2013
+++ src/common/lib/libc/arch/arm/string/memset_naive.S Sat Jul 20 14:52:04 2024
@@ -29,7 +29,7 @@
#include <machine/asm.h>
-RCSID("$NetBSD: memset_naive.S,v 1.1 2013/01/08 20:15:00 matt Exp $")
+RCSID("$NetBSD: memset_naive.S,v 1.1.52.1 2024/07/20 14:52:04 martin Exp $")
/*
* This isn't quite as simple/short as it could be but the truly trivial
@@ -40,7 +40,7 @@ ENTRY(memset)
/* LINTSTUB: void *memset(void *, int, size_t) */
mov ip, r0 /* need to preserve r0 */
cmp r2, #10 /* 10 bytes or less? */
- ble .Lbyte_by_byte /* yes, bytewise is faster */
+ bls .Lbyte_by_byte /* yes, bytewise is faster */
ands r3, r1, #0xff /* we are dealing with bytes */
orrne r3, r3, r3, lsl #8 /* move value into 2nd byte lane */
orrne r3, r3, r3, lsl #16 /* move value into all byte lanes */
@@ -60,9 +60,9 @@ ENTRY(memset)
*/
1: mov r2, r3 /* duplicate fill value */
2: subs r1, r1, #16 /* can we write 16 bytes? */
- stmgeia ip!, {r2,r3} /* yes, write the first 8 of them */
- stmgeia ip!, {r2,r3} /* yes, write the second 8 of them */
- bgt 2b /* more left to fill */
+ stmhsia ip!, {r2,r3} /* yes, write the first 8 of them */
+ stmhsia ip!, {r2,r3} /* yes, write the second 8 of them */
+ bhi 2b /* more left to fill */
RETc(eq) /* no, return */
/*
* Our count went negative but the bits below 16 haven't changed.
@@ -80,7 +80,7 @@ ENTRY(memset)
.Lbyte_by_byte:
subs r2, r2, #1 /* can we write a byte? */
- RETc(lt) /* no, return */
+ RETc(lo) /* no, return */
strb r3, [ip], #1 /* write a byte */
b .Lbyte_by_byte /* do next byte */
END(memset)
Index: src/common/lib/libc/arch/arm/string/memmove.S
diff -u src/common/lib/libc/arch/arm/string/memmove.S:1.10 src/common/lib/libc/arch/arm/string/memmove.S:1.10.26.1
--- src/common/lib/libc/arch/arm/string/memmove.S:1.10 Thu Apr 13 07:49:52 2017
+++ src/common/lib/libc/arch/arm/string/memmove.S Sat Jul 20 14:52:04 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: memmove.S,v 1.10 2017/04/13 07:49:52 skrll Exp $ */
+/* $NetBSD: memmove.S,v 1.10.26.1 2024/07/20 14:52:04 martin Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@ ENTRY(bcopy)
push {r0, lr} /* memmove() returns dest addr */
subs r2, r2, #4
- blt .Lmemmove_fl4 /* less than 4 bytes */
+ blo .Lmemmove_fl4 /* less than 4 bytes */
ands r12, r0, #3
bne .Lmemmove_fdestul /* oh unaligned destination addr */
ands r12, r1, #3
@@ -69,9 +69,9 @@ ENTRY(bcopy)
.Lmemmove_ft8:
/* We have aligned source and destination */
subs r2, r2, #8
- blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
+ blo .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
subs r2, r2, #0x14
- blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
+ blo .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
push {r4} /* borrow r4 */
/* blat 32 bytes at a time */
@@ -82,12 +82,12 @@ ENTRY(bcopy)
ldmia r1!, {r3, r4, r12, lr}
stmia r0!, {r3, r4, r12, lr}
subs r2, r2, #0x20
- bge .Lmemmove_floop32
+ bhs .Lmemmove_floop32
cmn r2, #0x10
- ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmiage r0!, {r3, r4, r12, lr}
- subge r2, r2, #0x10
+ ldmiahs r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
+ stmiahs r0!, {r3, r4, r12, lr}
+ subhs r2, r2, #0x10
pop {r4} /* return r4 */
.Lmemmove_fl32:
@@ -95,21 +95,21 @@ ENTRY(bcopy)
/* blat 12 bytes at a time */
.Lmemmove_floop12:
- ldmiage r1!, {r3, r12, lr}
- stmiage r0!, {r3, r12, lr}
- subsge r2, r2, #0x0c
- bge .Lmemmove_floop12
+ ldmiahs r1!, {r3, r12, lr}
+ stmiahs r0!, {r3, r12, lr}
+ subshs r2, r2, #0x0c
+ bhs .Lmemmove_floop12
.Lmemmove_fl12:
adds r2, r2, #8
- blt .Lmemmove_fl4
+ blo .Lmemmove_fl4
subs r2, r2, #4
- ldrlt r3, [r1], #4
- strlt r3, [r0], #4
- ldmiage r1!, {r3, r12}
- stmiage r0!, {r3, r12}
- subge r2, r2, #4
+ ldrlo r3, [r1], #4
+ strlo r3, [r0], #4
+ ldmiahs r1!, {r3, r12}
+ stmiahs r0!, {r3, r12}
+ subhs r2, r2, #4
.Lmemmove_fl4:
/* less than 4 bytes to go */
@@ -120,10 +120,10 @@ ENTRY(bcopy)
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0], #1
- ldrbge r3, [r1], #1
- strbge r3, [r0], #1
- ldrbgt r3, [r1], #1
- strbgt r3, [r0], #1
+ ldrbhs r3, [r1], #1
+ strbhs r3, [r0], #1
+ ldrbhi r3, [r1], #1
+ strbhi r3, [r0], #1
pop {r0, pc}
/* erg - unaligned destination */
@@ -134,12 +134,12 @@ ENTRY(bcopy)
/* align destination with byte copies */
ldrb r3, [r1], #1
strb r3, [r0], #1
- ldrbge r3, [r1], #1
- strbge r3, [r0], #1
- ldrbgt r3, [r1], #1
- strbgt r3, [r0], #1
+ ldrbhs r3, [r1], #1
+ strbhs r3, [r0], #1
+ ldrbhi r3, [r1], #1
+ strbhi r3, [r0], #1
subs r2, r2, r12
- blt .Lmemmove_fl4 /* less the 4 bytes */
+ blo .Lmemmove_fl4 /* less the 4 bytes */
ands r12, r1, #3
beq .Lmemmove_ft8 /* we have an aligned source */
@@ -150,10 +150,10 @@ ENTRY(bcopy)
bic r1, r1, #3
ldr lr, [r1], #4
cmp r12, #2
- bgt .Lmemmove_fsrcul3
+ bhi .Lmemmove_fsrcul3
beq .Lmemmove_fsrcul2
cmp r2, #0x0c
- blt .Lmemmove_fsrcul1loop4
+ blo .Lmemmove_fsrcul1loop4
sub r2, r2, #0x0c
push {r4, r5}
@@ -183,10 +183,10 @@ ENTRY(bcopy)
#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
- bge .Lmemmove_fsrcul1loop16
+ bhs .Lmemmove_fsrcul1loop16
pop {r4, r5}
adds r2, r2, #0x0c
- blt .Lmemmove_fsrcul1l4
+ blo .Lmemmove_fsrcul1l4
.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
@@ -202,7 +202,7 @@ ENTRY(bcopy)
#endif
str r12, [r0], #4
subs r2, r2, #4
- bge .Lmemmove_fsrcul1loop4
+ bhs .Lmemmove_fsrcul1loop4
.Lmemmove_fsrcul1l4:
sub r1, r1, #3
@@ -210,7 +210,7 @@ ENTRY(bcopy)
.Lmemmove_fsrcul2:
cmp r2, #0x0c
- blt .Lmemmove_fsrcul2loop4
+ blo .Lmemmove_fsrcul2loop4
sub r2, r2, #0x0c
push {r4, r5}
@@ -240,10 +240,10 @@ ENTRY(bcopy)
#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
- bge .Lmemmove_fsrcul2loop16
+ bhs .Lmemmove_fsrcul2loop16
pop {r4, r5}
adds r2, r2, #0x0c
- blt .Lmemmove_fsrcul2l4
+ blo .Lmemmove_fsrcul2l4
.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
@@ -259,7 +259,7 @@ ENTRY(bcopy)
#endif
str r12, [r0], #4
subs r2, r2, #4
- bge .Lmemmove_fsrcul2loop4
+ bhs .Lmemmove_fsrcul2loop4
.Lmemmove_fsrcul2l4:
sub r1, r1, #2
@@ -267,7 +267,7 @@ ENTRY(bcopy)
.Lmemmove_fsrcul3:
cmp r2, #0x0c
- blt .Lmemmove_fsrcul3loop4
+ blo .Lmemmove_fsrcul3loop4
sub r2, r2, #0x0c
push {r4, r5}
@@ -297,10 +297,10 @@ ENTRY(bcopy)
#endif
stmia r0!, {r3-r5, r12}
subs r2, r2, #0x10
- bge .Lmemmove_fsrcul3loop16
+ bhs .Lmemmove_fsrcul3loop16
pop {r4, r5}
adds r2, r2, #0x0c
- blt .Lmemmove_fsrcul3l4
+ blo .Lmemmove_fsrcul3l4
.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
@@ -316,7 +316,7 @@ ENTRY(bcopy)
#endif
str r12, [r0], #4
subs r2, r2, #4
- bge .Lmemmove_fsrcul3loop4
+ bhs .Lmemmove_fsrcul3loop4
.Lmemmove_fsrcul3l4:
sub r1, r1, #1
@@ -326,7 +326,7 @@ ENTRY(bcopy)
add r1, r1, r2
add r0, r0, r2
subs r2, r2, #4
- blt .Lmemmove_bl4 /* less than 4 bytes */
+ blo .Lmemmove_bl4 /* less than 4 bytes */
ands r12, r0, #3
bne .Lmemmove_bdestul /* oh unaligned destination addr */
ands r12, r1, #3
@@ -335,10 +335,10 @@ ENTRY(bcopy)
.Lmemmove_bt8:
/* We have aligned source and destination */
subs r2, r2, #8
- blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
+ blo .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
push {r4, lr}
subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
- blt .Lmemmove_bl32
+ blo .Lmemmove_bl32
/* blat 32 bytes at a time */
/* XXX for really big copies perhaps we should use more registers */
@@ -348,28 +348,28 @@ ENTRY(bcopy)
ldmdb r1!, {r3, r4, r12, lr}
stmdb r0!, {r3, r4, r12, lr}
subs r2, r2, #0x20
- bge .Lmemmove_bloop32
+ bhs .Lmemmove_bloop32
.Lmemmove_bl32:
cmn r2, #0x10
- ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmdbge r0!, {r3, r4, r12, lr}
- subge r2, r2, #0x10
+ ldmdbhs r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
+ stmdbhs r0!, {r3, r4, r12, lr}
+ subhs r2, r2, #0x10
adds r2, r2, #0x14
- ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
- stmdbge r0!, {r3, r12, lr}
- subge r2, r2, #0x0c
+ ldmdbhs r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
+ stmdbhs r0!, {r3, r12, lr}
+ subhs r2, r2, #0x0c
pop {r4, lr}
.Lmemmove_bl12:
adds r2, r2, #8
- blt .Lmemmove_bl4
+ blo .Lmemmove_bl4
subs r2, r2, #4
- ldrlt r3, [r1, #-4]!
- strlt r3, [r0, #-4]!
- ldmdbge r1!, {r3, r12}
- stmdbge r0!, {r3, r12}
- subge r2, r2, #4
+ ldrlo r3, [r1, #-4]!
+ strlo r3, [r0, #-4]!
+ ldmdbhs r1!, {r3, r12}
+ stmdbhs r0!, {r3, r12}
+ subhs r2, r2, #4
.Lmemmove_bl4:
/* less than 4 bytes to go */
@@ -380,10 +380,10 @@ ENTRY(bcopy)
cmp r2, #2
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
- ldrbge r3, [r1, #-1]!
- strbge r3, [r0, #-1]!
- ldrbgt r3, [r1, #-1]!
- strbgt r3, [r0, #-1]!
+ ldrbhs r3, [r1, #-1]!
+ strbhs r3, [r0, #-1]!
+ ldrbhi r3, [r1, #-1]!
+ strbhi r3, [r0, #-1]!
RET
/* erg - unaligned destination */
@@ -393,12 +393,12 @@ ENTRY(bcopy)
/* align destination with byte copies */
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
- ldrbge r3, [r1, #-1]!
- strbge r3, [r0, #-1]!
- ldrbgt r3, [r1, #-1]!
- strbgt r3, [r0, #-1]!
+ ldrbhs r3, [r1, #-1]!
+ strbhs r3, [r0, #-1]!
+ ldrbhi r3, [r1, #-1]!
+ strbhi r3, [r0, #-1]!
subs r2, r2, r12
- blt .Lmemmove_bl4 /* less than 4 bytes to go */
+ blo .Lmemmove_bl4 /* less than 4 bytes to go */
ands r12, r1, #3
beq .Lmemmove_bt8 /* we have an aligned source */
@@ -408,10 +408,10 @@ ENTRY(bcopy)
bic r1, r1, #3
ldr r3, [r1, #0]
cmp r12, #2
- blt .Lmemmove_bsrcul1
+ blo .Lmemmove_bsrcul1
beq .Lmemmove_bsrcul2
cmp r2, #0x0c
- blt .Lmemmove_bsrcul3loop4
+ blo .Lmemmove_bsrcul3loop4
sub r2, r2, #0x0c
push {r4, r5, lr}
@@ -441,10 +441,10 @@ ENTRY(bcopy)
#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
- bge .Lmemmove_bsrcul3loop16
+ bhs .Lmemmove_bsrcul3loop16
pop {r4, r5, lr}
adds r2, r2, #0x0c
- blt .Lmemmove_bsrcul3l4
+ blo .Lmemmove_bsrcul3l4
.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
@@ -460,7 +460,7 @@ ENTRY(bcopy)
#endif
str r12, [r0, #-4]!
subs r2, r2, #4
- bge .Lmemmove_bsrcul3loop4
+ bhs .Lmemmove_bsrcul3loop4
.Lmemmove_bsrcul3l4:
add r1, r1, #3
@@ -468,7 +468,7 @@ ENTRY(bcopy)
.Lmemmove_bsrcul2:
cmp r2, #0x0c
- blt .Lmemmove_bsrcul2loop4
+ blo .Lmemmove_bsrcul2loop4
sub r2, r2, #0x0c
push {r4, r5, lr}
@@ -498,10 +498,10 @@ ENTRY(bcopy)
#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
- bge .Lmemmove_bsrcul2loop16
+ bhs .Lmemmove_bsrcul2loop16
pop {r4, r5, lr}
adds r2, r2, #0x0c
- blt .Lmemmove_bsrcul2l4
+ blo .Lmemmove_bsrcul2l4
.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
@@ -517,7 +517,7 @@ ENTRY(bcopy)
#endif
str r12, [r0, #-4]!
subs r2, r2, #4
- bge .Lmemmove_bsrcul2loop4
+ bhs .Lmemmove_bsrcul2loop4
.Lmemmove_bsrcul2l4:
add r1, r1, #2
@@ -525,7 +525,7 @@ ENTRY(bcopy)
.Lmemmove_bsrcul1:
cmp r2, #0x0c
- blt .Lmemmove_bsrcul1loop4
+ blo .Lmemmove_bsrcul1loop4
sub r2, r2, #0x0c
push {r4, r5, lr}
@@ -555,10 +555,10 @@ ENTRY(bcopy)
#endif
stmdb r0!, {r4, r5, r12, lr}
subs r2, r2, #0x10
- bge .Lmemmove_bsrcul1loop32
+ bhs .Lmemmove_bsrcul1loop32
pop {r4, r5, lr}
adds r2, r2, #0x0c
- blt .Lmemmove_bsrcul1l4
+ blo .Lmemmove_bsrcul1l4
.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
@@ -574,7 +574,7 @@ ENTRY(bcopy)
#endif
str r12, [r0, #-4]!
subs r2, r2, #4
- bge .Lmemmove_bsrcul1loop4
+ bhs .Lmemmove_bsrcul1loop4
.Lmemmove_bsrcul1l4:
add r1, r1, #1
Index: src/common/lib/libc/arch/arm/string/memset.S
diff -u src/common/lib/libc/arch/arm/string/memset.S:1.8 src/common/lib/libc/arch/arm/string/memset.S:1.8.30.1
--- src/common/lib/libc/arch/arm/string/memset.S:1.8 Thu Mar 26 13:34:51 2015
+++ src/common/lib/libc/arch/arm/string/memset.S Sat Jul 20 14:52:04 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: memset.S,v 1.8 2015/03/26 13:34:51 justin Exp $ */
+/* $NetBSD: memset.S,v 1.8.30.1 2024/07/20 14:52:04 martin Exp $ */
/*
* Copyright 2003 Wasabi Systems, Inc.
@@ -110,7 +110,7 @@ ENTRY(memset)
#endif
cmp r1, #0x04 /* Do we have less than 4 bytes */
mov ip, r0
- blt .Lmemset_lessthanfour
+ blo .Lmemset_lessthanfour
/* Ok first we will word align the address */
ands r2, ip, #0x03 /* Get the bottom two bits */
@@ -134,50 +134,50 @@ ENTRY(memset)
strne r3, [ip], #0x04
cmp r1, #0x10
#endif
- blt .Lmemset_loop4 /* If less than 16 then use words */
+ blo .Lmemset_loop4 /* If less than 16 then use words */
mov r2, r3 /* Duplicate data */
cmp r1, #0x80 /* If < 128 then skip the big loop */
- blt .Lmemset_loop32
+ blo .Lmemset_loop32
/* Do 128 bytes at a time */
.Lmemset_loop128:
subs r1, r1, #0x80
#ifdef _ARM_ARCH_DWORD_OK
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
#else
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
#endif
- bgt .Lmemset_loop128
+ bhi .Lmemset_loop128
RETc(eq) /* Zero length so just exit */
add r1, r1, #0x80 /* Adjust for extra sub */
@@ -186,38 +186,38 @@ ENTRY(memset)
.Lmemset_loop32:
subs r1, r1, #0x20
#ifdef _ARM_ARCH_DWORD_OK
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
#else
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
#endif
- bgt .Lmemset_loop32
+ bhi .Lmemset_loop32
RETc(eq) /* Zero length so just exit */
adds r1, r1, #0x10 /* Partially adjust for extra sub */
/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_DWORD_OK
- strdge r2, r3, [ip], #0x08
- strdge r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
+ strdhs r2, r3, [ip], #0x08
#else
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
+ stmiahs ip!, {r2-r3}
#endif
RETc(eq) /* Zero length so just exit */
- addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
+ addlo r1, r1, #0x10 /* Possibly adjust for extra sub */
/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
subs r1, r1, #0x04
- strge r3, [ip], #0x04
- bgt .Lmemset_loop4
+ strhs r3, [ip], #0x04
+ bhi .Lmemset_loop4
RETc(eq) /* Zero length so just exit */
#ifdef _ARM_ARCH_DWORD_OK
@@ -230,27 +230,27 @@ ENTRY(memset)
#endif
strb r3, [ip], #0x01 /* Set 1 byte */
- strbge r3, [ip], #0x01 /* Set another byte */
- strbgt r3, [ip] /* and a third */
+ strbhs r3, [ip], #0x01 /* Set another byte */
+ strbhi r3, [ip] /* and a third */
RET /* Exit */
.Lmemset_wordunaligned:
rsb r2, r2, #0x004
strb r3, [ip], #0x01 /* Set 1 byte */
cmp r2, #0x02
- strbge r3, [ip], #0x01 /* Set another byte */
+ strbhs r3, [ip], #0x01 /* Set another byte */
sub r1, r1, r2
- strbgt r3, [ip], #0x01 /* and a third */
+ strbhi r3, [ip], #0x01 /* and a third */
cmp r1, #0x04 /* More than 4 bytes left? */
- bge .Lmemset_wordaligned /* Yup */
+ bhs .Lmemset_wordaligned /* Yup */
.Lmemset_lessthanfour:
cmp r1, #0x00
RETc(eq) /* Zero length so exit */
strb r3, [ip], #0x01 /* Set 1 byte */
cmp r1, #0x02
- strbge r3, [ip], #0x01 /* Set another byte */
- strbgt r3, [ip] /* and a third */
+ strbhs r3, [ip], #0x01 /* Set another byte */
+ strbhi r3, [ip] /* and a third */
RET /* Exit */
#ifdef _BZERO
END(bzero)
Index: src/common/lib/libc/arch/arm/string/strlen_neon.S
diff -u src/common/lib/libc/arch/arm/string/strlen_neon.S:1.3 src/common/lib/libc/arch/arm/string/strlen_neon.S:1.3.52.1
--- src/common/lib/libc/arch/arm/string/strlen_neon.S:1.3 Fri Dec 28 05:15:08 2012
+++ src/common/lib/libc/arch/arm/string/strlen_neon.S Sat Jul 20 14:52:04 2024
@@ -29,7 +29,7 @@
#include <machine/asm.h>
-RCSID("$NetBSD: strlen_neon.S,v 1.3 2012/12/28 05:15:08 matt Exp $")
+RCSID("$NetBSD: strlen_neon.S,v 1.3.52.1 2024/07/20 14:52:04 martin Exp $")
.text
ENTRY(strlen)
@@ -47,10 +47,10 @@ ENTRY(strlen)
bic ip, ip, #15 /* qword align string address */
lsl r1, r1, #3 /* convert to bits */
cmp r1, #64
- rsbgt r1, r1, #128 /* > 64? BE so we are shifting LSW right */
- movgt r2, #0 /* > 64? leave MSW alone */
- rsble r2, r1, #64 /* <=64? BE so we are shifting MSW right */
- movle r1, #64 /* <=64? clear LSW */
+ rsbhi r1, r1, #128 /* > 64? BE so we are shifting LSW right */
+ movhi r2, #0 /* > 64? leave MSW alone */
+ rsbls r2, r1, #64 /* <=64? BE so we are shifting MSW right */
+ movls r1, #64 /* <=64? clear LSW */
vmov d0, r1, r2 /* set shifts for lower and upper halves */
vmovl.u32 q0, d0 /* 2 U32 -> 2 U64 */
vshl.u64 q2, q2, q0 /* shift */