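On x86, an xchg instruction between a register and a memory operand carries an implicit lock prefix, i.e. it is always executed as a relatively expensive atomic operation. That atomicity is not needed here, so swap the two stack arguments with plain movl instructions through %eax instead.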

--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -802,8 +802,9 @@ ENTRY(bcopy)
  */
 ENTRY(memcpy)
        movl    4(%esp),%ecx
-       xchg    8(%esp),%ecx
-       movl    %ecx,4(%esp)
+       movl    8(%esp),%eax
+       movl    %ecx,8(%esp)
+       movl    %eax,4(%esp)
        jmp     _C_LABEL(bcopy)

Reply via email to