Module Name:	src
Committed By:	snj
Date:		Wed May 13 00:35:16 UTC 2009

Modified Files:
	src/sys/arch/amd64/amd64 [netbsd-5]: lock_stubs.S
	src/sys/arch/i386/i386 [netbsd-5]: lock_stubs.S
	src/sys/arch/x86/x86 [netbsd-5]: patch.c

Log Message:
Pull up following revision(s) (requested by ad in ticket #725):
	sys/arch/amd64/amd64/lock_stubs.S: revision 1.22
	sys/arch/i386/i386/lock_stubs.S: revision 1.23
	sys/arch/x86/x86/patch.c: revision 1.18
A workaround for a bug in some Opteron revisions where locked
operations sometimes do not serve as memory barriers, allowing
memory references to bleed outside of critical sections.  It is
possible that this is the reason for pkgbuild's longstanding
crashiness.  This is not complete (the atomic ops need the same
treatment too).

To generate a diff of this commit:
cvs rdiff -u -r1.20.6.1 -r1.20.6.2 src/sys/arch/amd64/amd64/lock_stubs.S
cvs rdiff -u -r1.21.6.1 -r1.21.6.2 src/sys/arch/i386/i386/lock_stubs.S
cvs rdiff -u -r1.14.4.3 -r1.14.4.4 src/sys/arch/x86/x86/patch.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
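The mechanics of the workaround, in brief: each patchable return in the
lock stubs is emitted through the new RET(num) macro as the four-byte
sequence "ret; nop; nop; ret" (0xc3 0x90 0x90 0xc3), and its address is
recorded in a new x86_retpatch[] table.  On the affected Opteron
revisions, x86_patch() overwrites the first three bytes of each site
with 0x0f 0xae 0xe8, the encoding of lfence, so the stub returns via
"lfence; ret" and the fence restores the ordering that the locked
instruction was supposed to provide.  A minimal sketch of the rewrite
(illustrative only: patch_ret_to_lfence is a hypothetical name, and in
the kernel the store is done by patchbytes() in patch.c below):

#include <stdint.h>

/*
 * Sketch of the ret -> lfence rewrite described above.
 * Before: 0xc3 0x90 0x90 0xc3  (ret; nop; nop; ret)
 * After:  0x0f 0xae 0xe8 0xc3  (lfence; ret)
 */
static void
patch_ret_to_lfence(uint8_t *site)
{

	site[0] = 0x0f;		/* lfence opcode, byte 1 */
	site[1] = 0xae;		/* lfence opcode, byte 2 */
	site[2] = 0xe8;		/* lfence opcode, byte 3 */
	/* site[3] stays 0xc3 (ret) and is not touched */
}

Patching the ret sites rather than assembling an unconditional lfence
keeps the fence off the fast path on CPUs that do not need it.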
Modified files:

Index: src/sys/arch/amd64/amd64/lock_stubs.S
diff -u src/sys/arch/amd64/amd64/lock_stubs.S:1.20.6.1 src/sys/arch/amd64/amd64/lock_stubs.S:1.20.6.2
--- src/sys/arch/amd64/amd64/lock_stubs.S:1.20.6.1	Mon Feb  2 03:01:12 2009
+++ src/sys/arch/amd64/amd64/lock_stubs.S	Wed May 13 00:35:16 2009
@@ -1,7 +1,7 @@
-/*	$NetBSD: lock_stubs.S,v 1.20.6.1 2009/02/02 03:01:12 snj Exp $	*/
+/*	$NetBSD: lock_stubs.S,v 1.20.6.2 2009/05/13 00:35:16 snj Exp $	*/
 
 /*-
- * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -48,6 +48,7 @@
 #define	ENDLABEL(name,a)	.align	a; LABEL(name)
 
 #define	LOCK(num)	.Lpatch/**/num: lock
+#define	RET(num)	.Lret/**/num: ret; nop; nop; ret
 
 #ifndef LOCKDEBUG
@@ -64,7 +65,7 @@
 	LOCK(1)
 	cmpxchgq	%rcx, (%rdi)
 	jnz	1f
-	ret
+	RET(1)
 1:
 	jmp	_C_LABEL(mutex_vector_enter)
@@ -106,7 +107,7 @@
 	testb	%al, %al
 	jnz	1f
 #endif
-	ret
+	RET(2)
 1:
 	jmp	_C_LABEL(mutex_spin_retry)	/* failed; hard case */
@@ -186,7 +187,7 @@
 	LOCK(2)
 	cmpxchgq	%rdx, (%rdi)
 	jnz	1f
-	ret
+	RET(3)
 1:
 	jmp	0b
@@ -199,7 +200,7 @@
 	LOCK(3)
 	cmpxchgq	%rcx, (%rdi)
 	jnz	3f
-	ret
+	RET(4)
 3:
 	jmp	_C_LABEL(rw_vector_enter)
@@ -256,13 +257,13 @@
 	movq	(%rdi), %rax
 0:
 	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	3f
+	jnz	4f
 	leaq	RW_READ_INCR(%rax), %rdx
 	LOCK(8)
 	cmpxchgq	%rdx, (%rdi)
 	jnz	1f
 	movl	%edx, %eax		/* nonzero */
-	ret
+	RET(5)
 1:
 	jmp	0b
@@ -276,10 +277,12 @@
 	cmpxchgq	%rcx, (%rdi)
 	movl	$0, %eax
 	setz	%al
+3:
+	RET(6)
 	ret
-
-3:	xorl	%eax, %eax
-	ret
+4:
+	xorl	%eax, %eax
+	jmp	3b
 
 #endif	/* LOCKDEBUG */
@@ -296,7 +299,7 @@
 	LOCK(6)
 	cmpxchgb	%ah, (%rdi)
 	jnz	2f
-	ret
+	RET(7)
 2:
 	movl	$0x0100, %eax
 	pause
@@ -315,8 +318,8 @@
 	LOCK(7)
 	cmpxchgb	%ah, (%rdi)
 	movl	$0, %eax
-	setz	%al
-	ret
+	setz	%al
+	RET(8)
 
 /*
  * Patchpoints to replace with NOP when ncpu == 1.
  */
@@ -328,3 +331,10 @@
 	.quad	.Lpatch9
 	.quad	0
 #endif
+
+LABEL(x86_retpatch)
+#ifndef LOCKDEBUG
+	.long	.Lret1, .Lret2, .Lret3, .Lret4, .Lret5, .Lret6
+#endif
+	.long	.Lret7, .Lret8
+	.long	0

Index: src/sys/arch/i386/i386/lock_stubs.S
diff -u src/sys/arch/i386/i386/lock_stubs.S:1.21.6.1 src/sys/arch/i386/i386/lock_stubs.S:1.21.6.2
--- src/sys/arch/i386/i386/lock_stubs.S:1.21.6.1	Mon Feb  2 03:01:12 2009
+++ src/sys/arch/i386/i386/lock_stubs.S	Wed May 13 00:35:16 2009
@@ -1,7 +1,7 @@
-/*	$NetBSD: lock_stubs.S,v 1.21.6.1 2009/02/02 03:01:12 snj Exp $	*/
+/*	$NetBSD: lock_stubs.S,v 1.21.6.2 2009/05/13 00:35:16 snj Exp $	*/
 
 /*-
- * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -35,7 +35,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: lock_stubs.S,v 1.21.6.1 2009/02/02 03:01:12 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lock_stubs.S,v 1.21.6.2 2009/05/13 00:35:16 snj Exp $");
 
 #include "opt_lockdebug.h"
@@ -46,6 +46,7 @@
 #define	ALIGN64		.align	64
 #define	ALIGN32		.align	32
 #define	LOCK(num)	.Lpatch/**/num: lock
+#define	RET(num)	.Lret/**/num: ret; nop; nop; ret
 #define	ENDLABEL(name,a)	.align	a; LABEL(name)
 
 #if !defined(LOCKDEBUG)
@@ -59,12 +60,12 @@
 
 ENTRY(mutex_enter)
 	movl	4(%esp), %edx
-	movl	CPUVAR(CURLWP), %ecx
 	xorl	%eax, %eax
+	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
 	LOCK(1)
 	cmpxchgl	%ecx, (%edx)
 	jnz	1f
-	ret
+	RET(1)
 1:
 	jmp	_C_LABEL(mutex_vector_enter)
 END(mutex_enter)
@@ -80,8 +81,8 @@
  */
 ENTRY(mutex_exit)
 	movl	4(%esp), %edx
-	movl	CPUVAR(CURLWP), %eax
 	xorl	%ecx, %ecx
+	movl	%fs:CPU_INFO_CURLWP(%ecx), %eax
 	cmpxchgl	%ecx, (%edx)
 	jnz	1f
 	ret
@@ -110,20 +111,20 @@
 	LOCK(2)
 	cmpxchgl	%ecx, (%edx)
 	jnz	1f
-	ret
+	RET(2)
 1:
 	jmp	0b
 
 /*
  * Writer
  */
-2:	movl	CPUVAR(CURLWP), %ecx
-	xorl	%eax, %eax
+2:	xorl	%eax, %eax
+	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
 	orl	$RW_WRITE_LOCKED, %ecx
 	LOCK(3)
 	cmpxchgl	%ecx, (%edx)
 	jnz	3f
-	ret
+	RET(3)
 3:
 	jmp	_C_LABEL(rw_vector_enter)
 END(rw_enter)
@@ -187,13 +188,13 @@
 	movl	(%edx), %eax
 0:
 	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	3f
+	jnz	4f
 	leal	RW_READ_INCR(%eax), %ecx
 	LOCK(12)
 	cmpxchgl	%ecx, (%edx)
 	jnz	1f
 	movl	%edx, %eax		/* nonzero */
-	ret
+	RET(4)
 1:
 	jmp	0b
@@ -201,17 +202,18 @@
  * Writer
  */
 2:
-	movl	CPUVAR(CURLWP), %ecx
-	orl	$RW_WRITE_LOCKED, %ecx
 	xorl	%eax, %eax
+	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
+	orl	$RW_WRITE_LOCKED, %ecx
 	LOCK(13)
 	cmpxchgl	%ecx, (%edx)
 	movl	$0, %eax
 	setz	%al
-	ret
 3:
+	RET(5)
+4:
 	xorl	%eax, %eax
-	ret
+	jmp	3b
 END(rw_tryenter)
 
 #ifndef __XEN__
@@ -237,7 +239,7 @@
 	xchgb	%al, MTX_LOCK(%edx)	/* lock it */
 	testb	%al, %al
 	jnz	3f
-	ret
+	RET(6)
 3:
 	jmp	_C_LABEL(mutex_spin_retry)
@@ -326,7 +328,7 @@
 	LOCK(6)
 	cmpxchgb	%ah, (%edx)
 	jnz	2f
-	ret
+	RET(7)
 2:
 	movl	$0x0100, %eax
 	pause
@@ -350,7 +352,7 @@
 	cmpxchgb	%ah, (%edx)
 	movl	$0, %eax
 	setz	%al
-	ret
+	RET(8)
 END(__cpu_simple_lock_try)
 
 /*
  * Patchpoints to replace with NOP when ncpu == 1.
  */
@@ -365,3 +367,13 @@
 	.long	.Lpatch13
 	.long	0
 #endif
+
+LABEL(x86_retpatch)
+#ifndef LOCKDEBUG
+	.long	.Lret1, .Lret2, .Lret3, .Lret4, .Lret5
+#ifndef __XEN__
+	.long	.Lret6
+#endif
+#endif
+	.long	.Lret7, .Lret8
+	.long	0

Index: src/sys/arch/x86/x86/patch.c
diff -u src/sys/arch/x86/x86/patch.c:1.14.4.3 src/sys/arch/x86/x86/patch.c:1.14.4.4
--- src/sys/arch/x86/x86/patch.c:1.14.4.3	Fri Apr  3 17:42:36 2009
+++ src/sys/arch/x86/x86/patch.c	Wed May 13 00:35:16 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: patch.c,v 1.14.4.3 2009/04/03 17:42:36 snj Exp $	*/
+/*	$NetBSD: patch.c,v 1.14.4.4 2009/05/13 00:35:16 snj Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.14.4.3 2009/04/03 17:42:36 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.14.4.4 2009/05/13 00:35:16 snj Exp $");
 
 #include "opt_lockdebug.h"
@@ -74,6 +74,7 @@
 void	_atomic_cas_cx8_end(void);
 
 extern void	*x86_lockpatch[];
+extern void	*x86_retpatch[];
 extern void	*atomic_lockpatch[];
 
 #define	X86_NOP	0x90
@@ -124,12 +125,14 @@
 }
 
 static inline void __unused
-patchbytes(void *addr, const int byte1, const int byte2)
+patchbytes(void *addr, const int byte1, const int byte2, const int byte3)
 {
 
 	((uint8_t *)addr)[0] = (uint8_t)byte1;
 	if (byte2 != -1)
 		((uint8_t *)addr)[1] = (uint8_t)byte2;
+	if (byte3 != -1)
+		((uint8_t *)addr)[2] = (uint8_t)byte3;
 }
 
 void
@@ -138,6 +141,7 @@
 	static bool first, second;
 	u_long psl;
 	u_long cr0;
+	int i;
 
 	if (early) {
 		if (first)
@@ -160,13 +164,11 @@
 #if !defined(GPROF)
 	if (!early && ncpu == 1) {
 #ifndef LOCKDEBUG
-		int i;
-
 		/* Uniprocessor: kill LOCK prefixes. */
 		for (i = 0; x86_lockpatch[i] != 0; i++)
-			patchbytes(x86_lockpatch[i], X86_NOP, -1);
+			patchbytes(x86_lockpatch[i], X86_NOP, -1, -1);
 		for (i = 0; atomic_lockpatch[i] != 0; i++)
-			patchbytes(atomic_lockpatch[i], X86_NOP, -1);
+			patchbytes(atomic_lockpatch[i], X86_NOP, -1, -1);
 #endif	/* !LOCKDEBUG */
 	}
 	if (!early && (cpu_feature & CPUID_SSE2) != 0) {
@@ -214,6 +216,21 @@
 #endif	/* !LOCKDEBUG */
 	}
 
+	/*
+	 * On some Opteron revisions, locked operations erroneously
+	 * allow memory references to be `bled' outside of critical
+	 * sections.  Apply workaround.
+	 */
+	if (cpu_vendor == CPUVENDOR_AMD &&
+	    (CPUID2FAMILY(cpu_info_primary.ci_signature) == 0xe ||
+	    (CPUID2FAMILY(cpu_info_primary.ci_signature) == 0xf &&
+	    CPUID2EXTMODEL(cpu_info_primary.ci_signature) < 0x4))) {
+		for (i = 0; x86_retpatch[i] != 0; i++) {
+			/* ret,nop,nop,ret -> lfence,ret */
+			patchbytes(x86_retpatch[i], 0x0f, 0xae, 0xe8);
+		}
+	}
+
 	/* Write back and invalidate cache, flush pipelines. */
 	wbinvd();
 	x86_flush();
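For reference, the new CPU test above keys off the primary CPU's CPUID
signature.  A standalone sketch of the same predicate, assuming the
usual NetBSD definitions of CPUID2FAMILY (bits 8-11) and CPUID2EXTMODEL
(bits 16-19); the helper name is hypothetical:

#include <stdbool.h>
#include <stdint.h>

/* Assumed to match the macros in NetBSD's x86 specialreg.h. */
#define CPUID2FAMILY(sig)	(((sig) >> 8) & 0xf)
#define CPUID2EXTMODEL(sig)	(((sig) >> 16) & 0xf)

/* Hypothetical helper mirroring the check added to x86_patch(). */
static bool
opteron_ret_workaround_needed(uint32_t ci_signature)
{

	return CPUID2FAMILY(ci_signature) == 0xe ||
	    (CPUID2FAMILY(ci_signature) == 0xf &&
	    CPUID2EXTMODEL(ci_signature) < 0x4);
}

When the predicate is true, every entry in x86_retpatch[] is rewritten;
all other CPUs keep the plain ret and pay no fence cost.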