On Wed, 14 Mar 2018, Peter Zijlstra wrote:
> On Tue, Mar 13, 2018 at 01:59:24PM -0700, Thomas Garnier wrote: > > @@ -1576,7 +1578,9 @@ first_nmi: > > addq $8, (%rsp) /* Fix up RSP */ > > pushfq /* RFLAGS */ > > pushq $__KERNEL_CS /* CS */ > > - pushq $1f /* RIP */ > > + pushq %rax /* Support Position Independent Code */ > > + leaq 1f(%rip), %rax /* RIP */ > > + xchgq %rax, (%rsp) /* Restore RAX, put 1f */ > > iretq /* continues at repeat_nmi below */ > > UNWIND_HINT_IRET_REGS > > 1: > > Urgh, xchg with a memop has an implicit LOCK prefix. this_cpu_xchg uses no lock cmpxchg as a replacement to reduce latency. From linux/arch/x86/include/asm/percpu.h /* * xchg is implemented using cmpxchg without a lock prefix. xchg is * expensive due to the implied lock prefix. The processor cannot prefetch * cachelines if xchg is used. */ #define percpu_xchg_op(var, nval) \ ({ \ typeof(var) pxo_ret__; \ typeof(var) pxo_new__ = (nval); \ switch (sizeof(var)) { \ case 1: \ asm("\n\tmov "__percpu_arg(1)",%%al" \ "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ : "q" (pxo_new__) \ : "memory"); \ break; \ case 2: \ asm("\n\tmov "__percpu_arg(1)",%%ax" \ "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ : "r" (pxo_new__) \ : "memory"); \ break; \ case 4: \ asm("\n\tmov "__percpu_arg(1)",%%eax" \ "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ : "r" (pxo_new__) \ : "memory"); \ break; \ case 8: \ asm("\n\tmov "__percpu_arg(1)",%%rax" \ "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ : "r" (pxo_new__) \ : "memory"); \ break; \ default: __bad_percpu_size(); \ } \ pxo_ret__; \ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel