Module Name: src Committed By: riastradh Date: Sun Nov 14 16:56:32 UTC 2021
Modified Files: src/sys/arch/aarch64/aarch64: locore.S src/sys/arch/arm/arm: armv6_start.S cpu_subr.c Log Message: arm: Fix CPU startup synchronization. - Use load-acquire instead of (wrong) membar_consumer then load in cpu_boot_secondary_processors and cpu_hatched_p. => (Could use load then membar_consumer instead but load-acquire is shorter.) - Issue dmb ish before setting or clearing the bit in cpu_set_hatched and cpu_clr_mbox to effect a store-release. => (Could use membar_exit, which is semantically weaker than dmb ish but on arm is just implemented as dmb ish.) => (Could use stlr except we don't have atomic_ops(9) to do that.) This way, everything before cpu_set_hatched or cpu_clr_mbox is guaranteed to happen before everything after cpu_boot_secondary_processors, which was previously not guaranteed. To generate a diff of this commit: cvs rdiff -u -r1.82 -r1.83 src/sys/arch/aarch64/aarch64/locore.S cvs rdiff -u -r1.36 -r1.37 src/sys/arch/arm/arm/armv6_start.S cvs rdiff -u -r1.4 -r1.5 src/sys/arch/arm/arm/cpu_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/aarch64/locore.S diff -u src/sys/arch/aarch64/aarch64/locore.S:1.82 src/sys/arch/aarch64/aarch64/locore.S:1.83 --- src/sys/arch/aarch64/aarch64/locore.S:1.82 Sun Oct 31 16:23:47 2021 +++ src/sys/arch/aarch64/aarch64/locore.S Sun Nov 14 16:56:32 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.82 2021/10/31 16:23:47 skrll Exp $ */ +/* $NetBSD: locore.S,v 1.83 2021/11/14 16:56:32 riastradh Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -38,7 +38,7 @@ #include <aarch64/hypervisor.h> #include "assym.h" -RCSID("$NetBSD: locore.S,v 1.82 2021/10/31 16:23:47 skrll Exp $") +RCSID("$NetBSD: locore.S,v 1.83 2021/11/14 16:56:32 riastradh Exp $") #ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED #define MAIR_DEVICE_MEM MAIR_DEVICE_nGnRnE @@ -520,8 +520,7 @@ mp_vstart: /* wait for the mailbox start bit to become true */ 1: - dmb sy - ldr x20, [x28] + ldar x20, [x28] /* matches cpu_boot_secondary_processors */ tst x20, x29 bne 9f wfe Index: src/sys/arch/arm/arm/armv6_start.S diff -u src/sys/arch/arm/arm/armv6_start.S:1.36 src/sys/arch/arm/arm/armv6_start.S:1.37 --- src/sys/arch/arm/arm/armv6_start.S:1.36 Sun Sep 12 07:14:50 2021 +++ src/sys/arch/arm/arm/armv6_start.S Sun Nov 14 16:56:32 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: armv6_start.S,v 1.36 2021/09/12 07:14:50 skrll Exp $ */ +/* $NetBSD: armv6_start.S,v 1.37 2021/11/14 16:56:32 riastradh Exp $ */ /*- * Copyright (c) 2012, 2017, 2018 The NetBSD Foundation, Inc. @@ -924,8 +924,8 @@ armv7_mpcontinuation: lsl r5, R_INDEX // ... for our cpu /* wait for the mailbox start bit to become true */ -1: dmb // data memory barrier - ldr r2, [r6] // load mbox +1: ldr r2, [r6] // load mbox + dmb // make it a load-acquire tst r2, r5 // is our bit set? wfeeq // no, back to waiting beq 1b // no, and try again Index: src/sys/arch/arm/arm/cpu_subr.c diff -u src/sys/arch/arm/arm/cpu_subr.c:1.4 src/sys/arch/arm/arm/cpu_subr.c:1.5 --- src/sys/arch/arm/arm/cpu_subr.c:1.4 Sun Oct 31 16:23:47 2021 +++ src/sys/arch/arm/arm/cpu_subr.c Sun Nov 14 16:56:32 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_subr.c,v 1.4 2021/10/31 16:23:47 skrll Exp $ */ +/* $NetBSD: cpu_subr.c,v 1.5 2021/11/14 16:56:32 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -33,7 +33,7 @@ #include "opt_multiprocessor.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.4 2021/10/31 16:23:47 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.5 2021/11/14 16:56:32 riastradh Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -81,6 +81,7 @@ cpu_boot_secondary_processors(void) VPRINTF("%s: starting secondary processors\n", __func__); /* send mbox to have secondary processors do cpu_hatch() */ + dmb(ish); /* store-release matches locore.S/armv6_start.S */ for (size_t n = 0; n < __arraycount(arm_cpu_mbox); n++) atomic_or_ulong(&arm_cpu_mbox[n], arm_cpu_hatched[n]); @@ -95,7 +96,8 @@ cpu_boot_secondary_processors(void) const size_t off = cpuno / CPUINDEX_DIVISOR; const u_long bit = __BIT(cpuno % CPUINDEX_DIVISOR); - while (membar_consumer(), arm_cpu_mbox[off] & bit) { + /* load-acquire matches cpu_clr_mbox */ + while (atomic_load_acquire(&arm_cpu_mbox[off]) & bit) { __asm __volatile ("wfe"); } /* Add processor to kcpuset */ @@ -111,8 +113,8 @@ cpu_hatched_p(u_int cpuindex) const u_int off = cpuindex / CPUINDEX_DIVISOR; const u_int bit = cpuindex % CPUINDEX_DIVISOR; - membar_consumer(); - return (arm_cpu_hatched[off] & __BIT(bit)) != 0; + /* load-acquire matches cpu_set_hatched */ + return (atomic_load_acquire(&arm_cpu_hatched[off]) & __BIT(bit)) != 0; } void @@ -122,6 +124,7 @@ cpu_set_hatched(int cpuindex) const size_t off = cpuindex / CPUINDEX_DIVISOR; const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR); + dmb(ish); /* store-release matches cpu_hatched_p */ atomic_or_ulong(&arm_cpu_hatched[off], bit); dsb(ishst); sev(); @@ -135,6 +138,7 @@ cpu_clr_mbox(int cpuindex) const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR); /* Notify cpu_boot_secondary_processors that we're done */ + dmb(ish); /* store-release */ atomic_and_ulong(&arm_cpu_mbox[off], ~bit); dsb(ishst); sev();