Module Name:    src
Committed By:   riastradh
Date:           Sun Nov 14 16:56:32 UTC 2021

Modified Files:
        src/sys/arch/aarch64/aarch64: locore.S
        src/sys/arch/arm/arm: armv6_start.S cpu_subr.c

Log Message:
arm: Fix CPU startup synchronization.

- Use load-acquire instead of (wrong) membar_consumer then load in
  cpu_boot_secondary_processors and cpu_hatched_p.

  => (Could use load then membar_consumer instead but load-acquire is
     shorter.)

- Issue  dmb ish  before setting or clearing the bit in
  cpu_set_hatched and cpu_clr_mbox to effect a store-release.

  => (Could use membar_exit, which is semantically weaker than  dmb ish
     but on arm is just implemented as  dmb ish.)

  => (Could use stlr except we don't have atomic_ops(9) to do that.)

This way, everything before cpu_set_hatched or cpu_clr_mbox is
guaranteed to happen before everything after
cpu_boot_secondary_processors, which was previously not guaranteed.


To generate a diff of this commit:
cvs rdiff -u -r1.82 -r1.83 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.36 -r1.37 src/sys/arch/arm/arm/armv6_start.S
cvs rdiff -u -r1.4 -r1.5 src/sys/arch/arm/arm/cpu_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.82 src/sys/arch/aarch64/aarch64/locore.S:1.83
--- src/sys/arch/aarch64/aarch64/locore.S:1.82	Sun Oct 31 16:23:47 2021
+++ src/sys/arch/aarch64/aarch64/locore.S	Sun Nov 14 16:56:32 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.82 2021/10/31 16:23:47 skrll Exp $	*/
+/*	$NetBSD: locore.S,v 1.83 2021/11/14 16:56:32 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -38,7 +38,7 @@
 #include <aarch64/hypervisor.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.82 2021/10/31 16:23:47 skrll Exp $")
+RCSID("$NetBSD: locore.S,v 1.83 2021/11/14 16:56:32 riastradh Exp $")
 
 #ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED
 #define	MAIR_DEVICE_MEM		MAIR_DEVICE_nGnRnE
@@ -520,8 +520,7 @@ mp_vstart:
 
 	/* wait for the mailbox start bit to become true */
 1:
-	dmb	sy
-	ldr	x20, [x28]
+	ldar	x20, [x28]	/* matches cpu_boot_secondary_processors */
 	tst	x20, x29
 	bne	9f
 	wfe

Index: src/sys/arch/arm/arm/armv6_start.S
diff -u src/sys/arch/arm/arm/armv6_start.S:1.36 src/sys/arch/arm/arm/armv6_start.S:1.37
--- src/sys/arch/arm/arm/armv6_start.S:1.36	Sun Sep 12 07:14:50 2021
+++ src/sys/arch/arm/arm/armv6_start.S	Sun Nov 14 16:56:32 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: armv6_start.S,v 1.36 2021/09/12 07:14:50 skrll Exp $	*/
+/*	$NetBSD: armv6_start.S,v 1.37 2021/11/14 16:56:32 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2012, 2017, 2018 The NetBSD Foundation, Inc.
@@ -924,8 +924,8 @@ armv7_mpcontinuation:
 	lsl	r5, R_INDEX			// ... for our cpu
 
 	/* wait for the mailbox start bit to become true */
-1:	dmb					// data memory barrier
-	ldr	r2, [r6]			// load mbox
+1:	ldr	r2, [r6]			// load mbox
+	dmb					//    make it a load-acquire
 	tst	r2, r5				// is our bit set?
 	wfeeq					//    no, back to waiting
 	beq	1b				//    no, and try again

Index: src/sys/arch/arm/arm/cpu_subr.c
diff -u src/sys/arch/arm/arm/cpu_subr.c:1.4 src/sys/arch/arm/arm/cpu_subr.c:1.5
--- src/sys/arch/arm/arm/cpu_subr.c:1.4	Sun Oct 31 16:23:47 2021
+++ src/sys/arch/arm/arm/cpu_subr.c	Sun Nov 14 16:56:32 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu_subr.c,v 1.4 2021/10/31 16:23:47 skrll Exp $	*/
+/*	$NetBSD: cpu_subr.c,v 1.5 2021/11/14 16:56:32 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.4 2021/10/31 16:23:47 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.5 2021/11/14 16:56:32 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -81,6 +81,7 @@ cpu_boot_secondary_processors(void)
 	VPRINTF("%s: starting secondary processors\n", __func__);
 
 	/* send mbox to have secondary processors do cpu_hatch() */
+	dmb(ish);	/* store-release matches locore.S/armv6_start.S */
 	for (size_t n = 0; n < __arraycount(arm_cpu_mbox); n++)
 		atomic_or_ulong(&arm_cpu_mbox[n], arm_cpu_hatched[n]);
 
@@ -95,7 +96,8 @@ cpu_boot_secondary_processors(void)
 		const size_t off = cpuno / CPUINDEX_DIVISOR;
 		const u_long bit = __BIT(cpuno % CPUINDEX_DIVISOR);
 
-		while (membar_consumer(), arm_cpu_mbox[off] & bit) {
+		/* load-acquire matches cpu_clr_mbox */
+		while (atomic_load_acquire(&arm_cpu_mbox[off]) & bit) {
 			__asm __volatile ("wfe");
 		}
 		/* Add processor to kcpuset */
@@ -111,8 +113,8 @@ cpu_hatched_p(u_int cpuindex)
 	const u_int off = cpuindex / CPUINDEX_DIVISOR;
 	const u_int bit = cpuindex % CPUINDEX_DIVISOR;
 
-	membar_consumer();
-	return (arm_cpu_hatched[off] & __BIT(bit)) != 0;
+	/* load-acquire matches cpu_set_hatched */
+	return (atomic_load_acquire(&arm_cpu_hatched[off]) & __BIT(bit)) != 0;
 }
 
 void
@@ -122,6 +124,7 @@ cpu_set_hatched(int cpuindex)
 	const size_t off = cpuindex / CPUINDEX_DIVISOR;
 	const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR);
 
+	dmb(ish);		/* store-release matches cpu_hatched_p */
 	atomic_or_ulong(&arm_cpu_hatched[off], bit);
 	dsb(ishst);
 	sev();
@@ -135,6 +138,7 @@ cpu_clr_mbox(int cpuindex)
 	const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR);
 
 	/* Notify cpu_boot_secondary_processors that we're done */
+	dmb(ish);		/* store-release */
 	atomic_and_ulong(&arm_cpu_mbox[off], ~bit);
 	dsb(ishst);
 	sev();

Reply via email to