Module Name:    src
Committed By:   ryo
Date:           Mon Jul  9 06:19:53 UTC 2018

Modified Files:
        src/sys/arch/aarch64/aarch64: cpu.c cpu_machdep.c db_machdep.c
            genassym.cf locore.S
        src/sys/arch/aarch64/include: cpu.h locore.h machdep.h

Log Message:
add MULTIPROCESSOR support


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/aarch64/aarch64/cpu.c
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/aarch64/aarch64/cpu_machdep.c
cvs rdiff -u -r1.2 -r1.3 src/sys/arch/aarch64/aarch64/db_machdep.c \
    src/sys/arch/aarch64/aarch64/genassym.cf
cvs rdiff -u -r1.7 -r1.8 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.2 -r1.3 src/sys/arch/aarch64/include/cpu.h
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/aarch64/include/locore.h
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/aarch64/include/machdep.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/cpu.c
diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.1 src/sys/arch/aarch64/aarch64/cpu.c:1.2
--- src/sys/arch/aarch64/aarch64/cpu.c:1.1	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/aarch64/cpu.c	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.1 2018/04/01 04:35:03 ryo Exp $ */
+/* $NetBSD: cpu.c,v 1.2 2018/07/09 06:19:53 ryo Exp $ */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,13 +27,16 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.1 2018/04/01 04:35:03 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.2 2018/07/09 06:19:53 ryo Exp $");
 
 #include "locators.h"
+#include "opt_arm_debug.h"
+#include "opt_fdt.h"
 #include "opt_multiprocessor.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/device.h>
 #include <sys/cpu.h>
 #include <sys/kmem.h>
@@ -43,71 +46,106 @@ __KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.1 
 #include <aarch64/cpufunc.h>
 #include <aarch64/machdep.h>
 
+#ifdef FDT
+#include <arm/fdt/arm_fdtvar.h>
+#endif
+
 void cpu_attach(device_t, cpuid_t);
-static void cpu_identify(device_t self, struct cpu_info *);
+static void identify_aarch64_model(uint32_t, char *, size_t);
+static void cpu_identify(device_t self, struct cpu_info *, uint32_t, uint64_t);
+static void cpu_identify1(device_t self, struct cpu_info *);
 static void cpu_identify2(device_t self, struct cpu_info *);
 
+#ifdef MULTIPROCESSOR
+volatile u_int arm_cpu_hatched __cacheline_aligned = 0;
+volatile uint32_t arm_cpu_mbox __cacheline_aligned = 0;
+u_int arm_cpu_max = 1;
+
+/* stored by secondary processors (available when arm_cpu_hatched) */
+uint32_t cpus_midr[MAXCPUS];
+uint64_t cpus_mpidr[MAXCPUS];
+
+static kmutex_t cpu_hatch_lock;
+#endif /* MULTIPROCESSOR */
+
+/* Our exported CPU info; we can have only one. */
+struct cpu_info cpu_info_store __cacheline_aligned = {
+	.ci_cpl = IPL_HIGH,
+	.ci_curlwp = &lwp0
+};
+
+#ifdef MULTIPROCESSOR
+#define NCPUINFO	MAXCPUS
+#else
+#define NCPUINFO	1
+#endif /* MULTIPROCESSOR */
+
+struct cpu_info *cpu_info[NCPUINFO] = {
+	[0] = &cpu_info_store
+};
+
 void
 cpu_attach(device_t dv, cpuid_t id)
 {
 	struct cpu_info *ci;
+	uint64_t mpidr;
+	uint32_t midr;
 
 	if (id == 0) {
 		ci = curcpu();
-
+		midr = reg_midr_el1_read();
+		mpidr = reg_mpidr_el1_read();
 	} else {
 #ifdef MULTIPROCESSOR
-		//XXXAARCH64: notyet?
-
-		uint64_t mpidr = reg_mpidr_el1_read();
-
 		KASSERT(cpu_info[id] == NULL);
 		ci = kmem_zalloc(sizeof(*ci), KM_SLEEP);
 		ci->ci_cpl = IPL_HIGH;
 		ci->ci_cpuid = id;
-		if (mpidr & MPIDR_MT) {
-			ci->ci_data.cpu_smt_id = mpidr & MPIDR_AFF0;
-			ci->ci_data.cpu_core_id = mpidr & MPIDR_AFF1;
-			ci->ci_data.cpu_package_id = mpidr & MPIDR_AFF2;
-		} else {
-			ci->ci_data.cpu_core_id = mpidr & MPIDR_AFF0;
-			ci->ci_data.cpu_package_id = mpidr & MPIDR_AFF1;
-		}
-		ci->ci_data.cpu_cc_freq = cpu_info_store.ci_data.cpu_cc_freq;
+
+		ci->ci_data.cpu_cc_freq = cpu_info[0]->ci_data.cpu_cc_freq;
 		cpu_info[ci->ci_cpuid] = ci;
 		if ((arm_cpu_hatched & (1 << id)) == 0) {
 			ci->ci_dev = dv;
 			dv->dv_private = ci;
 
 			aprint_naive(": disabled\n");
-			aprint_normal(": disabled (uniprocessor kernel)\n");
+			aprint_normal(": disabled (unresponsive)\n");
 			return;
 		}
-#else
+
+		/* cpus_{midr,mpidr}[id] is stored by secondary processor */
+		midr = cpus_midr[id];
+		mpidr = cpus_mpidr[id];
+#else /* MULTIPROCESSOR */
 		aprint_naive(": disabled\n");
 		aprint_normal(": disabled (uniprocessor kernel)\n");
 		return;
-#endif
+#endif /* MULTIPROCESSOR */
+	}
+	/* MPIDR_AFFn are bit masks: shift each field out to get the plain id */
+	if (mpidr & MPIDR_MT) {
+		ci->ci_data.cpu_smt_id = __SHIFTOUT(mpidr, MPIDR_AFF0);
+		ci->ci_data.cpu_core_id = __SHIFTOUT(mpidr, MPIDR_AFF1);
+		ci->ci_data.cpu_package_id = __SHIFTOUT(mpidr, MPIDR_AFF2);
+	} else {
+		ci->ci_data.cpu_core_id = __SHIFTOUT(mpidr, MPIDR_AFF0);
+		ci->ci_data.cpu_package_id = __SHIFTOUT(mpidr, MPIDR_AFF1);
 	}
 
 	ci->ci_dev = dv;
 	dv->dv_private = ci;
 
+	cpu_identify(ci->ci_dev, ci, midr, mpidr);
 #ifdef MULTIPROCESSOR
-	if (caa->caa_cpucore != 0) {
-		aprint_naive("\n");
-		aprint_normal(": %s\n", cpu_getmodel());
+	if (id != 0) {
 		mi_cpu_attach(ci);
-
-		// XXXAARCH64
-		//pmap_tlb_info_attach();
-		panic("notyet");
+		return;
 	}
-#endif
+#endif /* MULTIPROCESSOR */
 
 	fpu_attach(ci);
 
-	cpu_identify(dv, ci);
+	cpu_identify1(dv, ci);
 	cpu_identify2(dv, ci);
 }
 
@@ -224,30 +262,25 @@ prt_cache(device_t self, int level)
 }
 
 static void
-cpu_identify(device_t self, struct cpu_info *ci)
+cpu_identify(device_t self, struct cpu_info *ci, uint32_t midr, uint64_t mpidr)
 {
-	uint64_t mpidr;
-	int level;
-	uint32_t cpuid;
-	uint32_t ctr, sctlr;	/* for cache */
 	char model[128];
 
-	cpuid = reg_midr_el1_read();
-	identify_aarch64_model(cpuid, model, sizeof(model));
+	identify_aarch64_model(midr, model, sizeof(model));
 	if (ci->ci_cpuid == 0)
 		cpu_setmodel("%s", model);
 
 	aprint_naive("\n");
 	aprint_normal(": %s\n", model);
+	aprint_normal_dev(ci->ci_dev, "package %lu, core %lu, smt %lu\n",
+	    ci->ci_package_id, ci->ci_core_id, ci->ci_smt_id);
+}
 
-
-	mpidr = reg_mpidr_el1_read();
-	aprint_normal_dev(self, "CPU Affinity %llu-%llu-%llu-%llu\n",
-	    __SHIFTOUT(mpidr, MPIDR_AFF3),
-	    __SHIFTOUT(mpidr, MPIDR_AFF2),
-	    __SHIFTOUT(mpidr, MPIDR_AFF1),
-	    __SHIFTOUT(mpidr, MPIDR_AFF0));
-
+static void
+cpu_identify1(device_t self, struct cpu_info *ci)
+{
+	int level;
+	uint32_t ctr, sctlr;	/* for cache */
 
 	/* SCTLR - System Control Register */
 	sctlr = reg_sctlr_el1_read();
@@ -448,3 +481,62 @@ cpu_identify2(device_t self, struct cpu_
 
 	aprint_normal("\n");
 }
+
+#ifdef MULTIPROCESSOR
+void
+cpu_boot_secondary_processors(void)
+{
+	mutex_init(&cpu_hatch_lock, MUTEX_DEFAULT, IPL_NONE);
+
+#ifdef VERBOSE_INIT_ARM
+	printf("%s: writing mbox with %#x\n", __func__, arm_cpu_hatched);
+#endif
+
+	/* post the hatched-cpu mask to the mbox so the secondaries run cpu_hatch() */
+	atomic_or_32(&arm_cpu_mbox, arm_cpu_hatched);
+	__asm __volatile ("sev; sev; sev");
+
+	/* wait until every cpu has cleared its mbox bit at the end of cpu_hatch() */
+	while (arm_cpu_mbox) {
+		__asm __volatile ("wfe");
+	}
+
+#ifdef VERBOSE_INIT_ARM
+	printf("%s: secondary processors hatched\n", __func__);
+#endif
+
+	/* add available processors to kcpuset (NOTE(review): confirm export_u32 direction) */
+	uint32_t mbox = arm_cpu_hatched;
+	kcpuset_export_u32(kcpuset_attached, &mbox, sizeof(mbox));
+}
+
+void
+cpu_hatch(struct cpu_info *ci)
+{
+	KASSERT(curcpu() == ci);
+
+	delay(1000 * ci->ci_index);	/* XXX: to attach cpu* in order */
+
+	mutex_enter(&cpu_hatch_lock);
+	/* fpu_attach/cpu_identify* run one cpu at a time under cpu_hatch_lock */
+	fpu_attach(ci);
+
+	cpu_identify1(ci->ci_dev, ci);
+	cpu_identify2(ci->ci_dev, ci);
+
+	mutex_exit(&cpu_hatch_lock);
+
+	intr_cpu_init(ci);
+
+#ifdef FDT
+	arm_fdt_cpu_hatch(ci);
+#endif
+#ifdef MD_CPU_HATCH
+	MD_CPU_HATCH(ci);	/* for non-fdt arch? */
+#endif
+
+	/* clear my bit of arm_cpu_mbox (unsigned shift) to wake cpu_boot_secondary_processors() */
+	atomic_and_32(&arm_cpu_mbox, ~(1U << ci->ci_cpuid));
+	__asm __volatile ("sev; sev; sev");
+}
+#endif /* MULTIPROCESSOR */

Index: src/sys/arch/aarch64/aarch64/cpu_machdep.c
diff -u src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.3 src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.4
--- src/sys/arch/aarch64/aarch64/cpu_machdep.c:1.3	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/aarch64/cpu_machdep.c	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_machdep.c,v 1.3 2018/04/01 04:35:03 ryo Exp $ */
+/* $NetBSD: cpu_machdep.c,v 1.4 2018/07/09 06:19:53 ryo Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.3 2018/04/01 04:35:03 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu_machdep.c,v 1.4 2018/07/09 06:19:53 ryo Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -52,30 +52,6 @@ __KERNEL_RCSID(1, "$NetBSD: cpu_machdep.
 #include <aarch64/pcb.h>
 #include <aarch64/userret.h>
 
-#ifdef MULTIPROCESSOR
-/* for arm compatibility (referred from pic.c) */
-volatile u_int arm_cpu_hatched;
-u_int arm_cpu_max = 1;
-#endif
-
-/* Our exported CPU info; we can have only one. */
-struct cpu_info cpu_info_store __cacheline_aligned = {
-	.ci_cpl = IPL_HIGH,
-	.ci_curlwp = &lwp0
-};
-
-#ifdef MULTIPROCESSOR
-#define NCPUINFO	MAXCPUS
-#else
-#define NCPUINFO	1
-#endif
-
-struct cpu_info *cpu_info[NCPUINFO] = {
-	[0] = &cpu_info_store
-};
-
-uint32_t cpu_boot_mbox;
-
 #ifdef __HAVE_FAST_SOFTINTS
 #if IPL_VM != IPL_SOFTSERIAL + 1
 #error IPLs are screwed up
@@ -382,16 +358,6 @@ cpu_kpreempt_disabled(void)
 
 #ifdef MULTIPROCESSOR
 void
-cpu_boot_secondary_processors(void)
-{
-	uint32_t mbox;
-	kcpuset_export_u32(kcpuset_attached, &mbox, sizeof(mbox));
-	atomic_swap_32(&cpu_boot_mbox, mbox);
-	membar_producer();
-	__asm __volatile("sev; sev; sev");
-}
-
-void
 xc_send_ipi(struct cpu_info *ci)
 {
 	KASSERT(kpreempt_disabled());

Index: src/sys/arch/aarch64/aarch64/db_machdep.c
diff -u src/sys/arch/aarch64/aarch64/db_machdep.c:1.2 src/sys/arch/aarch64/aarch64/db_machdep.c:1.3
--- src/sys/arch/aarch64/aarch64/db_machdep.c:1.2	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/aarch64/db_machdep.c	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: db_machdep.c,v 1.2 2018/04/01 04:35:03 ryo Exp $ */
+/* $NetBSD: db_machdep.c,v 1.3 2018/07/09 06:19:53 ryo Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.2 2018/04/01 04:35:03 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.3 2018/07/09 06:19:53 ryo Exp $");
 
 #include "opt_kernhist.h"
 #include "opt_uvmhist.h"
@@ -52,6 +52,7 @@ __KERNEL_RCSID(0, "$NetBSD: db_machdep.c
 #include <ddb/db_command.h>
 #include <ddb/db_output.h>
 #include <ddb/db_variables.h>
+#include <ddb/db_run.h>
 #include <ddb/db_sym.h>
 #include <ddb/db_extern.h>
 #include <ddb/db_interface.h>
@@ -68,6 +69,9 @@ void db_md_pte_cmd(db_expr_t, bool, db_e
 void db_md_tlbi_cmd(db_expr_t, bool, db_expr_t, const char *);
 void db_md_sysreg_cmd(db_expr_t, bool, db_expr_t, const char *);
 void db_md_watch_cmd(db_expr_t, bool, db_expr_t, const char *);
+#if defined(_KERNEL) && defined(MULTIPROCESSOR)
+void db_md_switch_cpu_cmd(db_expr_t, bool, db_expr_t, const char *);
+#endif
 
 const struct db_command db_machine_command_table[] = {
 #if defined(_KERNEL) && defined(MULTIPROCESSOR)
@@ -823,9 +827,51 @@ db_md_watch_cmd(db_expr_t addr, bool hav
 	show_watchpoints();
 }
 
+#ifdef MULTIPROCESSOR
+volatile struct cpu_info *db_trigger;
+volatile struct cpu_info *db_onproc;
+volatile struct cpu_info *db_newcpu;
+
+#ifdef _KERNEL
+void
+db_md_switch_cpu_cmd(db_expr_t addr, bool have_addr, db_expr_t count,
+    const char *modif)
+{
+	if (addr < 0 || addr >= ncpu) {
+		db_printf("cpu %"DDB_EXPR_FMT"d out of range", addr);
+		return;
+	}
+
+	struct cpu_info *new_ci = cpu_lookup(addr);
+	if (new_ci == NULL) {
+		db_printf("cpu %"DDB_EXPR_FMT"d does not exist", addr);
+		return;
+	}
+
+	if (new_ci == curcpu())
+		return;
+	/* if this cpu triggered DDB, rewind pc by one instruction and drop the */
+	/* trigger mark (XXX: presumably so the trap is re-taken later -- confirm) */
+	membar_consumer();
+	if (db_trigger == curcpu()) {
+		DDB_REGS->tf_pc -= 4;
+		db_trigger = NULL;
+		membar_producer();
+	}
+	/* kdb_trap() hands db_onproc to db_newcpu once db_trap() returns */
+	db_newcpu = new_ci;
+	db_continue_cmd(0, false, 0, "");
+}
+
+#endif /* _KERNEL */
+#endif /* MULTIPROCESSOR */
+
 int
 kdb_trap(int type, struct trapframe *tf)
 {
+#ifdef MULTIPROCESSOR
+	struct cpu_info * const ci = curcpu();
+#endif
 	int s;
 
 	switch (type) {
@@ -843,18 +889,71 @@ kdb_trap(int type, struct trapframe *tf)
 		break;
 	}
 
-	/* Should switch to kdb`s own stack here. */
-	ddb_regs = *tf;
+#ifdef MULTIPROCESSOR
+	/*
+	 * Try to take ownership of DDB.
+	 * If we do, tell all other CPUs to enter DDB too.
+	 */
+	if ((ncpu > 1) &&
+	    (atomic_cas_ptr(&db_onproc, NULL, ci) == NULL)) {
+		intr_ipi_send(NULL, IPI_DDB);
+		db_trigger = ci;
+		membar_producer();
+	}
+#endif
+
+	for (;;) {
+#ifdef MULTIPROCESSOR
+		if (ncpu > 1) {
+
+			/* waiting my turn, or exit */
+			membar_consumer();
+			while (db_onproc != ci) {
+				__asm __volatile ("wfe");
+
+				membar_consumer();
+				if (db_onproc == NULL) {
+					return 1;
+				}
+			}
+			/* It's my turn! */
+		}
+#endif /* MULTIPROCESSOR */
+
+		/* Should switch to kdb`s own stack here. */
+		ddb_regs = *tf;
+
+		s = splhigh();
+		db_active++;
+		cnpollc(true);
+		db_trap(type, 0/*code*/);
+		cnpollc(false);
+		db_active--;
+		splx(s);
+
+		*tf = ddb_regs;
+
+#ifdef MULTIPROCESSOR
+		if ((ncpu > 1) && (db_newcpu != NULL)) {
+			db_onproc = db_newcpu;
+			db_newcpu = NULL;
+			membar_producer();
+			__asm __volatile ("sev; sev; sev");
+			continue;	/* redo DDB on new cpu */
+		}
+#endif /* MULTIPROCESSOR */
 
-	s = splhigh();
-	db_active++;
-	cnpollc(true);
-	db_trap(type, 0/*code*/);
-	cnpollc(false);
-	db_active--;
-	splx(s);
+		break;
+	}
 
-	*tf = ddb_regs;
+#ifdef MULTIPROCESSOR
+	if (ncpu > 1) {
+		db_onproc = NULL;
+		membar_producer();
+		__asm __volatile ("sev; sev; sev");
+	}
+	db_trigger = NULL;
+#endif
 
 	return 1;
 }
Index: src/sys/arch/aarch64/aarch64/genassym.cf
diff -u src/sys/arch/aarch64/aarch64/genassym.cf:1.2 src/sys/arch/aarch64/aarch64/genassym.cf:1.3
--- src/sys/arch/aarch64/aarch64/genassym.cf:1.2	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/aarch64/genassym.cf	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.2 2018/04/01 04:35:03 ryo Exp $
+# $NetBSD: genassym.cf,v 1.3 2018/07/09 06:19:53 ryo Exp $
 #-
 # Copyright (c) 2014 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -282,6 +282,11 @@ define	ID_AA64PFR0_EL1_GIC		ID_AA64PFR0_
 define	ID_AA64PFR0_EL1_GIC_SHIFT	ID_AA64PFR0_EL1_GIC_SHIFT
 define	ID_AA64PFR0_EL1_GIC_CPUIF_EN	ID_AA64PFR0_EL1_GIC_CPUIF_EN
 
+define	MPIDR_AFF0		MPIDR_AFF0
+define	MPIDR_AFF1		MPIDR_AFF1
+define	MPIDR_AFF2		MPIDR_AFF2
+define	MPIDR_AFF3		MPIDR_AFF3
+
 define	MAIR_ATTR0		MAIR_ATTR0
 define	MAIR_ATTR1		MAIR_ATTR1
 define	MAIR_ATTR2		MAIR_ATTR2

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.7 src/sys/arch/aarch64/aarch64/locore.S:1.8
--- src/sys/arch/aarch64/aarch64/locore.S:1.7	Thu Jun 21 11:57:05 2018
+++ src/sys/arch/aarch64/aarch64/locore.S	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.7 2018/06/21 11:57:05 ryo Exp $	*/
+/*	$NetBSD: locore.S,v 1.8 2018/07/09 06:19:53 ryo Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -36,12 +36,12 @@
 #include <aarch64/pte.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.7 2018/06/21 11:57:05 ryo Exp $")
+RCSID("$NetBSD: locore.S,v 1.8 2018/07/09 06:19:53 ryo Exp $")
 
 /* #define DEBUG_LOCORE */
 /* #define DEBUG_MMU */
 
-#if defined(VERBOSE_INIT_ARM) && defined(EARLYCONS)
+#if (defined(VERBOSE_INIT_ARM) || defined(DEBUG_LOCORE)) && defined(EARLYCONS)
 #define VERBOSE_LOCORE
 #endif
 
@@ -61,23 +61,36 @@ RCSID("$NetBSD: locore.S,v 1.7 2018/06/2
 #define VERBOSE(string)
 #endif
 
+/* load far effective address of \addr into \reg (pc relative: adrp page + :lo12: offset) */
+.macro	ADDR, reg, addr
+	adrp	\reg, \addr
+	add	\reg, \reg, #:lo12:\addr
+.endm
+
 ENTRY_NP(aarch64_start)
+	/* Zero the BSS. The size must be aligned 16, usually it should be. */
+	ADDR	x0, __bss_start__
+	ADDR	x1, __bss_end__
+	b	2f
+1:	stp	xzr, xzr, [x0], #16
+2:	cmp	x0, x1
+	b.lo	1b
 
-	adr	x0, lwp0uspace
-	add	x0, x0, #(UPAGES * PAGE_SIZE)
-	sub	x0, x0, #TF_SIZE	/* lwp0space + USPACE - TF_SIZE */
-	mov	sp, x0			/* define lwp0 ksp bottom */
+	/* set stack pointer for boot */
+	ADDR	x0, bootstk
+	mov	sp, x0
 
 #ifdef DEBUG_LOCORE
-	PRINT("SP               = ")
-	mov	x0, sp
-	bl	print_x0
-
 	PRINT("PC               = ")
 	bl	1f
 1:	mov	x0, lr
 	bl	print_x0
 
+	PRINT("SP               = ")
+	bl	1f
+1:	mov	x0, sp
+	bl	print_x0
+
 	PRINT("CurrentEL        = ")
 	mrs	x0, CurrentEL
 	lsr	x0, x0, #2
@@ -175,9 +188,7 @@ ENTRY_NP(aarch64_start)
 	PRINT("SPSR_EL1        = ")
 	mrs	x0, spsr_el1
 	bl	print_x0
-#endif
 
-#ifdef DEBUG_LOCORE
 	PRINT("DAIF            = ")
 	mrs	x0, daif
 	bl	print_x0
@@ -194,7 +205,7 @@ ENTRY_NP(aarch64_start)
  * vstart is in kernel virtual address
  */
 vstart:
-	adr	x0, lwp0uspace
+	ADDR	x0, lwp0uspace
 	add	x0, x0, #(UPAGES * PAGE_SIZE)
 	sub	x0, x0, #TF_SIZE	/* lwp0space + USPACE - TF_SIZE */
 	mov	sp, x0			/* define lwp0 ksp bottom */
@@ -205,16 +216,9 @@ vstart:
 	bl	print_x0
 #endif
 
-	/* Zero the BSS */
-	ldr	x0, =__bss_start__
-	ldr	x1, =__bss_end__
-	b	2f
-1:	stp	xzr, xzr, [x0], #16
-2:	cmp	x0, x1
-	b.lo	1b
-
 	msr	tpidr_el0, xzr		/* tpidr_el0 (for TLS) = NULL */
-	bl	set_cpuinfo
+	ADDR	x0, cpu_info_store	/* cpu_info_store is cpu_info[0] */
+	msr	tpidr_el1, x0		/* curcpu is cpu_info[0] */
 
 	mov	fp, #0			/* trace back starts here */
 	bl	_C_LABEL(initarm)	/* Off we go */
@@ -231,6 +235,253 @@ END(aarch64_start)
 
 	.align 3
 	.text
+
+#ifdef MULTIPROCESSOR
+
+#if defined(VERBOSE_LOCORE) || defined(DEBUG_LOCORE)
+/*
+ * print "[CPU$x27] " (x27 as cpuid, decimal, no leading zeros; XXX: max 4 digit)
+ * x0, lr, x25 and x26 are saved and restored here; x1 is used as scratch.
+ */
+printcpu:
+	stp	x0, lr, [sp, #-16]!
+	stp	x25, x26, [sp, #-16]!
+	PRINT("[CPU")
+	mov	x26, x27		# n = cpuid
+	mov	x25, xzr		# zeropad = 0
+	mov	x1, #1000
+	udiv	x0, x26, x1		# x0 = n / 1000
+	msub	x26, x0, x1, x26	# n %= 1000
+	cbz	x0, 1f			# if (x0 == 0) goto 1f
+	add	x0, x0, #'0'
+	bl	uartputc
+	mov	x25, #1			# zeropad = 1
+1:
+	mov	x1, #100
+	udiv	x0, x26, x1		# x0 = n / 100
+	msub	x26, x0, x1, x26	# n %= 100
+	adds	x25, x25, x0		# if ((zeropad + x0) == 0)
+	beq	1f			#   goto 1f
+	add	x0, x0, #'0'
+	bl	uartputc
+	mov	x25, #1			# zeropad = 1
+1:
+	mov	x1, #10
+	udiv	x0, x26, x1		# x0 = n / 10
+	msub	x26, x0, x1, x26	# n %= 10
+	adds	x25, x25, x0		# if ((zeropad + x0) == 0)
+	beq	1f			#   goto 1f
+	add	x0, x0, #'0'
+	bl	uartputc
+1:
+	add	x0, x26, #'0'
+	bl	uartputc
+	PRINT("] ")
+	ldp	x25, x26, [sp], #16
+	ldp	x0, lr, [sp], #16
+	ret
+#define PRINTCPU()	bl	printcpu
+#else
+#define PRINTCPU()
+#endif /* VERBOSE_LOCORE || DEBUG_LOCORE */
+
+#ifdef VERBOSE_LOCORE
+#define VERBOSE_PRINTCPU()	PRINTCPU()
+#else
+#define VERBOSE_PRINTCPU()
+#endif
+
+ENTRY_NP(aarch64_mpstart)
+ENTRY_NP(cortex_mpstart)	/* compat arm */
+	/*
+	 * XXX:
+	 *  cpuid(index) is read from MPIDR_EL1.AFF0. AFF1,2,3 are ignored.
+	 *  cpuid should be passed from primary processor...
+	 */
+	mrs	x27, mpidr_el1
+	and	x27, x27, #MPIDR_AFF0	/* XXX: cpuid = mpidr_el1 & Aff0 */
+	mov	x0, #1
+	lsl	x28, x0, x27		/* x28 = 1 << cpuid */
+	mov	x0, x28
+
+	/* x27 = cpuid, x28 = (1 << cpuid) */
+
+	/* set stack pointer for boot (NOTE(review): cpuid MAXCPUS-1 gets sp == bootstk -- confirm) */
+#define BOOT_STACKSIZE	256
+	mov	x1, #BOOT_STACKSIZE
+	mul	x1, x1, x27
+	ADDR	x0, bootstk_cpus
+	sub	sp, x0, x1	/* sp = bootstk_cpus - BOOT_STACKSIZE * cpuid */
+
+#ifdef DEBUG_LOCORE
+	PRINTCPU()
+	PRINT("PC               = ")
+	bl	1f
+1:	mov	x0, lr
+	bl	print_x0
+
+	PRINTCPU()
+	PRINT("SP               = ")
+	bl	1f
+1:	mov	x0, sp
+	bl	print_x0
+
+	PRINTCPU()
+	PRINT("CurrentEL        = ")
+	mrs	x0, CurrentEL
+	lsr	x0, x0, #2
+	bl	print_x0
+#endif
+
+#ifdef LOCORE_EL2
+#ifdef DEBUG_LOCORE
+	VERBOSE_PRINTCPU()
+	VERBOSE("Drop to EL1...")
+#endif
+	bl	drop_to_el1
+#ifdef DEBUG_LOCORE
+	VERBOSE("OK\r\n")
+#endif
+#ifdef DEBUG_LOCORE
+	PRINTCPU()
+	PRINT("CurrentEL        = ")
+	mrs	x0, CurrentEL
+	lsr	x0, x0, #2
+	bl	print_x0
+#endif /* DEBUG_LOCORE */
+#endif /* LOCORE_EL2 */
+
+	bl	mmu_disable
+
+	bl	init_sysregs
+
+#ifdef DEBUG_LOCORE
+	VERBOSE_PRINTCPU()
+	VERBOSE("MMU Enable...")
+#endif
+	bl	mmu_enable
+#ifdef DEBUG_LOCORE
+	VERBOSE("OK\r\n")
+#endif
+
+	/* jump to virtual address */
+	ldr	x0, =mp_vstart
+	br	x0
+
+mp_vstart:
+	/* set exception vector */
+	ADDR	x0, el1_vectors
+	msr	vbar_el1, x0
+
+#ifdef DEBUG_LOCORE
+	PRINTCPU()
+	PRINT("PC               = ")
+	bl	1f
+1:	mov	x0, lr
+	bl	print_x0
+
+	PRINTCPU()
+	PRINT("arm_cpu_hatched  = ")
+	ADDR	x0, _C_LABEL(arm_cpu_hatched)
+	ldr	w0, [x0]
+	bl	print_x0
+
+	PRINTCPU()
+	PRINT("my cpubit        = ")
+	mov	x0, x28
+	bl	print_x0
+#endif
+
+	ADDR	x0, _C_LABEL(cpus_midr)
+	mrs	x1, midr_el1
+	str	w1, [x0, x27, lsl #2]	/* cpus_midr[cpuid] = midr_el1 */
+
+	ADDR	x0, _C_LABEL(cpus_mpidr)
+	mrs	x1, mpidr_el1
+	str	x1, [x0, x27, lsl #3]	/* cpus_mpidr[cpuid] = mpidr_el1 */
+
+
+	/*
+	 * atomic_or_32(&arm_cpu_hatched, 1 << cpuid)
+	 * to tell my activity to primary processor.
+	 */
+	ADDR	x0, _C_LABEL(arm_cpu_hatched)
+	mov	x1, x28
+	bl	_C_LABEL(atomic_or_32)	/* hatched! */
+	sev
+
+#ifdef DEBUG_LOCORE
+	PRINTCPU()
+	PRINT("arm_cpu_hatched -> ")
+	ADDR	x0, _C_LABEL(arm_cpu_hatched)
+	ldr	w0, [x0]
+	bl	print_x0
+#endif
+
+#ifdef DEBUG_LOCORE
+	PRINTCPU()
+	PRINT("Hatched.\r\n")
+#endif
+
+	/* wait until my bit of arm_cpu_mbox (a 32-bit word) is set */
+	ADDR	x1, _C_LABEL(arm_cpu_mbox)
+1:
+	dmb	sy
+	ldr	w0, [x1]
+	tst	x0, x28
+	bne	9f
+	wfe
+	b	1b
+9:
+
+#ifdef DEBUG_LOCORE
+	/* XXX: delay to prevent the mixing of console output */
+	mov	x0, #0x4000000
+	mul	x0, x0, x27	/* delay (cpuid * 0x4000000) */
+1:	subs	x0, x0, #1
+	bne	1b
+
+	PRINTCPU()
+	PRINT("MBOX received\r\n")
+
+	PRINTCPU()
+	PRINT("arm_cpu_mbox  = ")
+	ADDR	x0, _C_LABEL(arm_cpu_mbox)
+	ldr	w0, [x0]
+	bl	print_x0
+#endif
+
+	msr	tpidr_el0, xzr		/* tpidr_el0 (for TLS) = NULL */
+
+	/* fill my cpu_info */
+	ADDR	x0, _C_LABEL(cpu_info)
+	ldr	x0, [x0, x27, lsl #3]	/* x0 = cpu_info[cpuid] */
+	msr	tpidr_el1, x0		/* tpidr_el1 = my cpu_info */
+
+	ldr	x1, [x0, #CI_IDLELWP]	/* x1 = curcpu()->ci_data.cpu_idlelwp */
+	str	x1, [x0, #CI_CURLWP]	/* curlwp is idlelwp */
+
+	ldr	x2, [x1, #L_PCB]	/* x2 = lwp_getpcb(idlelwp) */
+	add	x2, x2, #(UPAGES * PAGE_SIZE)
+	sub	sp, x2, #TF_SIZE	/* sp = pcb + USPACE - TF_SIZE */
+
+
+	mov	fp, xzr			/* trace back starts here */
+	bl	_C_LABEL(cpu_hatch)
+	mov	x0, xzr
+	b	_C_LABEL(idle_loop)	/* never to return */
+END(aarch64_mpstart)
+
+#else /* MULTIPROCESSOR */
+
+ENTRY_NP(aarch64_mpstart)
+ENTRY_NP(cortex_mpstart)	/* compat arm */
+1:	wfi			/* !MULTIPROCESSOR: loop in wfi forever */
+	b	1b
+END(aarch64_mpstart)
+
+#endif /* MULTIPROCESSOR */
+
 /*
  * xprint - print strings pointed by $PC(LR)
  *          and return to the end of string.
@@ -268,7 +519,7 @@ _C_LABEL(uartputs):
 	ldrb	w0, [x11], #1
 	cbnz	w0, 1b
 9:
-	mov 	x0, x11
+	mov	x0, x11
 	ret
 END(_C_LABEL(uartputs))
 
@@ -322,37 +573,12 @@ print_x2:
 	ldp	x0, lr, [sp], #16
 	ret
 
-
-set_cpuinfo:
-	mrs	x1, mpidr_el1
-	and	x1, x1, #0xff	/* Aff0 = cpu id */
-	cmp	x1, #MAXCPUS
-	bcs	arm_cpuinit_too_many_cpu
-
-	ldr	x0, =cpu_info
-	ldr	x0, [x0, x1, lsl #3]	/* x0 = cpu_info[cpuid] */
-	msr	tpidr_el1, x0		/* tpidr_el1 = my cpu info */
-	str	x1, [x0, #CI_CPUID]	/* ci->ci_cpuid = CPUID */
-
-	ret
-
-arm_cpuinit_too_many_cpu:
-	PRINT("Too many CPUs: MPIDR_EL1=")
-	mrs	x0, mpidr_el1
-	bl	print_x0
-1:	wfi
-	b	1b
-	ret
-
-	.align 3
-
-
 arm_boot_l0pt_init:
 	stp	x0, lr, [sp, #-16]!
 
 	/* Clean the page table */
-	adr	x0, mmutables_start
-	adr	x1, mmutables_end
+	ADDR	x0, mmutables_start
+	ADDR	x1, mmutables_end
 1:
 	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
@@ -363,13 +589,13 @@ arm_boot_l0pt_init:
 
 	VERBOSE("Creating VA=PA tables\r\n")
 	/* VA=PA table for L0 */
-	adr	x0, ttbr0_l0table
+	ADDR	x0, ttbr0_l0table
 	mov	x1, #0
-	adr	x2, ttbr0_l1table
+	ADDR	x2, ttbr0_l1table
 	bl	l0_settable
 
 	/* VA=PA blocks */
-	adr	x0, ttbr0_l1table
+	ADDR	x0, ttbr0_l1table
 	mov	x1, #0			/* PA */
 	mov	x2, #0			/* VA */
 	mov	x3, #L2_BLKPAG_ATTR_DEVICE_MEM
@@ -378,13 +604,13 @@ arm_boot_l0pt_init:
 
 	VERBOSE("Creating KSEG tables\r\n")
 	/* KSEG table for L0 */
-	adr	x0, ttbr1_l0table
+	ADDR	x0, ttbr1_l0table
 	mov	x1, #AARCH64_KSEG_START
-	adr	x2, ttbr1_l1table_kseg
+	ADDR	x2, ttbr1_l1table_kseg
 	bl	l0_settable
 
 	/* KSEG blocks */
-	adr	x0, ttbr1_l1table_kseg
+	ADDR	x0, ttbr1_l1table_kseg
 	mov	x1, #AARCH64_KSEG_START
 	mov	x2, #0
 	mov	x3, #L2_BLKPAG_ATTR_NORMAL_WB
@@ -394,25 +620,25 @@ arm_boot_l0pt_init:
 
 	VERBOSE("Creating KVA=PA tables\r\n")
 	/* KVA=PA table for L0 */
-	adr	x0, ttbr1_l0table
+	ADDR	x0, ttbr1_l0table
 	mov	x1, #VM_MIN_KERNEL_ADDRESS
-	adr	x2, ttbr1_l1table_kva
+	ADDR	x2, ttbr1_l1table_kva
 	bl	l0_settable
 
 	/* KVA=PA table for L1 */
-	adr	x0, ttbr1_l1table_kva
+	ADDR	x0, ttbr1_l1table_kva
 	mov	x1, #VM_MIN_KERNEL_ADDRESS
-	adr	x2, ttbr1_l2table_kva
+	ADDR	x2, ttbr1_l2table_kva
 	bl	l1_settable
 
 	/* KVA=PA blocks */
-	adr	x0, ttbr1_l2table_kva
+	ADDR	x0, ttbr1_l2table_kva
 	adr	x2, start		/* physical addr. before MMU */
 	and	x2, x2, #L2_BLK_OA	/* L2 block size aligned (2MB) */
 	mov	x1, #VM_MIN_KERNEL_ADDRESS
 	mov	x3, #L2_BLKPAG_ATTR_NORMAL_WB
 
-	/* kernelsize = _end - _start */
+	/* kernelsize = _end - start */
 	ldr	x1, =start
 	ldr	x4, =_end
 	sub	x4, x4, x1
@@ -425,9 +651,10 @@ arm_boot_l0pt_init:
 
 	VERBOSE("Creating devmap tables\r\n")
 	/* devmap=PA table for L1 */
-	adr	x0, ttbr1_l1table_kva
+nop
+	ADDR	x0, ttbr1_l1table_kva
 	ldr	x1, .L_devmap_addr
-	adr	x2, ttbr1_l2table_devmap
+	ADDR	x2, ttbr1_l2table_devmap
 	bl	l1_settable
 
 	ldp	x0, lr, [sp], #16
@@ -592,9 +819,9 @@ mmu_disable:
 mmu_enable:
 	dsb	sy
 
-	adr	x0, ttbr0_l0table
+	ADDR	x0, ttbr0_l0table
 	msr	ttbr0_el1, x0
-	adr	x0, ttbr1_l0table
+	ADDR	x0, ttbr1_l0table
 	msr	ttbr1_el1, x0
 	isb
 
@@ -720,16 +947,21 @@ sctlr_clear:
 	    0)
 
 
-	/*
-	 * XXXAARCH64: kernel segment is writable
-	 */
+	.bss
+
 	.align PGSHIFT
 	.global _C_LABEL(lwp0uspace)
 _C_LABEL(lwp0uspace):
 	.space	UPAGES * PAGE_SIZE
+bootstk:
 
-	.align PGSHIFT
+#ifdef MULTIPROCESSOR
+	.space	BOOT_STACKSIZE * (MAXCPUS - 1)
+bootstk_cpus:
+#endif
 
+
+	.align PGSHIFT
 mmutables_start:
 /*
  * PA == VA mapping using L1 1G block (whole 32bit)

Index: src/sys/arch/aarch64/include/cpu.h
diff -u src/sys/arch/aarch64/include/cpu.h:1.2 src/sys/arch/aarch64/include/cpu.h:1.3
--- src/sys/arch/aarch64/include/cpu.h:1.2	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/include/cpu.h	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.2 2018/04/01 04:35:03 ryo Exp $ */
+/* $NetBSD: cpu.h,v 1.3 2018/07/09 06:19:53 ryo Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -95,23 +95,25 @@ void cpu_set_curpri(int);
 void cpu_proc_fork(struct proc *, struct proc *);
 void cpu_need_proftick(struct lwp *l);
 void cpu_boot_secondary_processors(void);
+void cpu_hatch(struct cpu_info *);
 
 extern struct cpu_info *cpu_info[];
-extern struct cpu_info cpu_info_store;	/* MULTIPROCESSOR */
 extern volatile u_int arm_cpu_hatched;	/* MULTIPROCESSOR */
+extern uint32_t cpus_midr[];		/* MULTIPROCESSOR */
+extern uint64_t cpus_mpidr[];		/* MULTIPROCESSOR */
 
 #define CPU_INFO_ITERATOR	cpuid_t
 #ifdef MULTIPROCESSOR
 #define cpu_number()		(curcpu()->ci_index)
 #define CPU_IS_PRIMARY(ci)	((ci)->ci_index == 0)
-#define CPU_INFO_FOREACH(cii, ci)				\
-	cii = 0, ci = cpu_info[0];				\
-	cii < ncpu && (ci = cpu_info[cii]) != NULL;		\
+#define CPU_INFO_FOREACH(cii, ci)					\
+	cii = 0, ci = cpu_info[0];					\
+	cii < (ncpu ? ncpu : 1) && (ci = cpu_info[cii]) != NULL;	\
 	cii++
 #else /* MULTIPROCESSOR */
 #define cpu_number()		0
 #define CPU_IS_PRIMARY(ci)	true
-#define CPU_INFO_FOREACH(cii, ci)				\
+#define CPU_INFO_FOREACH(cii, ci)					\
 	cii = 0, __USE(cii), ci = curcpu(); ci != NULL; ci = NULL
 #endif /* MULTIPROCESSOR */
 

Index: src/sys/arch/aarch64/include/locore.h
diff -u src/sys/arch/aarch64/include/locore.h:1.3 src/sys/arch/aarch64/include/locore.h:1.4
--- src/sys/arch/aarch64/include/locore.h:1.3	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/include/locore.h	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.h,v 1.3 2018/04/01 04:35:03 ryo Exp $ */
+/* $NetBSD: locore.h,v 1.4 2018/07/09 06:19:53 ryo Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -51,11 +51,6 @@
 
 #include <aarch64/armreg.h>
 
-#ifdef MULTIPROCESSOR
-/* for compatibility arch/arm/pic/pic.c */
-extern u_int arm_cpu_max;
-#endif
-
 /* for compatibility arch/arm */
 #define I32_bit			DAIF_I
 #define F32_bit			DAIF_F

Index: src/sys/arch/aarch64/include/machdep.h
diff -u src/sys/arch/aarch64/include/machdep.h:1.1 src/sys/arch/aarch64/include/machdep.h:1.2
--- src/sys/arch/aarch64/include/machdep.h:1.1	Sun Apr  1 04:35:03 2018
+++ src/sys/arch/aarch64/include/machdep.h	Mon Jul  9 06:19:53 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.h,v 1.1 2018/04/01 04:35:03 ryo Exp $	*/
+/*	$NetBSD: machdep.h,v 1.2 2018/07/09 06:19:53 ryo Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -65,6 +65,10 @@ extern void (*cpu_powerdown_address)(voi
 
 extern char *booted_kernel;
 
+#ifdef MULTIPROCESSOR
+extern u_int arm_cpu_max;
+#endif
+
 vaddr_t initarm_common(vaddr_t, vsize_t, const struct boot_physmem *, size_t);
 
 void parse_mi_bootargs(char *);

Reply via email to