Module Name: src Committed By: ryo Date: Sun Aug 26 18:15:50 UTC 2018
Modified Files: src/sys/arch/aarch64/aarch64: aarch64_machdep.c cpu.c cpufunc.c genassym.cf locore.S src/sys/arch/aarch64/include: cpu.h cpufunc.h src/sys/arch/arm/broadcom: bcm283x_platform.c src/sys/arch/arm/fdt: cpu_fdt.c psci_fdt.c Log Message: add support for multiple cpu clusters. * pass cpu index as an argument to secondary processors when hatching. * keep cpu cache configuration per cpu cluster. Hello big.LITTLE! To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/arch/aarch64/aarch64/aarch64_machdep.c cvs rdiff -u -r1.5 -r1.6 src/sys/arch/aarch64/aarch64/cpu.c cvs rdiff -u -r1.2 -r1.3 src/sys/arch/aarch64/aarch64/cpufunc.c cvs rdiff -u -r1.6 -r1.7 src/sys/arch/aarch64/aarch64/genassym.cf cvs rdiff -u -r1.19 -r1.20 src/sys/arch/aarch64/aarch64/locore.S cvs rdiff -u -r1.6 -r1.7 src/sys/arch/aarch64/include/cpu.h cvs rdiff -u -r1.2 -r1.3 src/sys/arch/aarch64/include/cpufunc.h cvs rdiff -u -r1.14 -r1.15 src/sys/arch/arm/broadcom/bcm283x_platform.c cvs rdiff -u -r1.8 -r1.9 src/sys/arch/arm/fdt/cpu_fdt.c cvs rdiff -u -r1.14 -r1.15 src/sys/arch/arm/fdt/psci_fdt.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/aarch64/aarch64_machdep.c diff -u src/sys/arch/aarch64/aarch64/aarch64_machdep.c:1.10 src/sys/arch/aarch64/aarch64/aarch64_machdep.c:1.11 --- src/sys/arch/aarch64/aarch64/aarch64_machdep.c:1.10 Fri Aug 24 01:59:40 2018 +++ src/sys/arch/aarch64/aarch64/aarch64_machdep.c Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: aarch64_machdep.c,v 1.10 2018/08/24 01:59:40 jmcneill Exp $ */ +/* $NetBSD: aarch64_machdep.c,v 1.11 2018/08/26 18:15:49 ryo Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: aarch64_machdep.c,v 1.10 2018/08/24 01:59:40 jmcneill Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aarch64_machdep.c,v 1.11 2018/08/26 18:15:49 ryo Exp $"); #include "opt_arm_debug.h" #include "opt_ddb.h" @@ -168,8 +168,6 @@ initarm_common(vaddr_t kvm_base, vsize_t vaddr_t kernelvmstart; int i; - aarch64_getcacheinfo(); - cputype = cpu_idnum(); /* for compatible arm */ kernstart = trunc_page((vaddr_t)__kernel_text); Index: src/sys/arch/aarch64/aarch64/cpu.c diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.5 src/sys/arch/aarch64/aarch64/cpu.c:1.6 --- src/sys/arch/aarch64/aarch64/cpu.c:1.5 Mon Aug 20 18:13:56 2018 +++ src/sys/arch/aarch64/aarch64/cpu.c Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.5 2018/08/20 18:13:56 jmcneill Exp $ */ +/* $NetBSD: cpu.c,v 1.6 2018/08/26 18:15:49 ryo Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.5 2018/08/20 18:13:56 jmcneill Exp $"); +__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.6 2018/08/26 18:15:49 ryo Exp $"); #include "locators.h" #include "opt_arm_debug.h" @@ -58,70 +58,72 @@ __KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.5 void cpu_attach(device_t, cpuid_t); static void identify_aarch64_model(uint32_t, char *, size_t); -static void cpu_identify(device_t self, struct cpu_info *, uint32_t, uint64_t); +static void 
cpu_identify(device_t self, struct cpu_info *); static void cpu_identify1(device_t self, struct cpu_info *); static void cpu_identify2(device_t self, struct cpu_info *); #ifdef MULTIPROCESSOR volatile u_int arm_cpu_hatched __cacheline_aligned = 0; +volatile u_int arm_cpu_hatch_arg __cacheline_aligned; volatile uint32_t arm_cpu_mbox __cacheline_aligned = 0; u_int arm_cpu_max = 1; -/* stored by secondary processors (available when arm_cpu_hatched) */ -uint32_t cpus_midr[MAXCPUS]; -uint64_t cpus_mpidr[MAXCPUS]; - static kmutex_t cpu_hatch_lock; #endif /* MULTIPROCESSOR */ -/* Our exported CPU info; we can have only one. */ -struct cpu_info cpu_info_store __cacheline_aligned = { - .ci_cpl = IPL_HIGH, - .ci_curlwp = &lwp0 -}; - #ifdef MULTIPROCESSOR #define NCPUINFO MAXCPUS #else #define NCPUINFO 1 #endif /* MULTIPROCESSOR */ -struct cpu_info *cpu_info[NCPUINFO] = { - [0] = &cpu_info_store +/* + * Our exported CPU info; + * these will be refered from secondary cpus in the middle of hatching. 
+ */ +struct cpu_info cpu_info_store[NCPUINFO] = { + [0] = { + .ci_cpl = IPL_HIGH, + .ci_curlwp = &lwp0 + } +}; + +struct cpu_info *cpu_info[NCPUINFO] __read_mostly = { + [0] = &cpu_info_store[0] }; void cpu_attach(device_t dv, cpuid_t id) { struct cpu_info *ci; + const int unit = device_unit(dv); uint64_t mpidr; - uint32_t midr; - if (id == 0) { + if (unit == 0) { ci = curcpu(); - midr = reg_midr_el1_read(); - mpidr = reg_mpidr_el1_read(); + ci->ci_cpuid = id; + cpu_info_store[unit].ci_midr = reg_midr_el1_read(); + cpu_info_store[unit].ci_mpidr = reg_mpidr_el1_read(); } else { #ifdef MULTIPROCESSOR - KASSERT(cpu_info[id] == NULL); - ci = kmem_zalloc(sizeof(*ci), KM_SLEEP); + KASSERT(unit < MAXCPUS); + ci = &cpu_info_store[unit]; + ci->ci_cpl = IPL_HIGH; ci->ci_cpuid = id; + ci->ci_data.cpu_cc_freq = cpu_info_store[0].ci_data.cpu_cc_freq; + /* ci_{midr,mpidr} are stored by own cpus when hatching */ - ci->ci_data.cpu_cc_freq = cpu_info[0]->ci_data.cpu_cc_freq; - cpu_info[ci->ci_cpuid] = ci; - if ((arm_cpu_hatched & (1 << id)) == 0) { + cpu_info[ncpu] = ci; + if ((arm_cpu_hatched & __BIT(unit)) == 0) { ci->ci_dev = dv; dv->dv_private = ci; + ci->ci_index = -1; aprint_naive(": disabled\n"); aprint_normal(": disabled (unresponsive)\n"); return; } - - /* cpus_{midr,mpidr}[id] is stored by secondary processor */ - midr = cpus_midr[id]; - mpidr = cpus_mpidr[id]; #else /* MULTIPROCESSOR */ aprint_naive(": disabled\n"); aprint_normal(": disabled (uniprocessor kernel)\n"); @@ -129,6 +131,7 @@ cpu_attach(device_t dv, cpuid_t id) #endif /* MULTIPROCESSOR */ } + mpidr = ci->ci_mpidr; if (mpidr & MPIDR_MT) { ci->ci_data.cpu_smt_id = __SHIFTOUT(mpidr, MPIDR_AFF0); ci->ci_data.cpu_core_id = __SHIFTOUT(mpidr, MPIDR_AFF1); @@ -141,9 +144,9 @@ cpu_attach(device_t dv, cpuid_t id) ci->ci_dev = dv; dv->dv_private = ci; - cpu_identify(ci->ci_dev, ci, midr, mpidr); + cpu_identify(ci->ci_dev, ci); #ifdef MULTIPROCESSOR - if (id != 0) { + if (unit != 0) { mi_cpu_attach(ci); return; } @@ 
-152,6 +155,8 @@ cpu_attach(device_t dv, cpuid_t id) fpu_attach(ci); cpu_identify1(dv, ci); + aarch64_getcacheinfo(); + aarch64_printcacheinfo(dv); cpu_identify2(dv, ci); } @@ -196,91 +201,13 @@ identify_aarch64_model(uint32_t cpuid, c snprintf(buf, len, "unknown CPU (ID = 0x%08x)", cpuid); } -static int -prt_cache(device_t self, int level) -{ - struct aarch64_cache_info *cinfo; - struct aarch64_cache_unit *cunit; - u_int purging; - int i; - const char *cacheable, *cachetype; - - cinfo = &aarch64_cache_info[level]; - - if (cinfo->cacheable == CACHE_CACHEABLE_NONE) - return -1; - - for (i = 0; i < 2; i++) { - switch (cinfo->cacheable) { - case CACHE_CACHEABLE_ICACHE: - cunit = &cinfo->icache; - cacheable = "Instruction"; - break; - case CACHE_CACHEABLE_DCACHE: - cunit = &cinfo->dcache; - cacheable = "Data"; - break; - case CACHE_CACHEABLE_IDCACHE: - if (i == 0) { - cunit = &cinfo->icache; - cacheable = "Instruction"; - } else { - cunit = &cinfo->dcache; - cacheable = "Data"; - } - break; - case CACHE_CACHEABLE_UNIFIED: - cunit = &cinfo->dcache; - cacheable = "Unified"; - break; - default: - cunit = &cinfo->dcache; - cacheable = "*UNK*"; - break; - } - - switch (cunit->cache_type) { - case CACHE_TYPE_VIVT: - cachetype = "VIVT"; - break; - case CACHE_TYPE_VIPT: - cachetype = "VIPT"; - break; - case CACHE_TYPE_PIPT: - cachetype = "PIPT"; - break; - default: - cachetype = "*UNK*"; - break; - } - - purging = cunit->cache_purging; - aprint_normal_dev(self, - "L%d %dKB/%dB %d-way%s%s%s%s %s %s cache\n", - level + 1, - cunit->cache_size / 1024, - cunit->cache_line_size, - cunit->cache_ways, - (purging & CACHE_PURGING_WT) ? " write-through" : "", - (purging & CACHE_PURGING_WB) ? " write-back" : "", - (purging & CACHE_PURGING_RA) ? " read-allocate" : "", - (purging & CACHE_PURGING_WA) ? 
" write-allocate" : "", - cachetype, cacheable); - - if (cinfo->cacheable != CACHE_CACHEABLE_IDCACHE) - break; - } - - return 0; -} - static void -cpu_identify(device_t self, struct cpu_info *ci, uint32_t midr, uint64_t mpidr) +cpu_identify(device_t self, struct cpu_info *ci) { char model[128]; - identify_aarch64_model(midr, model, sizeof(model)); - if (ci->ci_cpuid == 0) + identify_aarch64_model(ci->ci_midr, model, sizeof(model)); + if (ci->ci_index == 0) cpu_setmodel("%s", model); aprint_naive("\n"); @@ -292,7 +219,6 @@ cpu_identify(device_t self, struct cpu_i static void cpu_identify1(device_t self, struct cpu_info *ci) { - int level; uint32_t ctr, sctlr; /* for cache */ /* SCTLR - System Control Register */ @@ -339,11 +265,6 @@ cpu_identify1(device_t self, struct cpu_ aprint_normal_dev(self, "Dcache line %ld, Icache line %ld\n", sizeof(int) << __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE), sizeof(int) << __SHIFTOUT(ctr, CTR_EL0_IMIN_LINE)); - - for (level = 0; level < MAX_CACHE_LEVEL; level++) { - if (prt_cache(self, level) < 0) - break; - } } @@ -508,7 +429,7 @@ cpu_boot_secondary_processors(void) __asm __volatile ("sev; sev; sev"); /* wait all cpus have done cpu_hatch() */ - while (arm_cpu_mbox) { + while (membar_consumer(), arm_cpu_mbox & arm_cpu_hatched) { __asm __volatile ("wfe"); } @@ -531,6 +452,8 @@ cpu_hatch(struct cpu_info *ci) fpu_attach(ci); cpu_identify1(ci->ci_dev, ci); + aarch64_getcacheinfo(); + aarch64_printcacheinfo(ci->ci_dev); cpu_identify2(ci->ci_dev, ci); mutex_exit(&cpu_hatch_lock); @@ -544,8 +467,13 @@ cpu_hatch(struct cpu_info *ci) MD_CPU_HATCH(ci); /* for non-fdt arch? */ #endif - /* clear my bit of arm_cpu_mbox to tell cpu_boot_secondary_processors() */ - atomic_and_32(&arm_cpu_mbox, ~(1 << ci->ci_cpuid)); + /* + * clear my bit of arm_cpu_mbox to tell cpu_boot_secondary_processors(). + * there are cpu0,1,2,3, and if cpu2 is unresponsive, + * ci_index are each cpu0=0, cpu1=1, cpu2=undef, cpu3=2. 
+ * therefore we have to use device_unit instead of ci_index for mbox. + */ + atomic_and_32(&arm_cpu_mbox, ~__BIT(device_unit(ci->ci_dev))); __asm __volatile ("sev; sev; sev"); } #endif /* MULTIPROCESSOR */ Index: src/sys/arch/aarch64/aarch64/cpufunc.c diff -u src/sys/arch/aarch64/aarch64/cpufunc.c:1.2 src/sys/arch/aarch64/aarch64/cpufunc.c:1.3 --- src/sys/arch/aarch64/aarch64/cpufunc.c:1.2 Tue Jul 17 00:30:34 2018 +++ src/sys/arch/aarch64/aarch64/cpufunc.c Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.c,v 1.2 2018/07/17 00:30:34 christos Exp $ */ +/* $NetBSD: cpufunc.c,v 1.3 2018/08/26 18:15:49 ryo Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -27,27 +27,31 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.2 2018/07/17 00:30:34 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.3 2018/08/26 18:15:49 ryo Exp $"); -#include <sys/types.h> #include <sys/param.h> -#include <sys/systm.h> -#include <aarch64/armreg.h> +#include <sys/types.h> +#include <sys/kmem.h> + +#include <aarch64/cpu.h> #include <aarch64/cpufunc.h> -u_int cputype; /* compat arm */ +u_int cputype; /* compat arm */ +u_int arm_dcache_align; /* compat arm */ +u_int arm_dcache_align_mask; /* compat arm */ +u_int arm_dcache_maxline; -/* L1-L8 cache info */ -struct aarch64_cache_info aarch64_cache_info[MAX_CACHE_LEVEL]; u_int aarch64_cache_vindexsize; u_int aarch64_cache_prefer_mask; -u_int arm_dcache_minline; -u_int arm_dcache_align; -u_int arm_dcache_align_mask; +/* cache info per cluster. the same cluster has the same cache configuration? 
*/ +#define MAXCPUPACKAGES MAXCPUS /* maximum of ci->ci_package_id */ +static struct aarch64_cache_info *aarch64_cacheinfo[MAXCPUPACKAGES]; + static void -extract_cacheunit(int level, bool insn, int cachetype) +extract_cacheunit(int level, bool insn, int cachetype, + struct aarch64_cache_info *cacheinfo) { struct aarch64_cache_unit *cunit; uint32_t ccsidr; @@ -60,9 +64,9 @@ extract_cacheunit(int level, bool insn, ccsidr = reg_ccsidr_el1_read(); if (insn) - cunit = &aarch64_cache_info[level].icache; + cunit = &cacheinfo[level].icache; else - cunit = &aarch64_cache_info[level].dcache; + cunit = &cacheinfo[level].dcache; cunit->cache_type = cachetype; @@ -81,13 +85,30 @@ extract_cacheunit(int level, bool insn, cunit->cache_purging |= (ccsidr & CCSIDR_WA) ? CACHE_PURGING_WA : 0; } -int +void aarch64_getcacheinfo(void) { uint32_t clidr, ctr; int level, cachetype; + struct aarch64_cache_info *cinfo; + + if (cputype == 0) + cputype = aarch64_cpuid(); + + /* already extract about this cluster? */ + KASSERT(curcpu()->ci_package_id < MAXCPUPACKAGES); + cinfo = aarch64_cacheinfo[curcpu()->ci_package_id]; + if (cinfo != NULL) { + curcpu()->ci_cacheinfo = cinfo; + return; + } + + cinfo = aarch64_cacheinfo[curcpu()->ci_package_id] = + kmem_zalloc(sizeof(struct aarch64_cache_info) * MAX_CACHE_LEVEL, + KM_NOSLEEP); + KASSERT(cinfo != NULL); + curcpu()->ci_cacheinfo = cinfo; - cputype = aarch64_cpuid(); /* * CTR - Cache Type Register @@ -108,9 +129,12 @@ aarch64_getcacheinfo(void) break; } - arm_dcache_minline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE); - arm_dcache_align = sizeof(int) << arm_dcache_minline; - arm_dcache_align_mask = arm_dcache_align - 1; + /* remember maximum alignment */ + if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) { + arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE); + arm_dcache_align = sizeof(int) << arm_dcache_maxline; + arm_dcache_align_mask = arm_dcache_align - 1; + } /* * CLIDR - Cache Level ID Register @@ -130,27 +154,27 @@ 
aarch64_getcacheinfo(void) break; case CLIDR_TYPE_ICACHE: cacheable = CACHE_CACHEABLE_ICACHE; - extract_cacheunit(level, true, cachetype); + extract_cacheunit(level, true, cachetype, cinfo); break; case CLIDR_TYPE_DCACHE: cacheable = CACHE_CACHEABLE_DCACHE; - extract_cacheunit(level, false, CACHE_TYPE_PIPT); + extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo); break; case CLIDR_TYPE_IDCACHE: cacheable = CACHE_CACHEABLE_IDCACHE; - extract_cacheunit(level, true, cachetype); - extract_cacheunit(level, false, CACHE_TYPE_PIPT); + extract_cacheunit(level, true, cachetype, cinfo); + extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo); break; case CLIDR_TYPE_UNIFIEDCACHE: cacheable = CACHE_CACHEABLE_UNIFIED; - extract_cacheunit(level, false, CACHE_TYPE_PIPT); + extract_cacheunit(level, false, CACHE_TYPE_PIPT, cinfo); break; default: cacheable = CACHE_CACHEABLE_NONE; break; } - aarch64_cache_info[level].cacheable = cacheable; + cinfo[level].cacheable = cacheable; if (cacheable == CACHE_CACHEABLE_NONE) { /* no more level */ break; @@ -164,24 +188,112 @@ aarch64_getcacheinfo(void) } /* calculate L1 icache virtual index size */ - if (((aarch64_cache_info[0].icache.cache_type == CACHE_TYPE_VIVT) || - (aarch64_cache_info[0].icache.cache_type == CACHE_TYPE_VIPT)) && - ((aarch64_cache_info[0].cacheable == CACHE_CACHEABLE_ICACHE) || - (aarch64_cache_info[0].cacheable == CACHE_CACHEABLE_IDCACHE))) { + if (((cinfo[0].icache.cache_type == CACHE_TYPE_VIVT) || + (cinfo[0].icache.cache_type == CACHE_TYPE_VIPT)) && + ((cinfo[0].cacheable == CACHE_CACHEABLE_ICACHE) || + (cinfo[0].cacheable == CACHE_CACHEABLE_IDCACHE))) { aarch64_cache_vindexsize = - aarch64_cache_info[0].icache.cache_size / - aarch64_cache_info[0].icache.cache_ways; + cinfo[0].icache.cache_size / + cinfo[0].icache.cache_ways; KASSERT(aarch64_cache_vindexsize != 0); aarch64_cache_prefer_mask = aarch64_cache_vindexsize - 1; } else { aarch64_cache_vindexsize = 0; } +} + +static int +prt_cache(device_t self, struct 
aarch64_cache_info *cinfo, int level) +{ + struct aarch64_cache_unit *cunit; + u_int purging; + int i; + const char *cacheable, *cachetype; + + if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE) + return -1; + + for (i = 0; i < 2; i++) { + switch (cinfo[level].cacheable) { + case CACHE_CACHEABLE_ICACHE: + cunit = &cinfo[level].icache; + cacheable = "Instruction"; + break; + case CACHE_CACHEABLE_DCACHE: + cunit = &cinfo[level].dcache; + cacheable = "Data"; + break; + case CACHE_CACHEABLE_IDCACHE: + if (i == 0) { + cunit = &cinfo[level].icache; + cacheable = "Instruction"; + } else { + cunit = &cinfo[level].dcache; + cacheable = "Data"; + } + break; + case CACHE_CACHEABLE_UNIFIED: + cunit = &cinfo[level].dcache; + cacheable = "Unified"; + break; + default: + cunit = &cinfo[level].dcache; + cacheable = "*UNK*"; + break; + } + + switch (cunit->cache_type) { + case CACHE_TYPE_VIVT: + cachetype = "VIVT"; + break; + case CACHE_TYPE_VIPT: + cachetype = "VIPT"; + break; + case CACHE_TYPE_PIPT: + cachetype = "PIPT"; + break; + default: + cachetype = "*UNK*"; + break; + } + + purging = cunit->cache_purging; + aprint_normal_dev(self, + "L%d %dKB/%dB %d-way%s%s%s%s %s %s cache\n", + level + 1, + cunit->cache_size / 1024, + cunit->cache_line_size, + cunit->cache_ways, + (purging & CACHE_PURGING_WT) ? " write-through" : "", + (purging & CACHE_PURGING_WB) ? " write-back" : "", + (purging & CACHE_PURGING_RA) ? " read-allocate" : "", + (purging & CACHE_PURGING_WA) ? 
" write-allocate" : "", + cachetype, cacheable); + + if (cinfo[level].cacheable != CACHE_CACHEABLE_IDCACHE) + break; + } return 0; } +void +aarch64_printcacheinfo(device_t dev) +{ + struct aarch64_cache_info *cinfo; + int level; + + cinfo = curcpu()->ci_cacheinfo; + + for (level = 0; level < MAX_CACHE_LEVEL; level++) + if (prt_cache(dev, cinfo, level) < 0) + break; +} + + + static inline void ln_dcache_wb_all(int level, struct aarch64_cache_unit *cunit) { @@ -239,14 +351,17 @@ ln_dcache_inv_all(int level, struct aarc void aarch64_dcache_wbinv_all(void) { + struct aarch64_cache_info *cinfo; int level; + cinfo = curcpu()->ci_cacheinfo; + for (level = 0; level < MAX_CACHE_LEVEL; level++) { - if (aarch64_cache_info[level].cacheable == CACHE_CACHEABLE_NONE) + if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE) break; __asm __volatile ("dsb ish"); - ln_dcache_wbinv_all(level, &aarch64_cache_info[level].dcache); + ln_dcache_wbinv_all(level, &cinfo[level].dcache); } __asm __volatile ("dsb ish"); } @@ -254,14 +369,17 @@ aarch64_dcache_wbinv_all(void) void aarch64_dcache_inv_all(void) { + struct aarch64_cache_info *cinfo; int level; + cinfo = curcpu()->ci_cacheinfo; + for (level = 0; level < MAX_CACHE_LEVEL; level++) { - if (aarch64_cache_info[level].cacheable == CACHE_CACHEABLE_NONE) + if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE) break; __asm __volatile ("dsb ish"); - ln_dcache_inv_all(level, &aarch64_cache_info[level].dcache); + ln_dcache_inv_all(level, &cinfo[level].dcache); } __asm __volatile ("dsb ish"); } @@ -269,14 +387,17 @@ aarch64_dcache_inv_all(void) void aarch64_dcache_wb_all(void) { + struct aarch64_cache_info *cinfo; int level; + cinfo = curcpu()->ci_cacheinfo; + for (level = 0; level < MAX_CACHE_LEVEL; level++) { - if (aarch64_cache_info[level].cacheable == CACHE_CACHEABLE_NONE) + if (cinfo[level].cacheable == CACHE_CACHEABLE_NONE) break; __asm __volatile ("dsb ish"); - ln_dcache_wb_all(level, &aarch64_cache_info[level].dcache); + 
ln_dcache_wb_all(level, &cinfo[level].dcache); } __asm __volatile ("dsb ish"); } Index: src/sys/arch/aarch64/aarch64/genassym.cf diff -u src/sys/arch/aarch64/aarch64/genassym.cf:1.6 src/sys/arch/aarch64/aarch64/genassym.cf:1.7 --- src/sys/arch/aarch64/aarch64/genassym.cf:1.6 Fri Aug 3 16:32:55 2018 +++ src/sys/arch/aarch64/aarch64/genassym.cf Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.6 2018/08/03 16:32:55 ryo Exp $ +# $NetBSD: genassym.cf,v 1.7 2018/08/26 18:15:49 ryo Exp $ #- # Copyright (c) 2014 The NetBSD Foundation, Inc. # All rights reserved. @@ -299,10 +299,13 @@ define FPREG_Q31 offsetof(struct fpreg, define FPREG_FPCR offsetof(struct fpreg, fpcr) define FPREG_FPSR offsetof(struct fpreg, fpsr) +define CPU_INFO_SIZE sizeof(struct cpu_info) define CI_CURPRIORITY offsetof(struct cpu_info, ci_schedstate.spc_curpriority) define CI_CURLWP offsetof(struct cpu_info, ci_curlwp) define CI_CPL offsetof(struct cpu_info, ci_cpl) define CI_CPUID offsetof(struct cpu_info, ci_cpuid) +define CI_MIDR offsetof(struct cpu_info, ci_midr) +define CI_MPIDR offsetof(struct cpu_info, ci_mpidr) define CI_ASTPENDING offsetof(struct cpu_info, ci_astpending) define CI_WANT_RESCHED offsetof(struct cpu_info, ci_want_resched) define CI_INTR_DEPTH offsetof(struct cpu_info, ci_intr_depth) Index: src/sys/arch/aarch64/aarch64/locore.S diff -u src/sys/arch/aarch64/aarch64/locore.S:1.19 src/sys/arch/aarch64/aarch64/locore.S:1.20 --- src/sys/arch/aarch64/aarch64/locore.S:1.19 Fri Aug 24 19:06:30 2018 +++ src/sys/arch/aarch64/aarch64/locore.S Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.19 2018/08/24 19:06:30 ryo Exp $ */ +/* $NetBSD: locore.S,v 1.20 2018/08/26 18:15:49 ryo Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -35,7 +35,7 @@ #include <aarch64/hypervisor.h> #include "assym.h" -RCSID("$NetBSD: locore.S,v 1.19 2018/08/24 19:06:30 ryo Exp $") +RCSID("$NetBSD: locore.S,v 1.20 2018/08/26 18:15:49 ryo Exp $") /* #define 
DEBUG_LOCORE */ /* #define DEBUG_MMU */ @@ -241,14 +241,14 @@ END(aarch64_start) #if defined(VERBOSE_LOCORE) || defined(DEBUG_LOCORE) /* - * print "[CPU$x27] " (x27 as cpuid) + * print "[CPU$x27] " (x27 as cpuindex) * XXX: max 4 digit */ printcpu: stp x0, lr, [sp, #-16]! stp x25, x26, [sp, #-16]! PRINT("[CPU") - mov x26, x27 /* n = cpuid */ + mov x26, x27 /* n = cpuindex */ mov x25, xzr /* zeropad = 0 */ mov x1, #1000 udiv x0, x26, x1 /* x0 = n / 1000 */ @@ -294,25 +294,21 @@ printcpu: ENTRY_NP(aarch64_mpstart) ENTRY_NP(cortex_mpstart) /* compat arm */ - /* - * XXX: - * cpuid(index) is read from MPIDR_EL1.AFF0. AFF1,2,3 are ignored. - * cpuid should be passed from primary processor... - */ - mrs x27, mpidr_el1 - and x27, x27, #MPIDR_AFF0 /* XXX: cpuid = mpidr_el1 & Aff0 */ + ADDR x0, arm_cpu_hatch_arg /* from cpu0 */ + ldr w27, [x0] /* x27 = cpuindex */ mov x0, #1 - lsl x28, x0, x27 /* x28 = 1 << cpuid */ - mov x0, x28 + lsl x28, x0, x27 /* x28 = 1 << cpuindex */ - /* x27 = cpuid, x28 = (1 << cpuid) */ + /* x27 = cpuindex, x28 = (1 << cpuindex) */ + cmp x27, MAXCPUS + bge toomanycpus /* set stack pointer for boot */ #define BOOT_STACKSIZE 256 mov x1, #BOOT_STACKSIZE mul x1, x1, x27 ADDR x0, bootstk_cpus - sub sp, x0, x1 /* sp = bootstk_cpus - BOOT_STACKSIZE * cpuid */ + sub sp, x0, x1 /* sp= bootstk_cpus-(BOOT_STACKSIZE*cpuindex) */ #ifdef DEBUG_LOCORE PRINTCPU() @@ -384,7 +380,7 @@ mp_vstart: PRINTCPU() PRINT("arm_cpu_hatched = ") ADDR x0, _C_LABEL(arm_cpu_hatched) - ldr x0, [x0] + ldr w0, [x0] bl print_x0 PRINTCPU() @@ -393,17 +389,22 @@ mp_vstart: bl print_x0 #endif - ADDR x0, _C_LABEL(cpus_midr) - mrs x1, midr_el1 - str w1, [x0, x27, lsl #2] /* cpu_midr[cpuid] = midr_el1 */ + msr tpidr_el0, xzr /* tpidr_el0 (for TLS) = NULL */ - ADDR x0, _C_LABEL(cpus_mpidr) - mrs x1, mpidr_el1 - str x1, [x0, x27, lsl #3] /* cpu_mpidr[cpuid] = mpidr_el1 */ + /* set curcpu(), and fill curcpu()->ci_{midr,mpidr} */ + mov x0, #CPU_INFO_SIZE + mul x0, x27, x0 + ADDR x1, 
_C_LABEL(cpu_info_store) + add x0, x0, x1 /* x0 = &cpu_info_store[cpuindex] */ + msr tpidr_el1, x0 /* tpidr_el1 = curcpu() = x0 */ + mrs x1, midr_el1 + str x1, [x0, #CI_MIDR] /* curcpu()->ci_cpuid = midr_el1 */ + mrs x1, mpidr_el1 + str x1, [x0, #CI_MPIDR] /* curcpu()->ci_mpidr = mpidr_el1 */ /* - * atomic_or_32(&arm_cpu_hatched, 1 << cpuid) + * atomic_or_32(&arm_cpu_hatched, (1 << cpuindex)) * to tell my activity to primary processor. */ ADDR x0, _C_LABEL(arm_cpu_hatched) @@ -415,7 +416,7 @@ mp_vstart: PRINTCPU() PRINT("arm_cpu_hatched -> ") ADDR x0, _C_LABEL(arm_cpu_hatched) - ldr x0, [x0] + ldr w0, [x0] bl print_x0 #endif @@ -438,7 +439,7 @@ mp_vstart: #ifdef DEBUG_LOCORE /* XXX: delay to prevent the mixing of console output */ mov x0, #0x4000000 - mul x0, x0, x27 /* delay (cpuid * 0x4000000) */ + mul x0, x0, x27 /* delay (cpuindex * 0x4000000) */ 1: subs x0, x0, #1 bne 1b @@ -452,12 +453,8 @@ mp_vstart: bl print_x0 #endif - msr tpidr_el0, xzr /* tpidr_el0 (for TLS) = NULL */ - /* fill my cpu_info */ - ADDR x0, _C_LABEL(cpu_info) - ldr x0, [x0, x27, lsl #3] /* x0 = cpu_info[cpuid] */ - msr tpidr_el1, x0 /* tpidr_el1 = my cpu_info */ + mrs x0, tpidr_el1 /* curcpu() */ ldr x1, [x0, #CI_IDLELWP] /* x1 = curcpu()->ci_data.cpu_idlelwp */ str x1, [x0, #CI_CURLWP] /* curlwp is idlelwp */ @@ -473,6 +470,13 @@ mp_vstart: b _C_LABEL(idle_loop) /* never to return */ END(aarch64_mpstart) +toomanycpus: + PRINTCPU() + PRINT("too many cpus\r\n") +1: wfi + b 1b + + #else /* MULTIPROCESSOR */ ENTRY_NP(aarch64_mpstart) Index: src/sys/arch/aarch64/include/cpu.h diff -u src/sys/arch/aarch64/include/cpu.h:1.6 src/sys/arch/aarch64/include/cpu.h:1.7 --- src/sys/arch/aarch64/include/cpu.h:1.6 Wed Aug 8 19:01:15 2018 +++ src/sys/arch/aarch64/include/cpu.h Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.6 2018/08/08 19:01:15 jmcneill Exp $ */ +/* $NetBSD: cpu.h,v 1.7 2018/08/26 18:15:49 ryo Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. 
@@ -38,6 +38,8 @@ #include "opt_multiprocessor.h" #endif +#include <sys/param.h> + #if defined(_KERNEL) || defined(_KMEMUSER) #include <sys/evcnt.h> #include <aarch64/frame.h> @@ -82,7 +84,13 @@ struct cpu_info { /* interrupt controller */ u_int ci_gic_redist; /* GICv3 redistributor index */ uint64_t ci_gic_sgir; /* GICv3 SGIR target */ -}; + + uint64_t ci_midr; /* MIDR_EL1 */ + uint64_t ci_mpidr; /* MPIDR_EL1 */ + + struct aarch64_cache_info *ci_cacheinfo; + +} __aligned(COHERENCY_UNIT); static inline struct cpu_info * curcpu(void) @@ -103,8 +111,7 @@ void cpu_hatch(struct cpu_info *); extern struct cpu_info *cpu_info[]; extern volatile u_int arm_cpu_hatched; /* MULTIPROCESSOR */ -extern uint32_t cpus_midr[]; /* MULTIPROCESSOR */ -extern uint64_t cpus_mpidr[]; /* MULTIPROCESSOR */ +extern volatile u_int arm_cpu_hatch_arg;/* MULTIPROCESSOR */ #define CPU_INFO_ITERATOR cpuid_t #ifdef MULTIPROCESSOR Index: src/sys/arch/aarch64/include/cpufunc.h diff -u src/sys/arch/aarch64/include/cpufunc.h:1.2 src/sys/arch/aarch64/include/cpufunc.h:1.3 --- src/sys/arch/aarch64/include/cpufunc.h:1.2 Mon Jul 23 22:51:39 2018 +++ src/sys/arch/aarch64/include/cpufunc.h Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.h,v 1.2 2018/07/23 22:51:39 ryo Exp $ */ +/* $NetBSD: cpufunc.h,v 1.3 2018/08/26 18:15:49 ryo Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -74,7 +74,8 @@ extern u_int aarch64_cache_vindexsize; / extern u_int aarch64_cache_prefer_mask; extern u_int cputype; /* compat arm */ -int aarch64_getcacheinfo(void); +void aarch64_getcacheinfo(void); +void aarch64_printcacheinfo(device_t); void aarch64_dcache_wbinv_all(void); void aarch64_dcache_inv_all(void); Index: src/sys/arch/arm/broadcom/bcm283x_platform.c diff -u src/sys/arch/arm/broadcom/bcm283x_platform.c:1.14 src/sys/arch/arm/broadcom/bcm283x_platform.c:1.15 --- src/sys/arch/arm/broadcom/bcm283x_platform.c:1.14 Sat Aug 25 20:55:15 2018 +++ src/sys/arch/arm/broadcom/bcm283x_platform.c Sun Aug 26 
18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: bcm283x_platform.c,v 1.14 2018/08/25 20:55:15 rin Exp $ */ +/* $NetBSD: bcm283x_platform.c,v 1.15 2018/08/26 18:15:49 ryo Exp $ */ /*- * Copyright (c) 2017 Jared D. McNeill <jmcne...@invisible.ca> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: bcm283x_platform.c,v 1.14 2018/08/25 20:55:15 rin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: bcm283x_platform.c,v 1.15 2018/08/26 18:15:49 ryo Exp $"); #include "opt_arm_debug.h" #include "opt_bcm283x.h" @@ -738,12 +738,19 @@ bcm2836_bootstrap(void) #endif #endif /* MULTIPROCESSOR */ -#ifdef __aarch64__ /* - * XXX: use psci_fdt_bootstrap() + * XXX: TODO: + * should make cpu_fdt_bootstrap() that support spin-table and use it + * to share with arm/aarch64. */ +#ifdef __aarch64__ extern void aarch64_mpstart(void); for (int i = 1; i < RPI_CPU_MAX; i++) { + /* argument for mpstart() */ + arm_cpu_hatch_arg = i; + cpu_dcache_wb_range((vaddr_t)&arm_cpu_hatch_arg, + sizeof(arm_cpu_hatch_arg)); + /* * Reference: * armstubs/armstub8.S @@ -753,16 +760,22 @@ bcm2836_bootstrap(void) #define RPI3_ARMSTUB8_SPINADDR_BASE 0x000000d8 cpu_release_addr = (void *) AARCH64_PA_TO_KVA(RPI3_ARMSTUB8_SPINADDR_BASE + i * 8); - *cpu_release_addr = aarch64_kern_vtophys((vaddr_t)aarch64_mpstart); + *cpu_release_addr = + aarch64_kern_vtophys((vaddr_t)aarch64_mpstart); /* need flush cache. 
secondary processors are cache disabled */ - cpu_dcache_wb_range((vaddr_t)cpu_release_addr, sizeof(cpu_release_addr)); + cpu_dcache_wb_range((vaddr_t)cpu_release_addr, + sizeof(cpu_release_addr)); + /* Wake up AP in case firmware has placed it in WFE state */ __asm __volatile("sev" ::: "memory"); -#if defined(VERBOSE_INIT_ARM) && defined(EARLYCONS) - /* wait secondary processor's debug output */ - gtmr_delay(100000); -#endif + /* Wait for APs to start */ + for (int loop = 0; loop < 16; loop++) { + membar_consumer(); + if (arm_cpu_hatched & __BIT(i)) + break; + gtmr_delay(10000); + } } #endif /* __aarch64__ */ @@ -772,6 +785,7 @@ bcm2836_bootstrap(void) * It is need to initialize the secondary CPU, * and go into wfi loop (cortex_mpstart), * otherwise system would be freeze... + * (because netbsd will use the spinning address) */ extern void cortex_mpstart(void); @@ -782,29 +796,26 @@ bcm2836_bootstrap(void) bus_space_write_4(iot, ioh, BCM2836_LOCAL_MAILBOX3_SETN(i), (uint32_t)cortex_mpstart); + /* Wake up AP in case firmware has placed it in WFE state */ + __asm __volatile("sev" ::: "memory"); + + /* Wait for APs to start */ + for (int loop = 0; loop < 16; loop++) { + membar_consumer(); + if (arm_cpu_hatched & __BIT(i)) + break; + gtmr_delay(10000); + } } #endif #ifdef MULTIPROCESSOR - /* Wake up AP in case firmware has placed it in WFE state */ - __asm __volatile("sev" ::: "memory"); - - for (int loop = 0; loop < 16; loop++) { - if (arm_cpu_hatched == __BITS(arm_cpu_max - 1, 1)) - break; - gtmr_delay(10000); - } - for (size_t i = 1; i < arm_cpu_max; i++) { if ((arm_cpu_hatched & (1 << i)) == 0) { printf("%s: warning: cpu%zu failed to hatch\n", __func__, i); } } -#if defined(VERBOSE_INIT_ARM) && defined(EARLYCONS) - /* for viewability of secondary processor's debug outputs */ - printf("\n"); -#endif #endif } Index: src/sys/arch/arm/fdt/cpu_fdt.c diff -u src/sys/arch/arm/fdt/cpu_fdt.c:1.8 src/sys/arch/arm/fdt/cpu_fdt.c:1.9 --- src/sys/arch/arm/fdt/cpu_fdt.c:1.8 Mon 
Jul 2 16:36:49 2018 +++ src/sys/arch/arm/fdt/cpu_fdt.c Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_fdt.c,v 1.8 2018/07/02 16:36:49 jmcneill Exp $ */ +/* $NetBSD: cpu_fdt.c,v 1.9 2018/08/26 18:15:49 ryo Exp $ */ /*- * Copyright (c) 2017 Jared McNeill <jmcne...@invisible.ca> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.8 2018/07/02 16:36:49 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.9 2018/08/26 18:15:49 ryo Exp $"); #include <sys/param.h> #include <sys/bus.h> @@ -97,15 +97,17 @@ cpu_fdt_match(device_t parent, cfdata_t switch (type) { case ARM_CPU_ARMV7: case ARM_CPU_ARMV8: - /* XXX NetBSD requires all CPUs to be in the same cluster */ if (fdtbus_get_reg(phandle, 0, &mpidr, NULL) != 0) return 0; +#ifndef __aarch64__ + /* XXX NetBSD/arm requires all CPUs to be in the same cluster */ const u_int bp_clid = cpu_clusterid(); const u_int clid = __SHIFTOUT(mpidr, MPIDR_AFF1); if (bp_clid != clid) return 0; +#endif break; default: break; @@ -136,8 +138,10 @@ cpu_fdt_attach(device_t parent, device_t aprint_error(": missing 'reg' property\n"); return; } - - cpuid = __SHIFTOUT(mpidr, MPIDR_AFF0); +#ifndef __aarch64__ + mpidr = __SHIFTOUT(mpidr, MPIDR_AFF0); +#endif + cpuid = mpidr; break; default: cpuid = 0; Index: src/sys/arch/arm/fdt/psci_fdt.c diff -u src/sys/arch/arm/fdt/psci_fdt.c:1.14 src/sys/arch/arm/fdt/psci_fdt.c:1.15 --- src/sys/arch/arm/fdt/psci_fdt.c:1.14 Fri Aug 24 21:56:13 2018 +++ src/sys/arch/arm/fdt/psci_fdt.c Sun Aug 26 18:15:49 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: psci_fdt.c,v 1.14 2018/08/24 21:56:13 ryo Exp $ */ +/* $NetBSD: psci_fdt.c,v 1.15 2018/08/26 18:15:49 ryo Exp $ */ /*- * Copyright (c) 2017 Jared McNeill <jmcne...@invisible.ca> @@ -29,7 +29,7 @@ #include "opt_multiprocessor.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: psci_fdt.c,v 1.14 2018/08/24 21:56:13 ryo Exp $"); +__KERNEL_RCSID(0, "$NetBSD: psci_fdt.c,v 1.15 2018/08/26 18:15:49 ryo Exp $"); #include <sys/param.h> 
#include <sys/bus.h> @@ -42,6 +42,7 @@ __KERNEL_RCSID(0, "$NetBSD: psci_fdt.c,v #include <arm/locore.h> #include <arm/armreg.h> +#include <arm/cpufunc.h> #include <arm/arm/psci.h> #include <arm/fdt/psci_fdt.h> @@ -172,8 +173,8 @@ void psci_fdt_bootstrap(void) { #ifdef MULTIPROCESSOR - extern void cortex_mpstart(void); uint64_t mpidr, bp_mpidr; + u_int cpuindex; int child; const char *devtype; @@ -199,7 +200,7 @@ psci_fdt_bootstrap(void) bp_mpidr = cpu_mpidr_aff_read(); /* Boot APs */ - uint32_t started = 0; + cpuindex = 1; for (child = OF_child(cpus); child; child = OF_peer(child)) { if (!fdtbus_status_okay(child)) continue; @@ -208,21 +209,25 @@ psci_fdt_bootstrap(void) if (mpidr == bp_mpidr) continue; /* BP already started */ - /* XXX NetBSD requires all CPUs to be in the same cluster */ - if ((mpidr & ~MPIDR_AFF0) != (bp_mpidr & ~MPIDR_AFF0)) +#ifdef __aarch64__ + /* argument for mpstart() */ + arm_cpu_hatch_arg = cpuindex; + cpu_dcache_wb_range((vaddr_t)&arm_cpu_hatch_arg, + sizeof(arm_cpu_hatch_arg)); +#endif + + int ret = psci_cpu_on(cpuindex, psci_fdt_mpstart_pa(), 0); + if (ret != PSCI_SUCCESS) continue; - const u_int cpuid = __SHIFTOUT(mpidr, MPIDR_AFF0); - int ret = psci_cpu_on(mpidr, psci_fdt_mpstart_pa(), 0); - if (ret == PSCI_SUCCESS) - started |= __BIT(cpuid); - } + /* Wait for APs to start */ + for (u_int i = 0x4000000; i > 0; i--) { + membar_consumer(); + if (arm_cpu_hatched & __BIT(cpuindex)) + break; + } - /* Wait for APs to start */ - for (u_int i = 0x10000000; i > 0; i--) { - membar_consumer(); - if (arm_cpu_hatched == started) - break; + cpuindex++; } #endif }