Hi York, I have mostly minor comments this time; this is looking pretty good.
On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote: > Secondary cores need to be released from holdoff by boot release > registers. With GPP bootrom, they can boot from main memory > directly. Individual spin table is used for each core. If a single > release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL > will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved > in device tree so OS won't overwrite. > > Signed-off-by: York Sun <york...@freescale.com> > Signed-off-by: Arnab Basu <arnab.b...@freescale.com> > --- > v2: Removed copying boot page. Use u-boot image as is in memory. > Added dealing with different size of addr_cell in device tree. > Added dealing with big- and little-endian. > Added flushing spin table after cpu_release(). > > arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + > arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ > arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + > arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ > arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++--- > arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172 > +++++++++++++++++++++ > arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ > arch/arm/cpu/armv8/transition.S | 63 +------- > arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- > arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ > arch/arm/include/asm/macro.h | 92 +++++++++++ > arch/arm/lib/gic_64.S | 10 +- > common/board_f.c | 2 +- > 13 files changed, 525 insertions(+), 88 deletions(-) > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h > > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile > b/arch/arm/cpu/armv8/fsl-lsch3/Makefile > index 9249537..f920eeb 100644 > --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile > +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile > @@ -7,3 +7,5 @@ > obj-y += cpu.o > obj-y += lowlevel.o > obj-y += speed.o > +obj-$(CONFIG_MP) += mp.o > +obj-$(CONFIG_OF_LIBFDT) += fdt.o > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > index c129d03..47b947f 100644 > --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > @@ -11,6 +11,7 @@ > #include <asm/io.h> > #include <asm/arch-fsl-lsch3/immap_lsch3.h> > #include "cpu.h" > +#include "mp.h" > #include "speed.h" > #include <fsl_mc.h> > > @@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) > #endif > return error; > } > + > + > +int arch_early_init_r(void) > +{ > + int rv; > + rv = fsl_lsch3_wake_seconday_cores(); > + > + if (rv) > + printf("Did not wake secondary cores\n"); > + > + return 0; > +} > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > index 28544d7..2e3312b 100644 > --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > @@ -5,3 +5,4 @@ > */ > > int fsl_qoriq_core_to_cluster(unsigned int core); > +u32 cpu_mask(void); > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c > b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c > new file mode 100644 > index 0000000..2dbcdcb > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c > @@ -0,0 +1,56 @@ > +/* > + * Copyright 2014 Freescale Semiconductor, Inc. > + * > + * SPDX-License-Identifier: GPL-2.0+ > + */ > + > +#include <common.h> > +#include <libfdt.h> > +#include <fdt_support.h> > +#include "mp.h" > + > +#ifdef CONFIG_MP > +void ft_fixup_cpu(void *blob) > +{ > + int off; > + __maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr(); > + fdt32_t *reg; > + int addr_cells; > + u64 val; > + size_t *boot_code_size = &(__secondary_boot_code_size); > + > + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpus", > 4); I didn't think /cpus had device_type = "cpus". I can't see any instances in any DTs I have to hand. Can we not find /cpus by path? > + of_bus_default_count_cells(blob, off, &addr_cells, NULL); > + > + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", > 4); > + while (off != -FDT_ERR_NOTFOUND) { > + reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0); > + if (reg) { > + val = spin_tbl_addr; > +#ifndef CONFIG_FSL_SMP_RELEASE_ALL > + val += id_to_core(of_read_number(reg, addr_cells)) > + * SPIN_TABLE_ELEM_SIZE; > +#endif > + val = cpu_to_fdt64(val); > + fdt_setprop_string(blob, off, "enable-method", > + "spin-table"); > + fdt_setprop(blob, off, "cpu-release-addr", > + &val, sizeof(val)); > + } else { > + puts("cpu NULL\n"); Could we change that to "Warning: found cpu node without reg property" or something like that which makes the problem clear? > + } > + off = fdt_node_offset_by_prop_value(blob, off, "device_type", > + "cpu", 4); > + } > + > + fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code, > + *boot_code_size); > +} > +#endif > + > +void ft_cpu_setup(void *blob, bd_t *bd) > +{ > +#ifdef CONFIG_MP > + ft_fixup_cpu(blob); > +#endif > +} > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > index ad32b6c..629e6d4 100644 > --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > @@ -8,7 +8,9 @@ > > #include <config.h> > #include <linux/linkage.h> > +#include <asm/gic.h> > #include <asm/macro.h> > +#include "mp.h" > > ENTRY(lowlevel_init) > mov x29, lr /* Save LR */ > @@ -37,29 +39,119 @@ ENTRY(lowlevel_init) > > branch_if_master x0, x1, 1f > > + ldr x0, =secondary_boot_func > + blr x0 > + > +1: > +2: Isn't the '2' label redundant? > + mov lr, x29 /* Restore LR */ > + ret > +ENDPROC(lowlevel_init) > + > + /* Keep literals not used by the secondary boot code outside it */ > + .ltorg > + > + /* Using 64 bit alignment since the spin table is accessed as data */ > + .align 4 > + .global secondary_boot_code > + /* Secondary Boot Code starts here */ > +secondary_boot_code: > + .global __spin_table > +__spin_table: > + .space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE > + > + .align 2 > +ENTRY(secondary_boot_func) > /* > - * Slave should wait for master clearing spin table. > - * This sync prevent salves observing incorrect > - * value of spin table and jumping to wrong place. > + * MPIDR_EL1 Fields: > + * MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1) > + * MPIDR[7:2] = AFF0_RES > + * MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3) > + * MPIDR[23:16] = AFF2_CLUSTERID > + * MPIDR[24] = MT > + * MPIDR[29:25] = RES0 > + * MPIDR[30] = U > + * MPIDR[31] = ME > + * MPIDR[39:32] = AFF3 > + * > + * Linear Processor ID (LPID) calculation from MPIDR_EL1: > + * (We only use AFF0_CPUID and AFF1_CLUSTERID for now > + * until AFF2_CLUSTERID and AFF3 have non-zero values) > + * > + * LPID = MPIDR[15:8] | MPIDR[1:0] > */ > -#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) > -#ifdef CONFIG_GICV2 > - ldr x0, =GICC_BASE > + mrs x0, mpidr_el1 > + ubfm x1, x0, #8, #15 > + ubfm x2, x0, #0, #1 > + orr x10, x2, x1, lsl #2 /* x10 has LPID */ > + ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */ > + /* > + * offset of the spin table element for this core from start of spin > + * table (each elem is padded to 64 bytes) > + */ > + lsl x1, x10, #6 > + ldr x0, =__spin_table > + /* physical address of this cpus spin table element */ > + add x11, x1, x0 > + > + str x9, [x11, #16] /* LPID */ > + mov x4, #1 > + str x4, [x11, #8] /* STATUS */ > + dsb sy > +#if defined(CONFIG_GICV3) > + gic_wait_for_interrupt_m x0 > #endif > - bl gic_wait_for_interrupt Why do we only need to wait for GICv3? We previously did so for GICv2. > + > + bl secondary_switch_to_el2 > +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 > + secondary_switch_to_el1 > #endif Missing bl? Is there any reason to have the SWITCH_TO_EL1 option other than for debugging? EL2 is the preferred EL to boot at for Linux and Xen (it gives far more flexibility), and if dropping to EL1 is necessary I think it would make more sense as a run-time option than a compile-time option. > > - /* > - * All processors will enter EL2 and optionally EL1. > +slave_cpu: > + wfe > +#ifdef CONFIG_FSL_SMP_RELEASE_ALL > + /* All cores are released from the address in the 1st spin table > + * element > */ > - bl armv8_switch_to_el2 > -#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 > - bl armv8_switch_to_el1 > + ldr x1, =__spin_table > + ldr x0, [x1] > +#else > + ldr x0, [x11] > +#endif > + cbz x0, slave_cpu Similarly is there any reason to have the option of a single release addr if we can support unique addresses? [...] > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c > b/arch/arm/cpu/armv8/fsl-lsch3/mp.c > new file mode 100644 > index 0000000..b29eda9 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c > @@ -0,0 +1,172 @@ > +/* > + * Copyright 2014 Freescale Semiconductor, Inc. > + * > + * SPDX-License-Identifier: GPL-2.0+ > + */ > + > +#include <common.h> > +#include <asm/io.h> > +#include <asm/system.h> > +#include <asm/io.h> > +#include <asm/arch-fsl-lsch3/immap_lsch3.h> > +#include "mp.h" > + > +DECLARE_GLOBAL_DATA_PTR; > + > +void *get_spin_tbl_addr(void) > +{ > + void *ptr = (void *)&__spin_table; > + > + return ptr; > +} I don't think the cast is necessary here. As far as I can see this could be just: void *get_spin_tbl_addr(void) { return &__spin_table; } [...] > diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h > index f77e4b8..0009c28 100644 > --- a/arch/arm/include/asm/macro.h > +++ b/arch/arm/include/asm/macro.h > @@ -105,6 +105,98 @@ lr .req x30 > cbz \xreg1, \master_label > .endm > > +.macro armv8_switch_to_el2_m, xreg1 > + mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */ > + msr scr_el3, \xreg1 When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due to lack of a handler. Would it make sense to do similarly here and disable SMC here until we have a user (e.g. PSCI)? > + msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */ > + mov \xreg1, #0x33ff > + msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */ > + > + /* Initialize SCTLR_EL2 */ > + /* > + * setting RES1 bits (29,28,23,22,18,16,11,5,4) to 1 > + * and RES0 bits (31,30,27,26,24,21,20,17,15-13,10-6) + > + * EE,WXN,I,SA,C,A,M to 0 > + */ > + mov \xreg1, #0x0830 > + movk \xreg1, #0x30C5, lsl #16 > + msr sctlr_el2, \xreg1 > + > + /* Return to the EL2_SP2 mode from EL3 */ > + mov \xreg1, sp > + msr sp_el2, \xreg1 /* Migrate SP */ > + mrs \xreg1, vbar_el3 > + msr vbar_el2, \xreg1 /* Migrate VBAR */ > + mov \xreg1, #0x3c9 > + msr spsr_el3, \xreg1 /* EL2_SP2 | D | A | I | F */ > + msr elr_el3, lr > + eret > +.endm Otherwise this looks fine to me. > +.macro armv8_switch_to_el1_m, xreg1, xreg2 > + /* Initialize Generic Timers */ > + mrs \xreg1, cnthctl_el2 > + orr \xreg1, \xreg1, #0x3 /* Enable EL1 access to timers */ > + msr cnthctl_el2, \xreg1 > + msr cntvoff_el2, xzr > + > + /* Initilize MPID/MPIDR registers */ > + mrs \xreg1, midr_el1 > + mrs \xreg2, mpidr_el1 > + msr vpidr_el2, \xreg1 > + msr vmpidr_el2, \xreg2 > + > + /* Disable coprocessor traps */ > + mov \xreg1, #0x33ff > + msr cptr_el2, \xreg1 /* Disable coprocessor traps to EL2 */ > + msr hstr_el2, xzr /* Disable coprocessor traps to EL2 */ > + mov \xreg1, #3 << 20 > + msr cpacr_el1, \xreg1 /* Enable FP/SIMD at EL1 */ > + > + /* Initialize HCR_EL2 */ > + mov \xreg1, #(1 << 31) /* 64bit EL1 */ > + orr \xreg1, \xreg1, #(1 << 29) /* Disable HVC */ > + msr hcr_el2, \xreg1 > + > + /* SCTLR_EL1 initialization */ > + /* > + * setting RES1 bits (29,28,23,22,20,11) to 1 > + * and RES0 bits (31,30,27,21,17,13,10,6) + > + * UCI,EE,EOE,WXN,nTWE,nTWI,UCT,DZE,I,UMA,SED,ITD, > + * CP15BEN,SA0,SA,C,A,M to 0 > + */ > + mov \xreg1, #0x0800 > + movk \xreg1, #0x30d0, lsl #16 > + msr sctlr_el1, \xreg1 > + > + /* Return to the EL1_SP1 mode from EL2 */ > + mov \xreg1, sp > + msr sp_el1, \xreg1 /* Migrate SP */ > + mrs \xreg1, vbar_el2 > + msr vbar_el1, \xreg1 /* Migrate VBAR */ > + mov \xreg1, #0x3c5 > + msr spsr_el2, \xreg1 /* EL1_SP1 | D | A | I | F */ > + msr elr_el2, lr > + eret > +.endm This looks fine to me. Thanks, Mark. _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot