On 08/21/2014 06:47 AM, Mark Rutland wrote: > Hi York, > > I have mostly minor comments this time; this is looking pretty good. > > On Tue, Aug 19, 2014 at 09:28:00PM +0100, York Sun wrote: >> Secondary cores need to be released from holdoff by boot release >> registers. With GPP bootrom, they can boot from main memory >> directly. Individual spin table is used for each core. If a single >> release address is needed, defining macro CONFIG_FSL_SMP_RELEASE_ALL >> will use the CPU_RELEASE_ADDR. Spin table and the boot page is reserved >> in device tree so OS won't overwrite. >> >> Signed-off-by: York Sun <york...@freescale.com> >> Signed-off-by: Arnab Basu <arnab.b...@freescale.com> >> --- >> v2: Removed copying boot page. Use u-boot image as is in memory. >> Added dealing with different size of addr_cell in device tree. >> Added dealing with big- and little-endian. >> Added flushing spin table after cpu_release(). >> >> arch/arm/cpu/armv8/fsl-lsch3/Makefile | 2 + >> arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 13 ++ >> arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 1 + >> arch/arm/cpu/armv8/fsl-lsch3/fdt.c | 56 +++++++ >> arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 128 ++++++++++++--- >> arch/arm/cpu/armv8/fsl-lsch3/mp.c | 172 >> +++++++++++++++++++++ >> arch/arm/cpu/armv8/fsl-lsch3/mp.h | 36 +++++ >> arch/arm/cpu/armv8/transition.S | 63 +------- >> arch/arm/include/asm/arch-fsl-lsch3/config.h | 3 +- >> arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 35 +++++ >> arch/arm/include/asm/macro.h | 92 +++++++++++ >> arch/arm/lib/gic_64.S | 10 +- >> common/board_f.c | 2 +- >> 13 files changed, 525 insertions(+), 88 deletions(-) >> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/fdt.c >> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.c >> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/mp.h >> >> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile >> b/arch/arm/cpu/armv8/fsl-lsch3/Makefile >> index 9249537..f920eeb 100644 >> --- a/arch/arm/cpu/armv8/fsl-lsch3/Makefile >> +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile >> @@ -7,3 +7,5 @@ >> obj-y += cpu.o >> obj-y += lowlevel.o >> obj-y += speed.o >> +obj-$(CONFIG_MP) += mp.o >> +obj-$(CONFIG_OF_LIBFDT) += fdt.o >> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c >> b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c >> index c129d03..47b947f 100644 >> --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c >> +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c >> @@ -11,6 +11,7 @@ >> #include <asm/io.h> >> #include <asm/arch-fsl-lsch3/immap_lsch3.h> >> #include "cpu.h" >> +#include "mp.h" >> #include "speed.h" >> #include <fsl_mc.h> >> >> @@ -434,3 +435,15 @@ int cpu_eth_init(bd_t *bis) >> #endif >> return error; >> } >> + >> + >> +int arch_early_init_r(void) >> +{ >> + int rv; >> + rv = fsl_lsch3_wake_seconday_cores(); >> + >> + if (rv) >> + printf("Did not wake secondary cores\n"); >> + >> + return 0; >> +} >> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h >> b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h >> index 28544d7..2e3312b 100644 >> --- a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h >> +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h >> @@ -5,3 +5,4 @@ >> */ >> >> int fsl_qoriq_core_to_cluster(unsigned int core); >> +u32 cpu_mask(void); >> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/fdt.c >> b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c >> new file mode 100644 >> index 0000000..2dbcdcb >> --- /dev/null >> +++ b/arch/arm/cpu/armv8/fsl-lsch3/fdt.c >> @@ -0,0 +1,56 @@ >> +/* >> + * Copyright 2014 Freescale Semiconductor, Inc. >> + * >> + * SPDX-License-Identifier: GPL-2.0+ >> + */ >> + >> +#include <common.h> >> +#include <libfdt.h> >> +#include <fdt_support.h> >> +#include "mp.h" >> + >> +#ifdef CONFIG_MP >> +void ft_fixup_cpu(void *blob) >> +{ >> + int off; >> + __maybe_unused u64 spin_tbl_addr = (u64)get_spin_tbl_addr(); >> + fdt32_t *reg; >> + int addr_cells; >> + u64 val; >> + size_t *boot_code_size = &(__secondary_boot_code_size); >> + >> + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpus", >> 4); > > I didn't think /cpus had device_type = "cpus". I can't see any > instances in any DTs I have to hand. Can we not find /cpus by path?
I will let Arnab to comment on this. He is coordinating with Linux device tree. > >> + of_bus_default_count_cells(blob, off, &addr_cells, NULL); >> + >> + off = fdt_node_offset_by_prop_value(blob, -1, "device_type", "cpu", >> 4); >> + while (off != -FDT_ERR_NOTFOUND) { >> + reg = (fdt32_t *)fdt_getprop(blob, off, "reg", 0); >> + if (reg) { >> + val = spin_tbl_addr; >> +#ifndef CONFIG_FSL_SMP_RELEASE_ALL >> + val += id_to_core(of_read_number(reg, addr_cells)) >> + * SPIN_TABLE_ELEM_SIZE; >> +#endif >> + val = cpu_to_fdt64(val); >> + fdt_setprop_string(blob, off, "enable-method", >> + "spin-table"); >> + fdt_setprop(blob, off, "cpu-release-addr", >> + &val, sizeof(val)); >> + } else { >> + puts("cpu NULL\n"); > > Could we change that to "Warning: found cpu node without reg property" > or something like that which makes the problem clear? Yes, we can. > >> + } >> + off = fdt_node_offset_by_prop_value(blob, off, "device_type", >> + "cpu", 4); >> + } >> + >> + fdt_add_mem_rsv(blob, (uintptr_t)&secondary_boot_code, >> + *boot_code_size); >> +} >> +#endif >> + >> +void ft_cpu_setup(void *blob, bd_t *bd) >> +{ >> +#ifdef CONFIG_MP >> + ft_fixup_cpu(blob); >> +#endif >> +} >> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S >> b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S >> index ad32b6c..629e6d4 100644 >> --- a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S >> +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S >> @@ -8,7 +8,9 @@ >> >> #include <config.h> >> #include <linux/linkage.h> >> +#include <asm/gic.h> >> #include <asm/macro.h> >> +#include "mp.h" >> >> ENTRY(lowlevel_init) >> mov x29, lr /* Save LR */ >> @@ -37,29 +39,119 @@ ENTRY(lowlevel_init) >> >> branch_if_master x0, x1, 1f >> >> + ldr x0, =secondary_boot_func >> + blr x0 >> + >> +1: >> +2: > > Isn't the '2' label redundant? We have some internal code dealing with trust zone between the 1 and 2. It is not likely to be used in long term since we are trying to move them into security monitor. I can drop label 2 here. > >> + mov lr, x29 /* Restore LR */ >> + ret >> +ENDPROC(lowlevel_init) >> + >> + /* Keep literals not used by the secondary boot code outside it */ >> + .ltorg >> + >> + /* Using 64 bit alignment since the spin table is accessed as data */ >> + .align 4 >> + .global secondary_boot_code >> + /* Secondary Boot Code starts here */ >> +secondary_boot_code: >> + .global __spin_table >> +__spin_table: >> + .space CONFIG_MAX_CPUS*SPIN_TABLE_ELEM_SIZE >> + >> + .align 2 >> +ENTRY(secondary_boot_func) >> /* >> - * Slave should wait for master clearing spin table. >> - * This sync prevent salves observing incorrect >> - * value of spin table and jumping to wrong place. >> + * MPIDR_EL1 Fields: >> + * MPIDR[1:0] = AFF0_CPUID <- Core ID (0,1) >> + * MPIDR[7:2] = AFF0_RES >> + * MPIDR[15:8] = AFF1_CLUSTERID <- Cluster ID (0,1,2,3) >> + * MPIDR[23:16] = AFF2_CLUSTERID >> + * MPIDR[24] = MT >> + * MPIDR[29:25] = RES0 >> + * MPIDR[30] = U >> + * MPIDR[31] = ME >> + * MPIDR[39:32] = AFF3 >> + * >> + * Linear Processor ID (LPID) calculation from MPIDR_EL1: >> + * (We only use AFF0_CPUID and AFF1_CLUSTERID for now >> + * until AFF2_CLUSTERID and AFF3 have non-zero values) >> + * >> + * LPID = MPIDR[15:8] | MPIDR[1:0] >> */ >> -#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) >> -#ifdef CONFIG_GICV2 >> - ldr x0, =GICC_BASE >> + mrs x0, mpidr_el1 >> + ubfm x1, x0, #8, #15 >> + ubfm x2, x0, #0, #1 >> + orr x10, x2, x1, lsl #2 /* x10 has LPID */ >> + ubfm x9, x0, #0, #15 /* x9 contains MPIDR[15:0] */ >> + /* >> + * offset of the spin table element for this core from start of spin >> + * table (each elem is padded to 64 bytes) >> + */ >> + lsl x1, x10, #6 >> + ldr x0, =__spin_table >> + /* physical address of this cpus spin table element */ >> + add x11, x1, x0 >> + >> + str x9, [x11, #16] /* LPID */ >> + mov x4, #1 >> + str x4, [x11, #8] /* STATUS */ >> + dsb sy >> +#if defined(CONFIG_GICV3) >> + gic_wait_for_interrupt_m x0 >> #endif >> - bl gic_wait_for_interrupt > > Why do we only need to wait for GICv3? We previously did so for GICv2. I think this is leftover from the attempt to boot all cores simultaneously. I will let Arnab to comment. > >> + >> + bl secondary_switch_to_el2 >> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 >> + secondary_switch_to_el1 >> #endif > > Missing bl? Looks so. > > Is there any reason to have the SWITCH_TO_EL1 option other than for > debugging? Good question. I will let Arnab to comment here. > > EL2 is the preferred EL to boot at for Linux and Xen (it gives far more > flexibility), and if dropping to EL1 is necessary I think it would make > more sense as a run-time option than a compile-time option. > >> >> - /* >> - * All processors will enter EL2 and optionally EL1. >> +slave_cpu: >> + wfe >> +#ifdef CONFIG_FSL_SMP_RELEASE_ALL >> + /* All cores are released from the address in the 1st spin table >> + * element >> */ >> - bl armv8_switch_to_el2 >> -#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 >> - bl armv8_switch_to_el1 >> + ldr x1, =__spin_table >> + ldr x0, [x1] >> +#else >> + ldr x0, [x11] >> +#endif >> + cbz x0, slave_cpu > > Similarly is there any reason to have the option of a single release > addr if we can support unique addresses? I think it was used by Linux for some ARM parts. I personally not a fun of using single release. But if it makes everyone happy, I can keep it. > > [...] > >> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/mp.c >> b/arch/arm/cpu/armv8/fsl-lsch3/mp.c >> new file mode 100644 >> index 0000000..b29eda9 >> --- /dev/null >> +++ b/arch/arm/cpu/armv8/fsl-lsch3/mp.c >> @@ -0,0 +1,172 @@ >> +/* >> + * Copyright 2014 Freescale Semiconductor, Inc. >> + * >> + * SPDX-License-Identifier: GPL-2.0+ >> + */ >> + >> +#include <common.h> >> +#include <asm/io.h> >> +#include <asm/system.h> >> +#include <asm/io.h> >> +#include <asm/arch-fsl-lsch3/immap_lsch3.h> >> +#include "mp.h" >> + >> +DECLARE_GLOBAL_DATA_PTR; >> + >> +void *get_spin_tbl_addr(void) >> +{ >> + void *ptr = (void *)&__spin_table; >> + >> + return ptr; >> +} > > I don't think the cast is necessary here. As far as I can see this > could be just: > > void *get_spin_tbl_addr(void) > { > return &__spin_table; > } > Let me try that. > [...] > >> diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h >> index f77e4b8..0009c28 100644 >> --- a/arch/arm/include/asm/macro.h >> +++ b/arch/arm/include/asm/macro.h >> @@ -105,6 +105,98 @@ lr .req x30 >> cbz \xreg1, \master_label >> .endm >> >> +.macro armv8_switch_to_el2_m, xreg1 >> + mov \xreg1, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */ >> + msr scr_el3, \xreg1 > > When dropping to EL1 from EL2 we disable HVC via HCR_EL2; presumably due > to lack of a handler. Would it make sense to do similarly here and > disable SMC here until we have a user (e.g. PSCI)? I will let Arnab to comment here. York _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot