On Wed, May 28, 2014 at 6:46 PM, York Sun <york...@freescale.com> wrote: > Freescale LayerScape with Chassis Generation 3 is a set of SoCs with > ARMv8 cores and 3rd generation of Chassis. We use different MMU setup > to support memory map and cache attribute for these SoCs. MMU and cache > are enabled very early to boost performance, especially for early > development on emulators. After u-boot relocates to DDR, a new MMU > table with QBMan cache access is created in DDR. SMMU pagesize is set > in SMMU_sACR register. Both DDR3 and DDR4 are supported. > > Signed-off-by: York Sun <york...@freescale.com> > Signed-off-by: Varun Sethi <varun.se...@freescale.com> > Signed-off-by: Arnab Basu <arnab.b...@freescale.com> > --- > Change log: > v3: Remove blank lines at the end of files > Fix cluster PLL GSR register for accessing beyond array size > Update final MMU table to support QBMan memory with cache > Set SMMU pagesize in SMMU_sACR register in lowlevel init. > Add DDR4 support > Remove forcing L3 cache flushing > Update GICv3 redistributor base address > > Some of these changes are caused by model change. 
> > arch/arm/cpu/armv8/cache_v8.c | 7 +- > arch/arm/cpu/armv8/fsl-lsch3/Makefile | 10 + > arch/arm/cpu/armv8/fsl-lsch3/README | 10 + > arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 474 > +++++++++++++++++++++ > arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 7 + > arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 65 +++ > arch/arm/cpu/armv8/fsl-lsch3/speed.c | 176 ++++++++ > arch/arm/cpu/armv8/fsl-lsch3/speed.h | 7 + > arch/arm/cpu/armv8/fsl-lsch3/timer.c | 62 +++ > arch/arm/include/asm/arch-fsl-lsch3/clock.h | 23 + > arch/arm/include/asm/arch-fsl-lsch3/config.h | 65 +++ > arch/arm/include/asm/arch-fsl-lsch3/gpio.h | 9 + > arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 116 +++++ > arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h | 13 + > arch/arm/include/asm/arch-fsl-lsch3/mmu.h | 10 + > arch/arm/include/asm/config.h | 4 + > arch/arm/include/asm/system.h | 2 + > drivers/i2c/mxc_i2c.c | 5 + > include/common.h | 5 +- > 19 files changed, 1066 insertions(+), 4 deletions(-) > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/Makefile > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/README > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.c > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.h > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.c > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.h > create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/timer.c > create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/clock.h > create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/config.h > create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/gpio.h > create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h > create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h > create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/mmu.h > > diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c > index a96ecda..c47acba 100644 > --- a/arch/arm/cpu/armv8/cache_v8.c > +++ 
b/arch/arm/cpu/armv8/cache_v8.c > @@ -83,12 +83,17 @@ void invalidate_dcache_all(void) > __asm_invalidate_dcache_all(); > } > > +void __weak flush_l3_cache(void) > +{ > +} > + > /* > * Performs a clean & invalidation of the entire data cache at all levels > */ > void flush_dcache_all(void) > { > __asm_flush_dcache_all(); > + flush_l3_cache(); > } > > /* > @@ -221,7 +226,7 @@ void invalidate_icache_all(void) > * Enable dCache & iCache, whether cache is actually enabled > * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF > */ > -void enable_caches(void) > +void __weak enable_caches(void) > { > icache_enable(); > dcache_enable(); > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile > b/arch/arm/cpu/armv8/fsl-lsch3/Makefile > new file mode 100644 > index 0000000..4b859cf > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile > @@ -0,0 +1,10 @@ > +# > +# Copyright 2014, Freescale Semiconductor > +# > +# SPDX-License-Identifier: GPL-2.0+ > +# > + > +obj-y += cpu.o > +obj-y += timer.o > +obj-y += lowlevel.o > +obj-y += speed.o > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/README > b/arch/arm/cpu/armv8/fsl-lsch3/README > new file mode 100644 > index 0000000..de34a91 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/README > @@ -0,0 +1,10 @@ > +# > +# Copyright 2014 Freescale Semiconductor > +# > +# SPDX-License-Identifier: GPL-2.0+ > +# > + > +Freescale LayerScape with Chassis Generation 3 > + > +This architecture supports Freescale ARMv8 SoCs with Chassis generation 3, > +for example LS2100A. > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > new file mode 100644 > index 0000000..2780390 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c > @@ -0,0 +1,474 @@ > +/* > + * Copyright 2014 Freescale Semiconductor, Inc. 
> + * > + * SPDX-License-Identifier: GPL-2.0+ > + */ > + > +#include <common.h> > +#include <asm/io.h> > +#include <asm/system.h> > +#include <asm/armv8/mmu.h> > +#include <asm/io.h> > +#include <asm/arch-fsl-lsch3/immap_lsch3.h> > +#include "cpu.h" > +#include "speed.h" > + > +DECLARE_GLOBAL_DATA_PTR; > + > +#ifndef CONFIG_SYS_DCACHE_OFF > +/* > + * To start MMU before DDR is available, we create MMU table in SRAM. > + * The base address of SRAM is CONFIG_SYS_FSL_OCRAM_BASE. We use three > + * levels of translation tables here to cover 40-bit address space. > + * We use 4KB granule size, with 40 bits physical address, T0SZ=24 > + * Level 0 IA[39], table address @0 > + * Level 1 IA[31:30], table address @01000, 0x2000 > + * Level 2 IA[29:21], table address @0x3000 > + */ > + > +#define EARLY_SECTION_SHIFT_L0 39UL > +#define EARLY_SECTION_SHIFT_L1 30UL > +#define EARLY_SECTION_SHIFT_L2 21UL > +#define EARLY_BLOCK_SIZE_L0 0x8000000000UL > +#define EARLY_BLOCK_SIZE_L1 (1 << EARLY_SECTION_SHIFT_L1) > +#define EARLY_BLOCK_SIZE_L2 (1 << EARLY_SECTION_SHIFT_L2) > +#define CONFIG_SYS_IFC_BASE 0x30000000 > +#define CONFIG_SYS_IFC_SIZE 0x10000000 > +#define CONFIG_SYS_IFC_BASE2 0x500000000 > +#define CONFIG_SYS_IFC_SIZE2 0x100000000 > +#define TCR_EL2_PS_40BIT (2 << 16) > +#define EARLY_VA_BITS (40) > +#define EARLY_TCR (TCR_TG0_4K | \ > + TCR_EL2_PS_40BIT | \ > + TCR_SHARED_NON | \ > + TCR_ORGN_NC | \ > + TCR_IRGN_NC | \ > + TCR_T0SZ(EARLY_VA_BITS)) > + > +/* > + * Final MMU > + * Let's start from the same layout as early MMU and modify as needed. > + * IFC regions will be cache-inhibit. 
> + */ > +#define FINAL_SECTION_SHIFT_L0 39UL > +#define FINAL_SECTION_SHIFT_L1 30UL > +#define FINAL_SECTION_SHIFT_L2 21UL > +#define FINAL_BLOCK_SIZE_L0 0x8000000000UL > +#define FINAL_BLOCK_SIZE_L1 (1 << FINAL_SECTION_SHIFT_L1) > +#define FINAL_BLOCK_SIZE_L2 (1 << FINAL_SECTION_SHIFT_L2) > +#define FINAL_QBMAN_CACHED_MEM 0x818000000UL > +#define FINAL_QBMAN_CACHED_SIZE 0x4000000 > +#define TCR_EL2_PS_40BIT (2 << 16) > +#define FINAL_VA_BITS (40) > +#define FINAL_TCR (TCR_TG0_4K | \ > + TCR_EL2_PS_40BIT | \ > + TCR_SHARED_NON | \ > + TCR_ORGN_NC | \ > + TCR_IRGN_NC | \ > + TCR_T0SZ(FINAL_VA_BITS)) > + > + > +static void set_pgtable_section(u64 *page_table, u64 index, u64 section, > + u8 memory_type) > +{ > + u64 value; > + > + value = section | PMD_TYPE_SECT | PMD_SECT_AF; > + value |= PMD_ATTRINDX(memory_type); > + page_table[index] = value; > +}
This function looks like it should be common. > + > +static inline void early_mmu_setup(void) > +{ > + int el; > + u64 i; > + u64 section_l1t0, section_l1t1, section_l2; > + u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE; > + u64 *level1_table_0 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000); > + u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000); > + u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000); > + > + > + level0_table[0] = > + (u64)level1_table_0 | PMD_TYPE_TABLE; > + level0_table[1] = > + (u64)level1_table_1 | PMD_TYPE_TABLE; > + > + /* > + * set level 1 table 0 to cache_inhibit, covering 0 to 512GB > + * set level 1 table 1 to cache enabled, covering 512GB to 1TB > + * set level 2 table to cache-inhibit, covering 0 to 1GB > + */ > + section_l1t0 = 0; > + section_l1t1 = EARLY_BLOCK_SIZE_L0; > + section_l2 = 0; > + for (i = 0; i < 512; i++) { > + set_pgtable_section(level1_table_0, i, section_l1t0, > + MT_DEVICE_NGNRNE); > + set_pgtable_section(level1_table_1, i, section_l1t1, > + MT_NORMAL); > + set_pgtable_section(level2_table, i, section_l2, > + MT_DEVICE_NGNRNE); > + section_l1t0 += EARLY_BLOCK_SIZE_L1; > + section_l1t1 += EARLY_BLOCK_SIZE_L1; > + section_l2 += EARLY_BLOCK_SIZE_L2; > + } > + > + level1_table_0[0] = > + (u64)level2_table | PMD_TYPE_TABLE; > + level1_table_0[1] = > + 0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT | > + PMD_ATTRINDX(MT_DEVICE_NGNRNE); > + level1_table_0[2] = > + 0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT | > + PMD_ATTRINDX(MT_NORMAL); > + level1_table_0[3] = > + 0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT | > + PMD_ATTRINDX(MT_NORMAL); > + > + /* Rewrite table to enable cache */ > + set_pgtable_section(level2_table, > + CONFIG_SYS_FSL_OCRAM_BASE >> > EARLY_SECTION_SHIFT_L2, > + CONFIG_SYS_FSL_OCRAM_BASE, > + MT_NORMAL); > + for (i = CONFIG_SYS_IFC_BASE >> EARLY_SECTION_SHIFT_L2; > + i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE) > + >> EARLY_SECTION_SHIFT_L2; i++) { > + section_l2 = i << 
EARLY_SECTION_SHIFT_L2; > + set_pgtable_section(level2_table, i, > + section_l2, MT_NORMAL); > + } > + > + el = current_el(); We really can't have u-boot running at random ELs in v8 for different platforms. It's a mess on v7. You should never be at EL3. u-boot could be defined to run at EL1, but then you need to be able to go back to EL2 to boot the kernel. So really u-boot should always run at EL2 unless you are running in a VM, but that would be a different platform. > + if (el == 1) { > + asm volatile("dsb sy;isb"); > + asm volatile("msr ttbr0_el1, %0" > + : : "r" ((u64)level0_table) : "memory"); > + asm volatile("msr tcr_el1, %0" > + : : "r" (EARLY_TCR) : "memory"); > + asm volatile("msr mair_el1, %0" > + : : "r" (MEMORY_ATTRIBUTES) : "memory"); These should all be inline functions or macros. > + } else if (el == 2) { > + asm volatile("dsb sy;isb"); > + asm volatile("msr ttbr0_el2, %0" > + : : "r" ((u64)level0_table) : "memory"); > + asm volatile("msr tcr_el2, %0" > + : : "r" (EARLY_TCR) : "memory"); > + asm volatile("msr mair_el2, %0" > + : : "r" (MEMORY_ATTRIBUTES) : "memory"); > + } else if (el == 3) { > + asm volatile("dsb sy;isb"); > + asm volatile("msr ttbr0_el3, %0" > + : : "r" ((u64)level0_table) : "memory"); > + asm volatile("msr tcr_el3, %0" > + : : "r" (EARLY_TCR) : "memory"); > + asm volatile("msr mair_el3, %0" > + : : "r" (MEMORY_ATTRIBUTES) : "memory"); > + } else { > + hang(); > + } > + > + set_sctlr(get_sctlr() | CR_M); > +} > + > +static inline void final_mmu_setup(void) Looks like nearly the same code repeated... 
> +{ > + int el; > + u64 i, tbl_base, tbl_limit, section_base; > + u64 section_l1t0, section_l1t1, section_l2; > + u64 *level0_table = (u64 *)gd->arch.tlb_addr; > + u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000); > + u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000); > + u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000); > + u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000); > + > + > + level0_table[0] = > + (u64)level1_table_0 | PMD_TYPE_TABLE; > + level0_table[1] = > + (u64)level1_table_1 | PMD_TYPE_TABLE; > + > + /* > + * set level 1 table 0 to cache_inhibit, covering 0 to 512GB > + * set level 1 table 1 to cache enabled, covering 512GB to 1TB > + * set level 2 table 0 to cache-inhibit, covering 0 to 1GB > + */ > + section_l1t0 = 0; > + section_l1t1 = FINAL_BLOCK_SIZE_L0; > + section_l2 = 0; > + for (i = 0; i < 512; i++) { > + set_pgtable_section(level1_table_0, i, section_l1t0, > + MT_DEVICE_NGNRNE); > + set_pgtable_section(level1_table_1, i, section_l1t1, > + MT_NORMAL); > + set_pgtable_section(level2_table_0, i, section_l2, > + MT_DEVICE_NGNRNE); > + section_l1t0 += FINAL_BLOCK_SIZE_L1; > + section_l1t1 += FINAL_BLOCK_SIZE_L1; > + section_l2 += FINAL_BLOCK_SIZE_L2; > + } > + > + level1_table_0[0] = > + (u64)level2_table_0 | PMD_TYPE_TABLE; > + level1_table_0[2] = > + 0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT | > + PMD_ATTRINDX(MT_NORMAL); > + level1_table_0[3] = > + 0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT | > + PMD_ATTRINDX(MT_NORMAL); > + > + /* Rewrite table to enable cache */ > + set_pgtable_section(level2_table_0, > + CONFIG_SYS_FSL_OCRAM_BASE >> > FINAL_SECTION_SHIFT_L2, > + CONFIG_SYS_FSL_OCRAM_BASE, > + MT_NORMAL); > + > + /* > + * Fill in other part of tables if cache is needed > + * If finer granularity than 1GB is needed, sub table > + * should be created. 
> + */ > + section_base = FINAL_QBMAN_CACHED_MEM & ~(FINAL_BLOCK_SIZE_L1 - 1); > + i = section_base >> FINAL_SECTION_SHIFT_L1; > + level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE; > + section_l2 = section_base; > + for (i = 0; i < 512; i++) { > + set_pgtable_section(level2_table_1, i, section_l2, > + MT_DEVICE_NGNRNE); > + section_l2 += FINAL_BLOCK_SIZE_L2; > + } > + tbl_base = FINAL_QBMAN_CACHED_MEM & (FINAL_BLOCK_SIZE_L1 - 1); > + tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) & > + (FINAL_BLOCK_SIZE_L1 - 1); > + for (i = tbl_base >> FINAL_SECTION_SHIFT_L2; > + i < tbl_limit >> FINAL_SECTION_SHIFT_L2; i++) { > + section_l2 = section_base + (i << FINAL_SECTION_SHIFT_L2); > + set_pgtable_section(level2_table_1, i, > + section_l2, MT_NORMAL); > + } > + > + el = current_el(); > + if (el == 1) { > + asm volatile("dsb sy;isb"); > + asm volatile("msr ttbr0_el1, %0" > + : : "r" ((u64)level0_table) : "memory"); > + asm volatile("msr tcr_el1, %0" > + : : "r" (FINAL_TCR) : "memory"); > + asm volatile("msr mair_el1, %0" > + : : "r" (MEMORY_ATTRIBUTES) : "memory"); > + } else if (el == 2) { > + asm volatile("dsb sy;isb"); > + asm volatile("msr ttbr0_el2, %0" > + : : "r" ((u64)level0_table) : "memory"); > + asm volatile("msr tcr_el2, %0" > + : : "r" (FINAL_TCR) : "memory"); > + asm volatile("msr mair_el2, %0" > + : : "r" (MEMORY_ATTRIBUTES) : "memory"); > + } else if (el == 3) { > + asm volatile("dsb sy;isb"); > + asm volatile("msr ttbr0_el3, %0" > + : : "r" ((u64)level0_table) : "memory"); > + asm volatile("msr tcr_el3, %0" > + : : "r" (FINAL_TCR) : "memory"); > + asm volatile("msr mair_el3, %0" > + : : "r" (MEMORY_ATTRIBUTES) : "memory"); > + } else { > + hang(); > + } > + > + set_sctlr(get_sctlr() | CR_M); > +} > + > +int arch_cpu_init(void) > +{ > + icache_enable(); > + __asm_invalidate_dcache_all(); > + __asm_invalidate_tlb_all(); > + early_mmu_setup(); > + set_sctlr(get_sctlr() | CR_C); > + return 0; > +} > + > +/* > + * flush_l3_cache > + * 
Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power > + * state, by writing to HP-F P-state request register. Other SOCs will have Dickens. Are these registers FSL specific? If not, this should be common. Also, I believe the proper way to flush Dickens is using the architected cache flushing method where you walk the levels out to level 3. > + */ > +#define HNF0_PSTATE_REQ 0x04200010 > +#define HNF1_PSTATE_REQ 0x04210010 > +#define HNF2_PSTATE_REQ 0x04220010 > +#define HNF3_PSTATE_REQ 0x04230010 > +#define HNF4_PSTATE_REQ 0x04240010 > +#define HNF5_PSTATE_REQ 0x04250010 > +#define HNF6_PSTATE_REQ 0x04260010 > +#define HNF7_PSTATE_REQ 0x04270010 > +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC) > +#define HNFPSTAT_FAM 0x3 > +#define HNFPSTAT_SFONLY 0x01 > + > +static void hnf_pstate_req(u64 *ptr, u64 state) > +{ > + int timeout = 1000; > + out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3)); > + ptr++; > + /* checking if the transition is completed */ > + while (timeout > 0) { > + if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3)) > + break; > + udelay(100); > + timeout--; > + } > +} > + > +void flush_l3_cache(void) > +{ > + hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY); > + hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, 
HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM); > + hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM); > +} > + > +/* > + * This function is called from lib/board.c. > + * It recreates MMU table in main memory. MMU and d-cache are enabled > earlier. > + * There is no need to disable d-cache for this operation. > + */ > +void enable_caches(void) > +{ > + final_mmu_setup(); > + flush_dcache_range(gd->arch.tlb_addr, > + gd->arch.tlb_addr + gd->arch.tlb_size); > + __asm_invalidate_tlb_all(); > +} > +#endif > + > +static inline u32 init_type(u32 cluster, int init_id) init_type? That's a great name. > +{ > + struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); > + u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK; > + u32 type = in_le32(&gur->tp_ityp[idx]); > + > + if (type & TP_ITYP_AV) > + return type; > + > + return 0; > +} > + > +u32 cpu_mask(void) > +{ > + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); > + int i = 0, count = 0; > + u32 cluster, type, mask = 0; > + > + do { > + int j; > + cluster = in_le32(&gur->tp_cluster[i].lower); > + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) { > + type = init_type(cluster, j); > + if (type) { > + if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM) > + mask |= 1 << count; > + count++; > + } > + } > + i++; > + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC); > + > + return mask; > +} > + > +/* > + * Return the number of cores on this SOC. 
> + */ > +int cpu_numcores(void) > +{ > + return hweight32(cpu_mask()); > +} > + > +int fsl_qoriq_core_to_cluster(unsigned int core) > +{ > + struct ccsr_gur __iomem *gur = > + (void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR); > + int i = 0, count = 0; > + u32 cluster; > + > + do { > + int j; > + cluster = in_le32(&gur->tp_cluster[i].lower); > + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) { > + if (init_type(cluster, j)) { > + if (count == core) > + return i; > + count++; > + } > + } > + i++; > + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC); > + > + return -1; /* cannot identify the cluster */ > +} > + > +u32 fsl_qoriq_core_to_type(unsigned int core) > +{ > + struct ccsr_gur __iomem *gur = > + (void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR); > + int i = 0, count = 0; > + u32 cluster, type; > + > + do { > + int j; > + cluster = in_le32(&gur->tp_cluster[i].lower); > + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) { > + type = init_type(cluster, j); > + if (type) { > + if (count == core) > + return type; > + count++; > + } > + } > + i++; > + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC); > + > + return -1; /* cannot identify the cluster */ > +} Do you plan on supporting PSCI because all this core and cluster stuff belongs there. > + > +#ifdef CONFIG_DISPLAY_CPUINFO > +int print_cpuinfo(void) > +{ > + struct sys_info sysinfo; > + char buf[32]; > + unsigned int i, core; > + u32 type; > + > + get_sys_info(&sysinfo); > + puts("Clock Configuration:"); > + for_each_cpu(i, core, cpu_numcores(), cpu_mask()) { > + if (!(i % 3)) > + puts("\n "); > + type = TP_ITYP_VER(fsl_qoriq_core_to_type(core)); > + printf("CPU%d(%s):%-4s MHz ", core, > + type == TY_ITYP_VER_A7 ? "A7 " : > + (type == TY_ITYP_VER_A53 ? "A53" : > + (type == TY_ITYP_VER_A57 ? 
"A57" : " ")), > + strmhz(buf, sysinfo.freq_processor[core])); > + } > + printf("\n Bus: %-4s MHz ", > + strmhz(buf, sysinfo.freq_systembus)); > + printf("DDR: %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus)); > + puts("\n"); > + > + return 0; > +} > +#endif > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > new file mode 100644 > index 0000000..28544d7 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h > @@ -0,0 +1,7 @@ > +/* > + * Copyright 2014, Freescale Semiconductor > + * > + * SPDX-License-Identifier: GPL-2.0+ > + */ > + > +int fsl_qoriq_core_to_cluster(unsigned int core); > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > new file mode 100644 > index 0000000..087d5d1 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S > @@ -0,0 +1,65 @@ > +/* > + * (C) Copyright 2014 Freescale Semiconductor > + * > + * SPDX-License-Identifier: GPL-2.0+ > + * > + * Extracted from armv8/start.S > + */ > + > +#include <config.h> > +#include <linux/linkage.h> > +#include <asm/macro.h> > + > +ENTRY(lowlevel_init) > + /* Initialize GIC Secure Bank Status */ > + mov x29, lr /* Save LR */ > + > + /* Set the SMMU page size in the sACR register */ > + ldr x1, =SMMU_BASE > + ldr w0, [x1, #0x10] > + orr w0, w0, #1 << 16 /* set sACR.pagesize to indicate 64K page */ > + str w0, [x1, #0x10] > + > +#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) You can have either v2 or v3? > + branch_if_slave x0, 1f > + ldr x0, =GICD_BASE > + bl gic_init_secure > +1: > +#if defined(CONFIG_GICV3) > + ldr x0, =GICR_BASE > + bl gic_init_secure_percpu > +#elif defined(CONFIG_GICV2) > + ldr x0, =GICD_BASE > + ldr x1, =GICC_BASE > + bl gic_init_secure_percpu > +#endif > +#endif > + > + branch_if_master x0, x1, 1f > + > + /* > + * Slave should wait for master clearing spin table. > + * This sync prevent salves observing incorrect > + * value of spin table and jumping to wrong place. 
> + */ > +#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3) > +#ifdef CONFIG_GICV2 > + ldr x0, =GICC_BASE > +#endif > + bl gic_wait_for_interrupt > +#endif > + > + /* > + * All processors will enter EL2 and optionally EL1. > + */ > + bl armv8_switch_to_el2 > +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 > + bl armv8_switch_to_el1 > +#endif > + b 2f This all looks like cut and paste from existing startup code. Can't you refactor things? > + > +1: > +2: > + mov lr, x29 /* Restore LR */ > + ret > +ENDPROC(lowlevel_init) > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.c > b/arch/arm/cpu/armv8/fsl-lsch3/speed.c > new file mode 100644 > index 0000000..dc4a34b > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.c > @@ -0,0 +1,176 @@ > +/* > + * Copyright 2014, Freescale Semiconductor, Inc. > + * > + * SPDX-License-Identifier: GPL-2.0+ > + * > + * Derived from arch/power/cpu/mpc85xx/speed.c > + */ > + > +#include <common.h> > +#include <linux/compiler.h> > +#include <fsl_ifc.h> > +#include <asm/processor.h> > +#include <asm/io.h> > +#include <asm/arch-fsl-lsch3/immap_lsch3.h> > +#include <asm/arch/clock.h> > +#include "cpu.h" > + > +DECLARE_GLOBAL_DATA_PTR; > + > +#ifndef CONFIG_SYS_FSL_NUM_CC_PLLS > +#define CONFIG_SYS_FSL_NUM_CC_PLLS 6 > +#endif > + > + > +void get_sys_info(struct sys_info *sys_info) > +{ > + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR); > +#ifdef CONFIG_FSL_IFC > + struct fsl_ifc *ifc_regs = (void *)CONFIG_SYS_IFC_ADDR; > + u32 ccr; > +#endif > + struct ccsr_clk_cluster_group __iomem *clk_grp[2] = { > + (void *)(CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR), > + (void *)(CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR) > + }; > + struct ccsr_clk_ctrl __iomem *clk_ctrl = > + (void *)(CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR); > + unsigned int cpu; > + const u8 core_cplx_pll[16] = { > + [0] = 0, /* CC1 PPL / 1 */ > + [1] = 0, /* CC1 PPL / 2 */ > + [2] = 0, /* CC1 PPL / 4 */ > + [4] = 1, /* CC2 PPL / 1 */ > + [5] = 1, /* CC2 PPL / 2 */ > + [6] = 1, /* CC2 PPL / 4 */ > + 
[8] = 2, /* CC3 PPL / 1 */ > + [9] = 2, /* CC3 PPL / 2 */ > + [10] = 2, /* CC3 PPL / 4 */ > + [12] = 3, /* CC4 PPL / 1 */ > + [13] = 3, /* CC4 PPL / 2 */ > + [14] = 3, /* CC4 PPL / 4 */ > + }; > + > + const u8 core_cplx_pll_div[16] = { > + [0] = 1, /* CC1 PPL / 1 */ > + [1] = 2, /* CC1 PPL / 2 */ > + [2] = 4, /* CC1 PPL / 4 */ > + [4] = 1, /* CC2 PPL / 1 */ > + [5] = 2, /* CC2 PPL / 2 */ > + [6] = 4, /* CC2 PPL / 4 */ > + [8] = 1, /* CC3 PPL / 1 */ > + [9] = 2, /* CC3 PPL / 2 */ > + [10] = 4, /* CC3 PPL / 4 */ > + [12] = 1, /* CC4 PPL / 1 */ > + [13] = 2, /* CC4 PPL / 2 */ > + [14] = 4, /* CC4 PPL / 4 */ > + }; > + > + uint i, cluster; > + uint freq_c_pll[CONFIG_SYS_FSL_NUM_CC_PLLS]; > + uint ratio[CONFIG_SYS_FSL_NUM_CC_PLLS]; > + unsigned long sysclk = CONFIG_SYS_CLK_FREQ; > + int cc_group[12] = CONFIG_SYS_FSL_CLUSTER_CLOCKS; > + u32 c_pll_sel, cplx_pll; > + void *offset; > + > + sys_info->freq_systembus = sysclk; > +#ifdef CONFIG_DDR_CLK_FREQ > + sys_info->freq_ddrbus = CONFIG_DDR_CLK_FREQ; > +#else > + sys_info->freq_ddrbus = sysclk; > +#endif > + > + sys_info->freq_systembus *= (in_le32(&gur->rcwsr[0]) >> > + FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT) & > + FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK; > + sys_info->freq_ddrbus *= (in_le32(&gur->rcwsr[0]) >> > + FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT) & > + FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK; > + > + for (i = 0; i < CONFIG_SYS_FSL_NUM_CC_PLLS; i++) { > + /* > + * fixme: prefer to combine the following into one line, but > + * cannot pass compiling without warning about in_le32. 
> + */ > + offset = (void *)((size_t)clk_grp[i/3] + > + offsetof(struct ccsr_clk_cluster_group, > + pllngsr[i%3].gsr)); > + ratio[i] = (in_le32(offset) >> 1) & 0x3f; > + if (ratio[i] > 4) > + freq_c_pll[i] = sysclk * ratio[i]; > + else > + freq_c_pll[i] = sys_info->freq_systembus * ratio[i]; > + } > + > + for_each_cpu(i, cpu, cpu_numcores(), cpu_mask()) { > + cluster = fsl_qoriq_core_to_cluster(cpu); > + c_pll_sel = (in_le32(&clk_ctrl->clkcncsr[cluster].csr) >> 27) > + & 0xf; > + cplx_pll = core_cplx_pll[c_pll_sel]; > + cplx_pll += cc_group[cluster] - 1; > + sys_info->freq_processor[cpu] = > + freq_c_pll[cplx_pll] / core_cplx_pll_div[c_pll_sel]; > + } > + > +#if defined(CONFIG_FSL_IFC) > + ccr = in_le32(&ifc_regs->ifc_ccr); > + ccr = ((ccr & IFC_CCR_CLK_DIV_MASK) >> IFC_CCR_CLK_DIV_SHIFT) + 1; > + > + sys_info->freq_localbus = sys_info->freq_systembus / ccr; > +#endif > +} > + > + > +int get_clocks(void) > +{ > + struct sys_info sys_info; > + get_sys_info(&sys_info); > + gd->cpu_clk = sys_info.freq_processor[0]; > + gd->bus_clk = sys_info.freq_systembus; > + gd->mem_clk = sys_info.freq_ddrbus; > + > +#if defined(CONFIG_FSL_ESDHC) > + gd->arch.sdhc_clk = gd->bus_clk / 2; > +#endif /* defined(CONFIG_FSL_ESDHC) */ > + > + if (gd->cpu_clk != 0) > + return 0; > + else > + return 1; > +} > + > +/******************************************** > + * get_bus_freq > + * return system bus freq in Hz > + *********************************************/ > +ulong get_bus_freq(ulong dummy) > +{ > + if (!gd->bus_clk) > + get_clocks(); > + > + return gd->bus_clk; > +} > + > +/******************************************** > + * get_ddr_freq > + * return ddr bus freq in Hz > + *********************************************/ > +ulong get_ddr_freq(ulong dummy) > +{ > + if (!gd->mem_clk) > + get_clocks(); > + > + return gd->mem_clk; > +} > + > +unsigned int mxc_get_clock(enum mxc_clock clk) > +{ > + switch (clk) { > + case MXC_I2C_CLK: > + return get_bus_freq(0) / 2; > + default: > + 
printf("Unsupported clock\n"); > + } > + return 0; > +} > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.h > b/arch/arm/cpu/armv8/fsl-lsch3/speed.h > new file mode 100644 > index 0000000..15af5b9 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.h > @@ -0,0 +1,7 @@ > +/* > + * Copyright 2014, Freescale Semiconductor, Inc. > + * > + * SPDX-License-Identifier: GPL-2.0+ > + */ > + > +void get_sys_info(struct sys_info *sys_info); > diff --git a/arch/arm/cpu/armv8/fsl-lsch3/timer.c > b/arch/arm/cpu/armv8/fsl-lsch3/timer.c > new file mode 100644 > index 0000000..3adfa41 > --- /dev/null > +++ b/arch/arm/cpu/armv8/fsl-lsch3/timer.c > @@ -0,0 +1,62 @@ > +/* > + * Copyright 2014, Freescale Semiconductor > + * > + * SPDX-License-Identifier: GPL-2.0+ > + */ > + > +#include <common.h> > +#include <div64.h> > +#include <linux/compiler.h> > + > +static inline u64 get_cntfrq(void) > +{ > + u64 cntfrq; > + asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq)); > + return cntfrq; > +} > + > +static inline u64 tick_to_time(u64 tick) > +{ > + tick *= CONFIG_SYS_HZ; > + do_div(tick, get_cntfrq()); > + return tick; > +} > + > +static inline u64 time_to_tick(u64 time) > +{ > + time *= get_cntfrq(); > + do_div(time, CONFIG_SYS_HZ); > + return time; > +} > + > +static inline u64 us_to_tick(unsigned long long usec) > +{ > + usec = usec * get_cntfrq() + 999999; > + do_div(usec, 1000000); > + > + return usec; > +} > + > +u64 get_ticks(void) > +{ > + u64 cval; > + > + asm volatile("isb;mrs %0, cntpct_el0" : "=r" (cval)); > + > + return cval; > +} > + > +ulong get_timer(ulong base) > +{ > + return tick_to_time(get_ticks()) - base; > +} > + > +void __udelay(unsigned long usec) > +{ > + u64 start, tmo; > + > + start = get_ticks(); > + tmo = us_to_tick(usec); > + while (get_ticks() < (start + tmo)) > + ; > +} What's wrong with the existing arch timer code? Rob _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot