Sorry for the delay ;) Am 02.05.20 um 10:59 schrieb Stefan Roese: > From: Aaron Williams <[email protected]> > > This patch adds very basic support for the Octeon III SoCs. Only > CFI parallel NOR flash and UART are supported for now. > > Please note that the basic Octeon port does not include the DDR3/4 > initialization yet. This will be added in some follow-up patches > later. To still use U-Boot with this port, the L2 cache (4MiB on > Octeon III CN73xx) is used as RAM. This way, U-Boot can boot to the > prompt on such boards.
this patch should come after the common MIPS patches > > Signed-off-by: Aaron Williams <[email protected]> > Signed-off-by: Stefan Roese <[email protected]> > --- > > MAINTAINERS | 6 + > arch/Kconfig | 1 + > arch/mips/Kconfig | 49 +- > arch/mips/Makefile | 7 + > arch/mips/cpu/Makefile | 4 +- > arch/mips/include/asm/arch-octeon/cavm-reg.h | 42 + > arch/mips/include/asm/arch-octeon/clock.h | 24 + > arch/mips/mach-octeon/Kconfig | 92 ++ > arch/mips/mach-octeon/Makefile | 10 + > arch/mips/mach-octeon/clock.c | 22 + > arch/mips/mach-octeon/cpu.c | 55 + > arch/mips/mach-octeon/dram.c | 27 + > arch/mips/mach-octeon/include/ioremap.h | 30 + > arch/mips/mach-octeon/start.S | 1241 ++++++++++++++++++ > 14 files changed, 1608 insertions(+), 2 deletions(-) > create mode 100644 arch/mips/include/asm/arch-octeon/cavm-reg.h > create mode 100644 arch/mips/include/asm/arch-octeon/clock.h > create mode 100644 arch/mips/mach-octeon/Kconfig > create mode 100644 arch/mips/mach-octeon/Makefile > create mode 100644 arch/mips/mach-octeon/clock.c > create mode 100644 arch/mips/mach-octeon/cpu.c > create mode 100644 arch/mips/mach-octeon/dram.c > create mode 100644 arch/mips/mach-octeon/include/ioremap.h > create mode 100644 arch/mips/mach-octeon/start.S > > diff --git a/MAINTAINERS b/MAINTAINERS > index 66f0b07263..29f2d7328c 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -749,6 +749,12 @@ M: Ezequiel Garcia <[email protected]> > S: Maintained > F: arch/mips/mach-jz47xx/ > > +MIPS Octeon > +M: Aaron Williams <[email protected]> > +S: Maintained > +F: arch/mips/mach-octeon/ > +F: arch/mips/include/asm/arch-octeon/ > + > MMC > M: Peng Fan <[email protected]> > S: Maintained > diff --git a/arch/Kconfig b/arch/Kconfig > index 91e049b322..1cd3e1dc0b 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -37,6 +37,7 @@ config MICROBLAZE > > config MIPS > bool "MIPS architecture" > + select CREATE_ARCH_SYMLINK you should not need that. 
The path arch/mips/mach-octeon/include/ will be automatically added to the include search paths. Thus move all files in arch/mips/include/asm/arch-octeon/ to arch/mips/mach-octeon/include/ > select HAVE_ARCH_IOREMAP > select HAVE_PRIVATE_LIBGCC > select SUPPORT_OF_CONTROL > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig > index 48e754cc46..3c7f3eb94f 100644 > --- a/arch/mips/Kconfig > +++ b/arch/mips/Kconfig > @@ -106,6 +106,24 @@ config ARCH_JZ47XX > select OF_CONTROL > select DM > > +config ARCH_OCTEON > + bool "Support Marvell Octeon CN7xxx platforms" > + select DISPLAY_CPUINFO > + select DMA_ADDR_T_64BIT > + select DM > + select DM_SERIAL > + select MIPS_CACHE_COHERENT > + select MIPS_INIT_STACK_IN_SRAM > + select MIPS_L2_CACHE > + select MIPS_TUNE_OCTEON3 > + select ROM_EXCEPTION_VECTORS > + select SUPPORTS_BIG_ENDIAN > + select SUPPORTS_CPU_MIPS64_OCTEON > + select PHYS_64BIT > + select OF_CONTROL > + select OF_LIVE > + imply CMD_DM > + > config MACH_PIC32 > bool "Support Microchip PIC32" > select DM > @@ -160,6 +178,7 @@ source "arch/mips/mach-bmips/Kconfig" > source "arch/mips/mach-jz47xx/Kconfig" > source "arch/mips/mach-pic32/Kconfig" > source "arch/mips/mach-mtmips/Kconfig" > +source "arch/mips/mach-octeon/Kconfig" > > if MIPS > > @@ -233,6 +252,14 @@ config CPU_MIPS64_R6 > Choose this option to build a kernel for release 6 or later of the > MIPS64 architecture. > > +config CPU_MIPS64_OCTEON > + bool "Marvell Octeon series of CPUs" > + depends on SUPPORTS_CPU_MIPS64_OCTEON > + select 64BIT > + help > + Choose this option for Marvell Octeon CPUs. These CPUs are between > + MIPS64 R5 and R6 with other extensions. 
> + > endchoice > > menu "General setup" > @@ -261,7 +288,7 @@ config MIPS_CM_BASE > config MIPS_CACHE_INDEX_BASE > hex "Index base address for cache initialisation" > default 0x80000000 if CPU_MIPS32 > - default 0xffffffff80000000 if CPU_MIPS64 > + default 0xFFFFFFFFC0000000 if ARCH_OCTEON > help > This is the base address for a memory block, which is used for > initialising the cache lines. This is also the base address of a > memory > @@ -342,6 +369,14 @@ config SPL_LOADER_SUPPORT > help > Enable this option if you want to use SPL loaders without DM enabled. > > +config MIPS_CACHE_COHERENT > + bool "Set if MIPS processor is cache coherent" > + help > + Enable this if the MIPS architecture is cache coherent like the > + Marvell Octeon series of SoCs. When this is set, cache flushes > + and invalidates only flush the write buffer since the hardware > + maintains cache coherency. > + > endmenu > > menu "OS boot interface" > @@ -398,6 +433,9 @@ config SUPPORTS_CPU_MIPS64_R2 > config SUPPORTS_CPU_MIPS64_R6 > bool > > +config SUPPORTS_CPU_MIPS64_OCTEON > + bool > + > config CPU_MIPS32 > bool > default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6 > @@ -405,6 +443,7 @@ config CPU_MIPS32 > config CPU_MIPS64 > bool > default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6 > + default y if CPU_MIPS64_OCTEON > > config MIPS_TUNE_4KC > bool > @@ -421,6 +460,9 @@ config MIPS_TUNE_34KC > config MIPS_TUNE_74KC > bool > > +config MIPS_TUNE_OCTEON3 > + bool > + > config 32BIT > bool > > @@ -453,6 +495,11 @@ config MIPS_SRAM_INIT > before it can be used. If enabled, a function mips_sram_init() will > be called just before setup_stack_gd. 
> > +config DMA_ADDR_T_64BIT > + bool > + help > + Select this to enable 64-bit DMA addressing > + > config SYS_DCACHE_SIZE > int > default 0 > diff --git a/arch/mips/Makefile b/arch/mips/Makefile > index af3f227436..fa1ba7855a 100644 > --- a/arch/mips/Makefile > +++ b/arch/mips/Makefile > @@ -1,6 +1,10 @@ > # SPDX-License-Identifier: GPL-2.0+ > > +ifneq ($(CONFIG_ARCH_OCTEON),y) > head-y := arch/mips/cpu/start.o > +else > +head-y := arch/mips/mach-octeon/start.o > +endif > > ifeq ($(CONFIG_SPL_BUILD),y) > ifneq ($(CONFIG_SPL_START_S_PATH),) > @@ -17,6 +21,7 @@ machine-$(CONFIG_ARCH_JZ47XX) += jz47xx > machine-$(CONFIG_MACH_PIC32) += pic32 > machine-$(CONFIG_ARCH_MTMIPS) += mtmips > machine-$(CONFIG_ARCH_MSCC) += mscc > +machine-${CONFIG_ARCH_OCTEON} += octeon > > machdirs := $(patsubst %,arch/mips/mach-%/,$(machine-y)) > libs-y += $(machdirs) > @@ -30,6 +35,7 @@ arch-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 > -Wa,-mips32r6 > arch-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,-mips64 > arch-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,-mips64r2 > arch-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,-mips64r6 > +arch-${CONFIG_CPU_MIPS64_OCTEON} += -march=octeon3 > > # Allow extra optimization for specific CPUs/SoCs > tune-$(CONFIG_MIPS_TUNE_4KC) += -mtune=4kc > @@ -37,6 +43,7 @@ tune-$(CONFIG_MIPS_TUNE_14KC) += -mtune=14kc > tune-$(CONFIG_MIPS_TUNE_24KC) += -mtune=24kc > tune-$(CONFIG_MIPS_TUNE_34KC) += -mtune=34kc > tune-$(CONFIG_MIPS_TUNE_74KC) += -mtune=74kc > +tune-${CONFIG_MIPS_TUNE_OCTEON3} += -mtune=octeon3 > > # Include default header files > cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic > diff --git a/arch/mips/cpu/Makefile b/arch/mips/cpu/Makefile > index 6df7bb4e48..732015d6f3 100644 > --- a/arch/mips/cpu/Makefile > +++ b/arch/mips/cpu/Makefile > @@ -1,6 +1,8 @@ > # SPDX-License-Identifier: GPL-2.0+ > > -extra-y = start.o > +ifneq ($(CONFIG_ARCH_OCTEON),y) > +extra-y = start.o > +endif > > obj-y += time.o > obj-y += interrupts.o > diff 
--git a/arch/mips/include/asm/arch-octeon/cavm-reg.h > b/arch/mips/include/asm/arch-octeon/cavm-reg.h > new file mode 100644 > index 0000000000..b961e54956 > --- /dev/null > +++ b/arch/mips/include/asm/arch-octeon/cavm-reg.h > @@ -0,0 +1,42 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2020 Marvell International Ltd. > + */ > + > +#ifndef __CAVM_REG_H__ > + > +/* Register offsets */ > +#define CAVM_CIU_FUSE ((u64 *)0x80010100000001a0) > +#define CAVM_MIO_BOOT_REG_CFG0 ((u64 *)0x8001180000000000) > +#define CAVM_RST_BOOT ((u64 *)0x8001180006001600) > + > +/* Register structs */ > + > +/** > + * Register (RSL) rst_boot > + * > + * RST Boot Register > + */ > +union cavm_rst_boot { > + u64 u; > + struct cavm_rst_boot_s { > + u64 chipkill : 1; > + u64 jtcsrdis : 1; > + u64 ejtagdis : 1; > + u64 romen : 1; > + u64 ckill_ppdis : 1; > + u64 jt_tstmode : 1; > + u64 vrm_err : 1; > + u64 reserved_37_56 : 20; > + u64 c_mul : 7; > + u64 pnr_mul : 6; > + u64 reserved_21_23 : 3; > + u64 lboot_oci : 3; > + u64 lboot_ext : 6; > + u64 lboot : 10; > + u64 rboot : 1; > + u64 rboot_pin : 1; > + } s; > +}; > + > +#endif /* __CAVM_REG_H__ */ > diff --git a/arch/mips/include/asm/arch-octeon/clock.h > b/arch/mips/include/asm/arch-octeon/clock.h > new file mode 100644 > index 0000000000..a844a222c9 > --- /dev/null > +++ b/arch/mips/include/asm/arch-octeon/clock.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2018, 2019 Marvell International Ltd. 
> + * > + * https://spdx.org/licenses > + */ > + > +#ifndef __CLOCK_H__ > + > +/** System PLL reference clock */ > +#define PLL_REF_CLK 50000000 /* 50 MHz */ > +#define NS_PER_REF_CLK_TICK (1000000000 / PLL_REF_CLK) > + > +/** > + * Returns the I/O clock speed in Hz > + */ > +u64 octeon_get_io_clock(void); > + > +/** > + * Returns the core clock speed in Hz > + */ > +u64 octeon_get_core_clock(void); > + > +#endif /* __CLOCK_H__ */ > diff --git a/arch/mips/mach-octeon/Kconfig b/arch/mips/mach-octeon/Kconfig > new file mode 100644 > index 0000000000..67fcb6058c > --- /dev/null > +++ b/arch/mips/mach-octeon/Kconfig > @@ -0,0 +1,92 @@ > +menu "Octeon platforms" > + depends on ARCH_OCTEON > + > +config SYS_SOC > + string > + default "octeon" > + > +config OCTEON_CN7XXX > + bool "Octeon CN7XXX SoC" > + > +config OCTEON_CN70XX > + bool "Octeon CN70XX SoC" > + select OCTEON_CN7XXX > + > +config OCTEON_CN73XX > + bool "Octeon CN73XX SoC" > + select OCTEON_CN7XXX > + > +config OCTEON_CN78XX > + bool "Octeon CN78XX SoC" > + select OCTEON_CN7XXX > + > +choice > + prompt "Octeon MIPS family select" > + > +config SOC_OCTEON2 > + bool "Octeon II family" > + help > + This selects the Octeon II SoC family
This should be added later, when it is actually needed.
> + > +config SOC_OCTEON3 > + bool "Octeon III family" > + help > + This selects the Octeon III SoC family CN70xx, CN73XX, CN78xx > + and CNF75XX. > + > +endchoice > + > +config SYS_DCACHE_SIZE > + default 32768 > + > +config SYS_DCACHE_LINE_SIZE > + default 128 > + > +config SYS_ICACHE_SIZE > + default 79872 > + > +config SYS_ICACHE_LINE_SIZE > + default 128 > + > +config OCTEON_BIG_STACK_SIZE > + hex > + default 0x4000 > + help > + This enables a larger stack needed for Octeon 3 DRAM initialization. > + If this is disabled then a part of the L1 cache will be reserved for > + the stack, resulting in a smaller image. If this is true then > + a portion of the TEXT address space will be reserved for the stack.
> + Note that this requires that U-Boot MUST be able to fit entirely > + within the L2 cache and cannot be executed from a parallel NOR flash. > + The default size is 16KiB. > + > +config OCTEON_COPY_FROM_FLASH_TO_L2 > + bool > + default y > + help > + Set this for U-Boot to attempt to copy itself from flash memory into > + the L2 cache. This significantly improves the boot performance. > + > +config OCTEON_L2_MEMCPY_IN_CACHE > + bool > + default y > + help > + If this is set then the memcpy code that is used to copy U-Boot from > + the flash to the L2 cache is written to the L2 cache. This > + significantly speeds up the memcpy operation. > + > +config OCTEON_L2_UBOOT_ADDR > + hex > + default 0xffffffff81000000 > + help > + This specifies the address where U-Boot will be copied into the L2 > + cache. > + > +config OCTEON_L2_MEMCPY_ADDR > + hex > + default 0xffffffff81400000 > + help > + This specifies where U-Boot will place the memcpy routine used for > + copying U-Boot from flash to L2 cache. > + > +endmenu > diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile > new file mode 100644 > index 0000000000..a5fda682a7 > --- /dev/null > +++ b/arch/mips/mach-octeon/Makefile > @@ -0,0 +1,10 @@ > +# (C) Copyright 2019 Marvell, Inc. > +# > +# SPDX-License-Identifier: GPL-2.0+ > +# > + > +extra-y = start.o > + > +obj-y += clock.o > +obj-y += cpu.o > +obj-y += dram.o > diff --git a/arch/mips/mach-octeon/clock.c b/arch/mips/mach-octeon/clock.c > new file mode 100644 > index 0000000000..6e32008641 > --- /dev/null > +++ b/arch/mips/mach-octeon/clock.c > @@ -0,0 +1,22 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2018, 2019 Marvell International Ltd.
> + */ > + > +#include <common.h> > +#include <asm/arch/clock.h> > + > +DECLARE_GLOBAL_DATA_PTR; > + > +int octeon_get_timer_freq(void) > +{ > + return gd->cpu_clk; > +} > + > +/** > + * Returns the I/O clock speed in Hz > + */ > +u64 octeon_get_io_clock(void) > +{ > + return gd->bus_clk; > +} > diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c > new file mode 100644 > index 0000000000..a1373c6d56 > --- /dev/null > +++ b/arch/mips/mach-octeon/cpu.c > @@ -0,0 +1,55 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Copyright (C) 2020 Marvell International Ltd. > + */ > + > +#include <common.h> > +#include <linux/io.h> > +#include <asm/arch/clock.h> > +#include <asm/arch-octeon/cavm-reg.h> > + > +DECLARE_GLOBAL_DATA_PTR; > + > +static int get_clocks(void) > +{ > + const u64 ref_clock = PLL_REF_CLK; > + union cavm_rst_boot rst_boot; > + > + rst_boot.u = ioread64(CAVM_RST_BOOT); > + gd->cpu_clk = ref_clock * rst_boot.s.c_mul; > + gd->bus_clk = ref_clock * rst_boot.s.pnr_mul; > + > + debug("%s: cpu: %lu, bus: %lu\n", __func__, gd->cpu_clk, gd->bus_clk); > + > + return 0; > +} > + > +/* Early mach init code run from flash */ > +int mach_cpu_init(void) > +{ > + /* Remap boot-bus 0x1fc0.0000 -> 0x1f40.0000 */ > + /* ToDo: Move this to an early running bus (bootbus) DM driver */ > + clrsetbits_be64(CAVM_MIO_BOOT_REG_CFG0, 0xffff, 0x1f40); > + > + /* Get clocks and store them in GD */ > + get_clocks(); > + > + return 0; > +} > + > +/** > + * Returns number of cores > + * > + * @return number of CPU cores for the specified node > + */ > +static int cavm_octeon_num_cores(void) > +{ > + return fls64(ioread64(CAVM_CIU_FUSE) & 0xffffffffffff); > +} > + > +int print_cpuinfo(void) > +{ > + printf("SoC: Octeon CN73xx (%d cores)\n", cavm_octeon_num_cores()); > + > + return 0; > +} > diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c > new file mode 100644 > index 0000000000..c16a73e8e6 > --- /dev/null > +++ b/arch/mips/mach-octeon/dram.c 
> @@ -0,0 +1,27 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Copyright (C) 2020 Marvell International Ltd. > + */ > + > +#include <common.h> > +#include <dm.h> > +#include <ram.h> > + > +DECLARE_GLOBAL_DATA_PTR; > + > +int dram_init(void) > +{ > + /* > + * No DDR init yet -> run in L2 cache > + */ > + gd->ram_size = (2 << 20); > + gd->bd->bi_dram[0].size = gd->ram_size; > + gd->bd->bi_dram[1].size = 0; > + > + return 0; > +} > + > +ulong board_get_usable_ram_top(ulong total_size) > +{ > + return gd->ram_top; > +} > diff --git a/arch/mips/mach-octeon/include/ioremap.h > b/arch/mips/mach-octeon/include/ioremap.h > new file mode 100644 > index 0000000000..59b75008a2 > --- /dev/null > +++ b/arch/mips/mach-octeon/include/ioremap.h > @@ -0,0 +1,30 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_MACH_OCTEON_IOREMAP_H > +#define __ASM_MACH_OCTEON_IOREMAP_H > + > +#include <linux/types.h> > + > +/* > + * Allow physical addresses to be fixed up to help peripherals located > + * outside the low 32-bit range -- generic pass-through version. 
> + */ > +static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, > + phys_addr_t size) > +{ > + return phys_addr; > +} > + > +static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long > size, > + unsigned long flags) > +{ > + return (void __iomem *)(XKPHYS | offset); > +} > + > +static inline int plat_iounmap(const volatile void __iomem *addr) > +{ > + return 0; > +} > + > +#define _page_cachable_default _CACHE_CACHABLE_NONCOHERENT > + > +#endif /* __ASM_MACH_OCTEON_IOREMAP_H */ > diff --git a/arch/mips/mach-octeon/start.S b/arch/mips/mach-octeon/start.S > new file mode 100644 > index 0000000000..acb967201a > --- /dev/null > +++ b/arch/mips/mach-octeon/start.S > @@ -0,0 +1,1241 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* > + * Startup Code for OCTEON 64-bit CPU-core > + * > + * Copyright (c) 2003 Wolfgang Denk <[email protected]> > + * Copyright 2004, 2005, 2010 - 2015 Cavium Inc.. > + */ > + > +#include <asm-offsets.h> > +#include <config.h> > +#include <asm/regdef.h> > +#include <asm/mipsregs.h> > +#include <asm/asm.h> > + > +#define BOOT_VECTOR_NUM_WORDS 8 > + > +#define OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET 0x70 > +#define OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET 0x78 > + > +#define OCTEON_BOOT_MOVEABLE_MAGIC1_RAW 0xdb00110ad358eacd > +#define OCTEON_BOOT_MOVEABLE_MAGIC1 OCTEON_BOOT_MOVEABLE_MAGIC1_RAW > + > +#define OCTEON_CIU_SOFT_RST 0x8001070000000740 > + > +#define OCTEON_L2C_WPAR_PP0 0x8001180080840000 > +#define OCTEON_MIO_BOOT_BASE 0x8001180000000000 > +#define OCTEON_MIO_BOOT_REG_CFG0_OFF 0x0000 > +#define OCTEON_MIO_BOOT_LOC_CFG0_OFF 0x0080 > +#define OCTEON_MIO_BOOT_LOC_ADR_OFF 0x0090 > +#define OCTEON_MIO_BOOT_LOC_DAT_OFF 0x0098 > +#define OCTEON_MIO_RST_BOOT 0x8001180000001600 > +#define OCTEON_MIO_BOOT_REG_CFG0 0x8001180000000000 > +#define OCTEON_MIO_BOOT_REG_TIM0 0x8001180000000040 > +#define OCTEON_MIO_BOOT_LOC_CFG0 0x8001180000000080 > +#define OCTEON_MIO_BOOT_LOC_ADR 0x8001180000000090 > +#define 
OCTEON_MIO_BOOT_LOC_DAT 0x8001180000000098 > +#define OCTEON_MIO_FUSE_DAT3 0x8001180000001418 > +#define OCTEON_L2D_FUS3 0x80011800800007B8 > +#define OCTEON_LMC0_DDR_PLL_CTL 0x8001180088000258 > + > +#define OCTEON_RST 0x8001180006000000 > +#define OCTEON_RST_BOOT_OFFSET 0x1600 > +#define OCTEON_RST_SOFT_RST_OFFSET 0x1680 > +#define OCTEON_RST_COLD_DATAX_OFFSET(X) (0x17C0 + (X) * 8) > +#define OCTEON_RST_BOOT 0x8001180006001600 > +#define OCTEON_RST_SOFT_RST 0x8001180006001680 > +#define OCTEON_RST_COLD_DATAX(X) (0x80011800060017C0 + (X) * 8) > + > +#define OCTEON_OCX_COM_NODE 0x8001180011000000 > +#define OCTEON_L2C_OCI_CTL 0x8001180080800020 > +#define OCTEON_L2C_TAD_CTL 0x8001180080800018 > +#define OCTEON_L2C_CTL 0x8001180080800000 > + > +#define OCTEON_DBG_DATA 0x80011F00000001E8 > +#define OCTEON_PCI_READ_CMD_E 0x80011F0000001188 > +#define OCTEON_NPEI_DBG_DATA 0x80011F0000008510 > +#define OCTEON_CIU_WDOG(X) (0x8001070000000500 + (X) * 8) > +#define OCTEON_CIU_PP_POKE(X) (0x8001070000000580 + (X) * 8) > +#define OCTEON_CIU3_WDOG(X) (0x8001010000020000 + (X) * 8) > +#define OCTEON_CIU3_PP_POKE(X) (0x8001010000030000 + (X) * 8) > +#define OCTEON_OCX_COM_LINKX_CTL(X) (0x8001180011000020 + (X) * 8) > +#define OCTEON_SLI_CTL_STATUS 0x80011F0000028570 > +#define OCTEON_GSERX_SCRATCH(X) (0x8001180090000020 + (X) * > 0x1000000) > + > +/** PRID for CN56XX */ > +#define OCTEON_PRID_CN56XX 0x04 > +/** PRID for CN52XX */ > +#define OCTEON_PRID_CN52XX 0x07 > +/** PRID for CN63XX */ > +#define OCTEON_PRID_CN63XX 0x90 > +/** PRID for CN68XX */ > +#define OCTEON_PRID_CN68XX 0x91 > +/** PRID for CN66XX */ > +#define OCTEON_PRID_CN66XX 0x92 > +/** PRID for CN61XX */ > +#define OCTEON_PRID_CN61XX 0x93 > +/** PRID for CNF71XX */ > +#define OCTEON_PRID_CNF71XX 0x94 > +/** PRID for CN78XX */ > +#define OCTEON_PRID_CN78XX 0x95 > +/** PRID for CN70XX */ > +#define OCTEON_PRID_CN70XX 0x96 > +/** PRID for CN73XX */ > +#define OCTEON_PRID_CN73XX 0x97 > +/** PRID for CNF75XX */ > 
+#define OCTEON_PRID_CNF75XX 0x98 > + > +/* func argument is used to create a mark, must be unique */ > +#define GETOFFSET(reg, func) \ > + .balign 8; \ > + bal func ##_mark; \ > + nop; \ > + .dword .; \ > +func ##_mark: \ > + ld reg, 0(ra); \ > + dsubu reg, ra, reg; > + > +#define JAL(func) \ > + .balign 8; \ > + bal func ##_mark; \ > + nop; \ > + .dword .; \ > +func ##_mark: \ > + ld t8, 0(ra); \ > + dsubu t8, ra, t8; \ > + dla t9, func; \ > + daddu t9, t9, t8; \ > + jalr t9; \ > + nop; > + > + .set arch=octeon3 > + .set noreorder > + > + .macro uhi_mips_exception > + move k0, t9 # preserve t9 in k0 > + move k1, a0 # preserve a0 in k1 > + li t9, 15 # UHI exception operation > + li a0, 0 # Use hard register context > + sdbbp 1 # Invoke UHI operation > + .endm > + > + .macro setup_stack_gd > + li t0, -16 > + PTR_LI t1, big_stack_start > + and sp, t1, t0 # force 16 byte alignment > + PTR_SUBU \ > + sp, sp, GD_SIZE # reserve space for gd > + and sp, sp, t0 # force 16 byte alignment > + move k0, sp # save gd pointer > +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ > + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) > + li t2, CONFIG_VAL(SYS_MALLOC_F_LEN) > + PTR_SUBU \ > + sp, sp, t2 # reserve space for early malloc > + and sp, sp, t0 # force 16 byte alignment > +#endif > + move fp, sp > + > + /* Clear gd */ > + move t0, k0 > +1: > + PTR_S zero, 0(t0) > + PTR_ADDIU t0, PTRSIZE > + blt t0, t1, 1b > + nop > + > +#if CONFIG_VAL(SYS_MALLOC_F_LEN) && \ > + !CONFIG_IS_ENABLED(INIT_STACK_WITHOUT_MALLOC_F) > + PTR_S sp, GD_MALLOC_BASE(k0) # gd->malloc_base offset > +#endif > + .endm > + > +/* Saved register usage: > + * s0: not used > + * s1: not used > + * s2: Address U-Boot loaded into in L2 cache > + * s3: Start address > + * s4: flags > + * 1: booting from RAM > + * 2: executing out of cache > + * 4: booting from flash > + * s5: u-boot size (data end - _start) > + * s6: offset in flash. 
> + * s7: _start physical address > + * s8: > + */ > + > +ENTRY(_start) > + /* U-Boot entry point */ > + b reset > + > + /* The above jump instruction/nop are considered part of the > + * bootloader_header_t structure but are not changed when the header is > + * updated. > + */ > + > + /* Leave room for bootloader_header_t header at start of binary. This > + * header is used to identify the board the bootloader is for, what > + * address it is linked at, failsafe/normal, etc. It also contains a > + * CRC of the entire image. > + */ > + > +#if defined(CONFIG_ROM_EXCEPTION_VECTORS) > + /* > + * Exception vector entry points. When running from ROM, an exception > + * cannot be handled. Halt execution and transfer control to debugger, > + * if one is attached. > + */ > + .org 0x200 > + /* TLB refill, 32 bit task */ > + uhi_mips_exception > + > + .org 0x280 > + /* XTLB refill, 64 bit task */ > + uhi_mips_exception > + > + .org 0x300 > + /* Cache error exception */ > + uhi_mips_exception > + > + .org 0x380 > + /* General exception */ > + uhi_mips_exception > + > + .org 0x400 > + /* Catch interrupt exceptions */ > + uhi_mips_exception > + > + .org 0x480 > + /* EJTAG debug exception */ > +1: b 1b > + nop > + > + .org 0x500 > +#endif > + > +/* Reserve extra space so that when we use the boot bus local memory > + * segment to remap the debug exception vector we don't overwrite > + * anything useful > + */ > + > +/* Basic exception handler (dump registers) in all ASM. When using the > TLB for > + * mapping u-boot C code, we can't branch to that C code for exception > handling > + * (TLB is disabled for some exceptions. 
> + */ > + > +/* RESET/start here */ > + .balign 8 > +reset: > + nop > + synci 0(zero) > + mfc0 k0, CP0_STATUS > + ori k0, 0x00E0 /* enable 64 bit mode for CSR access */ > + mtc0 k0, CP0_STATUS > + > + /* Save the address we're booting from, strip off low bits */ > + bal 1f > + nop > +1: > + move s3, ra > + dins s3, zero, 0, 12 > + > + /* Disable boot bus moveable regions */ > + PTR_LI k0, OCTEON_MIO_BOOT_LOC_CFG0 > + sd zero, 0(k0) > + sd zero, 8(k0) > + > + /* Disable the watchdog timer > + * First we check if we're running on CN78XX, CN73XX or CNF75XX to see > + * if we use CIU3 or CIU. > + */ > + mfc0 t0, CP0_PRID > + ext t0, t0, 8, 8 > + /* Assume CIU */ > + PTR_LI t1, OCTEON_CIU_WDOG(0) > + PTR_LI t2, OCTEON_CIU_PP_POKE(0) > + blt t0, OCTEON_PRID_CN78XX, wd_use_ciu > + nop > + beq t0, OCTEON_PRID_CN70XX, wd_use_ciu > + nop > + /* Use CIU3 */ > + PTR_LI t1, OCTEON_CIU3_WDOG(0) > + PTR_LI t2, OCTEON_CIU3_PP_POKE(0) > +wd_use_ciu: > + sd zero, 0(t2) /* Pet the dog */ > + sd zero, 0(t1) /* Disable watchdog timer */ > + > + /* Errata: CN76XX has a node ID of 3. change it to zero here. > + * This needs to be done before we relocate to L2 as addresses change > + * For 76XX pass 1.X we need to zero out the OCX_COM_NODE[ID], > + * L2C_OCI_CTL[GKSEGNODE] and CP0 of Root.CvmMemCtl2[KSEGNODE]. > + */ > + mfc0 a4, CP0_PRID > + /* Check for 78xx pass 1.x processor ID */ > + andi a4, 0xffff > + blt a4, (OCTEON_PRID_CN78XX << 8), 1f > + nop > + > + /* Zero out alternate package for now */ > + dins a4, zero, 6, 1 > + bge a4, ((OCTEON_PRID_CN78XX << 8) | 0x08), 1f > + nop > + > + /* 78xx or 76xx here, first check for bug #27141 */ > + PTR_LI a5, OCTEON_SLI_CTL_STATUS > + ld a6, 0(a5) > + andi a7, a4, 0xff > + andi a6, a6, 0xff > + > + beq a6, a7, not_bug27141 > + nop > + > + /* core 0 proc_id rev_id field does not match SLI_CTL_STATUS rev_id */ > + /* We just hit bug #27141. 
Need to reset the chip and try again */ > + > + PTR_LI a4, OCTEON_RST_SOFT_RST > + ori a5, zero, 0x1 /* set the reset bit */ > + > +reset_78xx_27141: > + sync > + synci 0(zero) > + cache 9, 0(zero) > + sd a5, 0(a4) > + wait > + b reset_78xx_27141 > + nop > + > +not_bug27141: > + /* 76XX pass 1.x has the node number set to 3 */ > + mfc0 a4, CP0_EBASE > + ext a4, a4, 0, 10 > + bne a4, 0x180, 1f /* Branch if not node 3 core 0 */ > + nop > + > + /* Clear OCX_COM_NODE[ID] */ > + PTR_LI a5, OCTEON_OCX_COM_NODE > + ld a4, 0(a5) > + dins a4, zero, 0, 2 > + sd a4, 0(a5) > + ld zero, 0(a5) > + > + /* Clear L2C_OCI_CTL[GKSEGNODE] */ > + PTR_LI a5, OCTEON_L2C_OCI_CTL > + ld a4, 0(a5) > + dins a4, zero, 4, 2 > + sd a4, 0(a5) > + ld zero, 0(a5) > + > + /* Clear CP0 Root.CvmMemCtl2[KSEGNODE] */ > + dmfc0 a4, CP0_CVMMEMCTL2 > + dins a4, zero, 12, 2 > + dmtc0 a4, CP0_CVMMEMCTL2 > + > + /* Put the flash address in the start of the EBASE register to > + * enable our exception handler but only for core 0. > + */ > + mfc0 a4, CP0_EBASE > + dext a4, a4, 0, 10 > + bnez a4, no_flash > + /* OK in delay slot */ > + dext a6, a6, 0, 16 /* Get the base address in flash */ > + sll a6, a6, 16 > + mtc0 a6, CP0_EBASE /* Enable exceptions */ > + > +no_flash: > + /* Zero out various registers */ > + mtc0 zero, CP0_DEPC > + mtc0 zero, CP0_EPC > + mtc0 zero, CP0_CAUSE > + mfc0 a4, CP0_PRID > + ext a4, a4, 8, 8 > + mtc0 zero, CP0_DESAVE > + > + /* The following are only available on Octeon 2 or later */ > + mtc0 zero, CP0_KSCRATCH1 > + mtc0 zero, CP0_KSCRATCH2 > + mtc0 zero, CP0_KSCRATCH3 > + mtc0 zero, CP0_USERLOCAL > + > + /* Turn off ROMEN bit to disable ROM */ > + PTR_LI a1, OCTEON_MIO_RST_BOOT > + /* For OCTEON 3 we use RST_BOOT instead of MIO_RST_BOOT. > + * The difference is bits 24-26 are 6 instead of 0 for the address. 
> + */ > + /* For Octeon 2 and CN70XX we can ignore the watchdog */ > + blt a4, OCTEON_PRID_CN78XX, watchdog_ok > + nop > + > + PTR_LI a1, OCTEON_RST_BOOT > + > + beq a4, OCTEON_PRID_CN70XX, watchdog_ok > + nop > + > + ld a2, 0(a1) > + /* There is a bug where some registers don't get properly reset when > + * the watchdog timer causes a reset. In this case we need to force > + * a reset. > + */ > + bbit0 a2, 11, watchdog_ok /* Skip if watchdog not hit */ > + dins a2, zero, 2, 18 /* Don't clear LBOOT, LBOOT_EXT or LBOOT_OCI */ > + /* Clear bit indicating reset due to watchdog */ > + ori a2, 1 << 11 > + sd a2, 0(a1) > + > + /* Disable watchdog */ > + PTR_LI a1, OCTEON_CIU3_PP_POKE(0) > + sd zero, 0(a1) > + PTR_LI a1, OCTEON_CIU3_WDOG(0) > + sd zero, 0(a1) > + > + /* Record this in the GSER0_SCRATCH register in bit 11 */ > + PTR_LI a1, OCTEON_GSERX_SCRATCH(0) > + ld a2, 0(a1) > + ori a2, 1 << 11 > + sd a2, 0(a1) > + > + PTR_LI a1, OCTEON_RST_SOFT_RST > + li a2, 1 > + sd a2, 0(a1) > + wait > + > + /* We should never get here */ > + > +watchdog_ok: > + ld a2, 0(a1) > + /* Don't clear LBOOT/LBOOT_EXT or LBOOT_OCI */ > + dins a2, zero, 2, 18 > + dins a2, zero, 60, 1 /* Clear ROMEN bit */ > + sd a2, 0(a1) > + > + /* Start of Octeon setup */ > + > + /* Check what core we are - if core 0, branch to init tlb > + * loop in flash. Otherwise, look up address of init tlb > + * loop that was saved in the boot vector block. > + */ > + mfc0 a0, CP0_EBASE > + andi a0, EBASE_CPUNUM /* get core */ > + beqz a0, InitTLBStart_local > + nop > + > + break > + /* We should never get here - non-zero cores now go directly to > + * tlb init from the boot stub in movable region. > + */ > + > + .globl InitTLBStart > +InitTLBStart: > +InitTLBStart_local: > + /* If we don't have working memory yet configure a bunch of > + * scratch memory, and set the stack pointer to the top > + * of it. 
This allows us to go to C code without having > + * memory set up > + * > + * Warning: do not change SCRATCH_STACK_LINES as this can impact the > + * transition from start.S to crti.asm. crti requires 590 bytes of > + * stack space. > + */ > + cache 1,0(zero) /* Clear Dcache so cvmseg works right */ > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + rdhwr v0, $0 > + bnez v0, 1f > + nop > + PTR_LA sp, big_stack_start - 16 > + b stack_clear_done > + nop > +1: > +#endif > +#define SCRATCH_STACK_LINES 0x36 /* MAX is 0x36 */ > + dmfc0 v0, CP0_CVMMEMCTL > + dins v0, zero, 0, 9 > + /* setup SCRATCH_STACK_LINES scratch lines of scratch */ > + ori v0, 0x100 | SCRATCH_STACK_LINES > + dmtc0 v0, CP0_CVMMEMCTL > + /* set stack to top of scratch memory */ > + li sp, 0xffffffffffff8000 + (SCRATCH_STACK_LINES * 128) > + /* Clear scratch for CN63XX pass 2.0 errata Core-15169*/ > + li t0, 0xffffffffffff8000 > +clear_scratch: > + sd zero, 0(t0) > + addiu t0, 8 > + bne t0, sp, clear_scratch > + nop > + > + /* This code run on all cores - core 0 from flash, > + * the rest from DRAM. When booting from PCI, non-zero cores > + * come directly here from the boot vector - no earlier code in this > + * file is executed. > + */ > + > + /* Some generic initialization is done here as well, as we need this > + * done on all cores even when booting from PCI > + */ > +stack_clear_done: > + /* Clear watch registers. 
*/ > + mtc0 zero, CP0_WATCHLO > + mtc0 zero, CP0_WATCHHI > + > + /* STATUS register */ > + mfc0 k0, CP0_STATUS > + li k1, ~ST0_IE > + and k0, k1 > + mtc0 k0, CP0_STATUS > + > + /* CAUSE register */ > + mtc0 zero, CP0_CAUSE > + > + /* Init Timer */ > + dmtc0 zero, CP0_COUNT > + dmtc0 zero, CP0_COMPARE > + > + > + mfc0 a5, CP0_STATUS > + li v0, 0xE0 /* enable 64 bit mode for CSR access */ > + or v0, v0, a5 > + mtc0 v0, CP0_STATUS > + > + > + dli v0, 1 << 29 /* Enable large physical address support in TLB */ > + mtc0 v0, CP0_PAGEGRAIN > + > +InitTLB: > + dmtc0 zero, CP0_ENTRYLO0 > + dmtc0 zero, CP0_ENTRYLO1 > + mtc0 zero, CP0_PAGEMASK > + dmtc0 zero, CP0_CONTEXT > + /* Use an offset into kseg0 so we won't conflict with Mips1 legacy > + * TLB clearing > + */ > + PTR_LI v0, 0xFFFFFFFF90000000 > + mfc0 a0, CP0_CONFIG1 > + srl a0, a0, 25 > + /* Check if config4 reg present */ > + mfc0 a1, CP0_CONFIG3 > + bbit0 a1, 31, 2f > + and a0, a0, 0x3F /* a0 now has the max mmu entry index */ > + mfc0 a1, CP0_CONFIG4 > + bbit0 a1, 14, 2f /* check config4[MMUExtDef] */ > + nop > + /* append config4[MMUSizeExt] to most significant bit of > + * config1[MMUSize-1] > + */ > + ins a0, a1, 6, 8 > + and a0, a0, 0x3fff /* a0 now includes max entries for cn6xxx */ > +2: > + dmtc0 zero, CP0_XCONTEXT > + mtc0 zero, CP0_WIRED > + > +InitTLBloop: > + dmtc0 v0, CP0_ENTRYHI > + tlbp > + mfc0 v1, CP0_INDEX > + daddiu v0, v0, 1<<13 > + bgez v1, InitTLBloop > + > + mtc0 a0, CP0_INDEX > + tlbwi > + bnez a0, InitTLBloop > + daddiu a0, -1 > + > + mthi zero > + mtlo zero > + > + /* Set up status register */ > + mfc0 v0, CP0_STATUS > + /* Enable COP0 and COP2 access */ > + li a4, (1 << 28) | (1 << 30) > + or v0, a4 > + > + /* Must leave BEV set here, as DRAM is not configured for core 0. > + * Also, BEV must be 1 later on when the exception base address is set. 
> + */ > + > + /* Mask all interrupts */ > + ins v0, zero, 0, 16 > + /* Clear NMI (used to start cores other than core 0) */ > + ori v0, 0xE4 /* enable 64 bit, disable interrupts */ > + mtc0 v0, CP0_STATUS > + > + dli v0,0xE000000F /* enable all readhw locations */ > + mtc0 v0, CP0_HWRENA > + > + dmfc0 v0, CP0_CVMCTL > + ori v0, 1<<14 /* enable fixup of unaligned mem access */ > + dmtc0 v0, CP0_CVMCTL > + > + /* Setup scratch memory. This is also done in > + * cvmx_user_app_init, and this code will be removed > + * from the bootloader in the near future. > + */ > + > + /* Set L2C_LAD_CTL[MAXLFB] = 0 on CN73XX */ > + mfc0 a4, CP0_PRID > + ext a4, a4, 8, 8 > + blt a4, OCTEON_PRID_CN73XX, 72f > + nop > + PTR_LI v0, OCTEON_L2C_TAD_CTL > + ld t1, 0(v0) > + dins t1, zero, 0, 4 > + sd t1, 0(v0) > + ld zero, 0(v0) > + > +72: > + > + /* clear these to avoid immediate interrupt in noperf mode */ > + dmtc0 zero, CP0_COMPARE /* clear timer interrupt */ > + dmtc0 zero, CP0_COUNT /* clear timer interrupt */ > + dmtc0 zero, CP0_PERF_CNT0 /* clear perfCnt0 */ > + dmtc0 zero, CP0_PERF_CNT1 /* clear perfCnt1 */ > + dmtc0 zero, CP0_PERF_CNT2 > + dmtc0 zero, CP0_PERF_CNT3 > + > + /* If we're running on a node other than 0 then we need to set KSEGNODE > + * to 0. The nice thing with this code is that it also autodetects if > + * we're running on a processor that supports CVMMEMCTL2 or not since > + * only processors that have this will have a non-zero node ID. Because > + * of this there's no need to check if we're running on a 78XX. > + */ > + mfc0 t1, CP0_EBASE > + dext t1, t1, 7, 3 /* Extract node number */ > + beqz t1, is_node0 /* If non-zero then we're not node 0 */ > + nop > + dmfc0 t1, CP0_CVMMEMCTL2 > + dins t1, zero, 12, 4 > + dmtc0 t1, CP0_CVMMEMCTL2 > +is_node0: > + > + /* Set up TLB mappings for u-boot code in flash. */ > + > + /* Use a bal to get the current PC into ra. 
Since this bal is to > + * the address immediately following the delay slot, the ra is > + * the address of the label. We then use this to get the actual > + * address that we are executing from. > + */ > + bal __dummy > + nop > + > +__dummy: > + /* Get the actual address that we are running at */ > + PTR_LA a6, _start /* Linked address of _start */ > + PTR_LA a7, __dummy > + dsubu t0, a7, a6 /* offset of __dummy label from _start*/ > + dsubu a7, ra, t0 /* a7 now has actual address of _start*/ > + > + /* Save actual _start address in s7. This is where we > + * are executing from, as opposed to where the code is > + * linked. > + */ > + move s7, a7 > + move s4, zero > + > + /* s7 has actual address of _start. If this is > + * on the boot bus, it will be between 0xBFC00000 and 0xBFFFFFFF. > + * If it is on the boot bus, use 0xBFC00000 as the physical address > + * for the TLB mapping, as we will be adjusting the boot bus > + * to make this adjustment. > + * If we are running from DRAM (remote-boot), then we want to use the > + * real address in DRAM. > + */ > + > + /* Check to see if we are running from flash - we expect that to > + * be 0xffffffffb0000000-0xffffffffbfffffff > + * (0x10000000-0x1fffffff, unmapped/uncached) > + */ > + dli t2, 0xffffffffb0000000 > + dsubu t2, s7 > + slt s4, s7, t2 > + bltz t2, uboot_in_flash > + nop > + > + /* If we're not core 0 then we don't care about cache */ > + mfc0 t2, CP0_EBASE > + andi t2, EBASE_CPUNUM > + bnez t2, uboot_in_ram > + nop > + > + /* Find out if we're OCTEON I or OCTEON + which don't support running > + * out of cache. > + */ > + mfc0 t2, CP0_PRID > + ext t2, t2, 8, 8 > + li s4, 1 > + blt t2, 0x90, uboot_in_ram > + nop > + > + /* U-Boot can be executing either in RAM or L2 cache. Now we need to > + * check if DRAM is initialized.
The way we do that is to look at > + * the reset bit of the LMC0_DDR_PLL_CTL register (bit 7) > + */ > + PTR_LI t2, OCTEON_LMC0_DDR_PLL_CTL > + ld t2, 0(t2) > + bbit1 t2, 7, uboot_in_ram > + nop > + > + /* We must be executing out of cache */ > + b uboot_in_ram > + li s4, 2 > + > +uboot_in_flash: > + /* Set s4 to 4 to indicate we're running in FLASH */ > + li s4, 4 > + > +#if defined(CONFIG_OCTEON_DISABLE_L2_CACHE_INDEX_ALIASING) > + /* By default, L2C index aliasing is enabled. In some cases it may > + * need to be disabled. The L2C index aliasing can only be disabled > + * if U-Boot is running out of L2 cache and the L2 cache has not been > + * used to store anything. > + */ > + PTR_LI t1, OCTEON_L2C_CTL > + ld t2, 0(t1) > + ori t2, 1 > + sd t2, 0(t1) > +#endif > + > + /* Use BFC00000 as physical address for TLB mappings when booting > + * from flash, as we will adjust the boot bus mappings to make this > + * mapping correct. > + */ > + dli a7, 0xFFFFFFFFBFC00000 > + dsubu s6, s7, a7 /* Save flash offset in s6 */ > + > +#if defined(CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2) > + /* For OCTEON II we check to see if the L2 cache is big enough to hold > + * U-Boot. If it is big enough then we copy ourself from flash to the > + * L2 cache in order to speed up execution. 
> + */ > + > + /* Check for OCTEON 2 */ > + mfc0 t1, CP0_PRID > + ext t1, t1, 8, 8 > + /* Get number of L2 cache sets */ > + beq t1, OCTEON_PRID_CNF71XX, got_l2_sets /* CNF71XX */ > + li t2, 1 << 9 > + beq t1, OCTEON_PRID_CN78XX, got_l2_sets /* CN78XX */ > + li t2, 1 << 13 > + beq t1, OCTEON_PRID_CN70XX, got_l2_sets /* CN70XX */ > + li t2, 1 << 10 > + beq t1, OCTEON_PRID_CN73XX, got_l2_sets /* CN73XX */ > + li t2, 1 << 11 > + beq t1, OCTEON_PRID_CNF75XX, got_l2_sets /* CNF75XX */ > + li t2, 1 << 11 > + b l2_cache_too_small /* Unknown OCTEON model */ > + nop > + > +got_l2_sets: > + /* Get number of associations */ > + PTR_LI t0, OCTEON_MIO_FUSE_DAT3 > + ld t0, 0(t0) > + dext t0, t0, 32, 3 > + > + beq t1, OCTEON_PRID_CN70XX, process_70xx_l2sets > + nop > + /* 0 = 16-way, 1 = 12-way, 2 = 8-way, 3 = 4-way, 4-7 reserved */ > + beqz t0, got_l2_ways > + li t3, 16 > + beq t0, 1, got_l2_ways > + li t3, 12 > + beq t0, 2, got_l2_ways > + li t3, 8 > + beq t0, 3, got_l2_ways > + li t3, 4 > + b l2_cache_too_small > + nop > + > +process_70xx_l2sets: > + /* For 70XX, the number of ways is defined as: > + * 0 - full cache (4-way) 512K > + * 1 - 3/4 ways (3-way) 384K > + * 2 - 1/2 ways (2-way) 256K > + * 3 - 1/4 ways (1-way) 128K > + * 4-7 illegal (aliased to 0-3) > + */ > + andi t0, 3 > + beqz t0, got_l2_ways > + li t3, 4 > + beq t0, 1, got_l2_ways > + li t3, 3 > + beq t0, 2, got_l2_ways > + li t3, 2 > + li t3, 1 > + > +got_l2_ways: > + dmul a1, t2, t3 /* Calculate cache size */ > + dsll a1, 7 /* Ways * Sets * cache line sz (128) */ > + daddiu a1, a1, -128 /* Adjust cache size for copy code */ > + > + /* Calculate size of U-Boot image */ > + /* > + * "uboot_end - _start" is not correct, as the image also > + * includes the DTB appended to the end (OF_EMBED is deprecated). > + * Lets use a defined max for now here. 
> + */ > + PTR_LI s5, CONFIG_BOARD_SIZE_LIMIT > + > + daddu t2, s5, s7 /* t2 = end address */ > + daddiu t2, t2, 127 > + ins t2, zero, 0, 7 /* Round up to cache line for memcpy */ > + > + slt t1, a1, s5 /* See if we're bigger than the L2 cache */ > + bnez t1, l2_cache_too_small > + nop > + /* Address we plan to load at in the L2 cache */ > + PTR_LI t9, CONFIG_OCTEON_L2_UBOOT_ADDR > +# ifdef CONFIG_OCTEON_L2_MEMCPY_IN_CACHE > + /* Enable all ways for PP0. Authentik ROM may have disabled these */ > + PTR_LI a1, OCTEON_L2C_WPAR_PP0 > + sd zero, 0(a1) > + > + /* Address to place our memcpy code */ > + PTR_LI a0, CONFIG_OCTEON_L2_MEMCPY_ADDR > + /* The following code writes a simple memcpy routine into the cache > + * to copy ourself from flash into the L2 cache. This makes the > + * memcpy routine a lot faster since each instruction can potentially > + * require four read cycles to flash over the boot bus. > + */ > + /* Zero cache line in the L2 cache */ > + zcb (a0) > + synci 0(zero) > + dli a1, 0xdd840000dd850008 /* ld a0, 0(t0); ld a1, 8(t0) */ > + sd a1, 0(a0) > + dli a1, 0xdd860010dd870018 /* ld a2, 16(t0); ld a3, 24(t0) */ > + sd a1, 8(a0) > + dli a1, 0xfda40000fda50008 /* sd a0, 0(t1); sd a1, 8(t1) */ > + sd a1, 16(a0) > + dli a1, 0xfda60010fda70018 /* sd a2, 16(t1); sd a3, 24(t1) */ > + sd a1, 24(a0) > + dli a1, 0x258c0020158efff6 /* addiu t0, 32; bne t0, t2, -40 */ > + sd a1, 32(a0) > + dli a1, 0x25ad002003e00008 /* addiu t1, 32; jr ra */ > + sd a1, 40(a0) > + sd zero, 48(a0) /* nop; nop */ > + > + /* Synchronize the caches */ > + sync > + synci 0(zero) > + > + move t0, s7 > + move t1, t9 > + > + /* Do the memcpy operation in L2 cache to copy ourself from flash > + * to the L2 cache. 
> + */ > + jalr a0 > + nop > + > +# else > + /* Copy ourself to the L2 cache from flash, 32 bytes at a time */ > + /* This code is now written to the L2 cache using the code above */ > +1: > + ld a0, 0(t0) > + ld a1, 8(t0) > + ld a2, 16(t0) > + ld a3, 24(t0) > + sd a0, 0(t1) > + sd a1, 8(t1) > + sd a2, 16(t1) > + sd a3, 24(t1) > + addiu t0, 32 > + bne t0, t2, 1b > + addiu t1, 32 > +# endif /* CONFIG_OCTEON_L2_MEMCPY_IN_CACHE */ > + > + /* Adjust the start address of U-Boot and the global pointer */ > + subu t0, s7, t9 /* t0 = address difference */ > + move s7, t9 /* Update physical address */ > + move s2, t9 > + sync > + synci 0(zero) > + > + /* Now we branch to the L2 cache. We first get our PC then adjust it > + */ > + bal 3f > + nop > +3: > + /* Don't add any instructions here! */ > + subu t9, ra, t0 > + /* Give ourself 16 bytes */ > + addiu t9, 0x10 > + > + jal t9 /* Branch to address in L2 cache */ > + > + nop > + nop > + /* Add instructions after here */ > + > + move a7, s7 > + > + b uboot_in_ram > + ori s4, 2 /* Running out of L2 cache */ > + > +l2_cache_too_small: /* We go here if we can't copy ourself to L2 */ > +#endif /* CONFIG_OCTEON_COPY_FROM_FLASH_TO_L2 */ > + > + /* This code is only executed if booting from flash. */ > + /* For flash boot (_not_ RAM boot), we do a workaround for > + * an LLM errata on CN38XX and CN58XX parts. > + */ > + > +uboot_in_ram: > + /* U-boot address is now in reg a7, and is 4 MByte aligned. > + * (boot bus addressing has been adjusted to make this happen for flash, > + * and for DRAM this alignment must be provided by the remote boot > + * utility. > + */ > + /* See if we're in KSEG0 range, if so set EBASE register to handle > + * exceptions. > + */ > + dli a1, 0x20000000 > + bge a7, a1, 1f > + nop > + /* Convert our physical address to KSEG0 */ > + PTR_LI a1, 0xffffffff80000000 > + or a1, a1, a7 > + mtc0 a1, CP0_EBASE > +1: > + /* U-boot now starts at 0xBFC00000. Use a single 4 MByte TLB mapping > + * to map u-boot. 
> + */ > + move a0, a6 /* Virtual addr in a0 */ > + dins a0, zero, 0, 16 /* Zero out offset bits */ > + move a1, a7 /* Physical addr in a1 */ > + > + /* Now we need to remove the MIPS address space bits. For this we > + * need to determine if it is a 32 bit compatibility address or not. > + */ > + > + /* 'lowest' address in compatibility space */ > + PTR_LI t0, 0xffffffff80000000 > + dsubu t0, t0, a1 > + bltz t0, compat_space > + nop > + > + /* We have a xkphys address, so strip off top bit */ > + b addr_fixup_done > + dins a1, zero, 63, 1 > + > +compat_space: > + PTR_LI a2, 0x1fffffff > + and a1, a1, a2 /* Mask phy addr to remove address space bits */ > + > +addr_fixup_done: > + /* Currenty the u-boot image size is limited to 4 MBytes. In order to > + * support larger images the flash mapping will need to be changed to > + * be able to access more than that before C code is run. Until that > + * is done, we just use a 4 MByte mapping for the secondary cores as > + * well. > + */ > + /* page size (only support 4 Meg binary size for now for core 0) > + * This limitation is due to the fact that the boot vector is > + * 0xBFC00000 which only makes 4MB available. Later more flash > + * address space will be available after U-Boot has been copied to > + * RAM. For now assume that it is in flash. > + */ > + li a2, 2*1024*1024 > + > + mfc0 a4, CP0_EBASE > + andi a4, EBASE_CPUNUM /* get core */ > + beqz a4, core_0_tlb > + nop > + > + /* Now determine how big a mapping to use for secondary cores, > + * which need to map all of u-boot + heap in DRAM > + */ > + /* Here we look at the alignment of the the physical address, > + * and use the largest page size possible. In some cases > + * this can result in an oversize mapping, but for secondary cores > + * this mapping is very short lived. 
> + */ > + > + /* Physical address in a1 */ > + li a2, 1 > +1: > + sll a2, 1 > + and a5, a1, a2 > + beqz a5, 1b > + nop > + > + /* a2 now contains largest page size we can use */ > +core_0_tlb: > + JAL(single_tlb_setup) > + > + /* Check if we're running from cache */ > + bbit1 s4, 1, uboot_in_cache > + nop > + > + /* If we are already running from ram, we don't need to muck > + * with boot bus mappings. > + */ > + PTR_LI t2, 0xffffffffb0000000 > + dsubu t2, s7 > + /* See if our starting address is lower than the boot bus */ > + bgez t2, uboot_in_ram2 /* If yes, booting from RAM */ > + nop > + > +uboot_in_cache: > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + /* The large stack is only for core 0. For all other cores we need to > + * use the L1 cache otherwise the other cores will stomp on top of each > + * other unless even more space is reserved for the stack space for > + * each core. With potentially 96 cores this gets excessive. > + */ > + mfc0 v0, CP0_EBASE > + andi a0, EBASE_CPUNUM > + bnez a0, no_big_stack > + nop > + PTR_LA sp, big_stack_start > + daddiu sp, -16 > + > +no_big_stack: > +#endif > + /* We now have the TLB set up, so we need to remap the boot bus. > + * This is tricky, as we are running from flash, and will be changing > + * the addressing of the flash. 
> + */ > + /* Enable movable boot bus region 0, at address 0x10000000 */ > + PTR_LI a4, OCTEON_MIO_BOOT_BASE > + dli a5, 0x81000000 /* EN + base address 0x11000000 */ > + sd a5, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) > + > + /* Copy code to that remaps the boot bus to movable region */ > + sd zero, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + > + PTR_LA a6, change_boot_mappings > + GETOFFSET(a5, change_boot_mappings); > + daddu a5, a5, a6 > + > + /* The code is 16 bytes (2 DWORDS) */ > + ld a7, 0(a5) > + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + ld a7, 8(a5) > + sd a7, OCTEON_MIO_BOOT_LOC_DAT_OFF(a4) > + > + /* Read from an RML register to ensure that the previous writes have > + * completed before we branch to the movable region. > + */ > + ld zero, OCTEON_MIO_BOOT_LOC_CFG0_OFF(a4) > + > + /* Compute value for boot bus configuration register */ > + /* Read region 0 config so we can _modify_ the base address field */ > + PTR_LI a4, OCTEON_MIO_BOOT_REG_CFG0 /* region 0 config */ > + ld a0, 0(a4) > + dli a4, 0xf0000000 /* Mask off bits we want to save */ > + and a4, a4, a0 > + dli a0, 0x0fff0000 /* Force size to max */ > + or a4, a4, a0 > + > + move a5, s6 > + /* Convert to 64k blocks, as used by boot bus config */ > + srl a5, 16 > + li a6, 0x1fc0 /* 'normal' boot bus base config value */ > + subu a6, a6, a5 /* Subtract offset */ > + /* combine into register value to pass to boot bus routine */ > + or a0, a4, a6 > + > + /* Branch there */ > + PTR_LA a1, __mapped_continue_label > + PTR_LI a2, OCTEON_MIO_BOOT_REG_CFG0 > + /* If region 0 is not enabled we can skip it */ > + ld a4, 0(a2) > + bbit0 a4, 31, __mapped_continue_label > + nop > + li a4, 0x10000000 > + j a4 > + synci 0(zero) > + > + /* We never get here, as we go directly to __mapped_continue_label */ > + break > + > + > +uboot_in_ram2: > + > + /* Now jump to address in TLB mapped memory to continue execution */ > + PTR_LA a4, __mapped_continue_label > + synci 0(a4) > + j a4 > + nop > + > +__mapped_continue_label: > + /* 
Check if we are core 0, if we are not then we need > + * to vector to code in DRAM to do application setup, and > + * skip the rest of the bootloader. Only core 0 runs the bootloader > + * and sets up the tables that the other cores will use for > + * configuration. > + */ > + mfc0 a0, CP0_EBASE > + andi a0, EBASE_CPUNUM /* get core */ > + /* if (__all_cores_are_equal==0 && core==0), > + * then jump to execute BL on core 0; else 'go to next line' > + * (core_0_cont1 is executed ONLY when t0=a0=0(core0_ID)) > + */ > + lw t0, __all_cores_are_equal > + beq a0, t0, core_0_cont1 > + nop > + > + /* other cores look up addr from dram */ > + /* DRAM controller already set up by first core */ > + li a1, (BOOT_VECTOR_NUM_WORDS * 4) > + mul a0, a0, a1 > + > + /* Now find out the boot vector base address from the moveable boot > + * bus region. > + */ > + > + /* Get the address of the boot bus moveable region */ > + PTR_LI t8, OCTEON_MIO_BOOT_BASE > + ld t9, OCTEON_MIO_BOOT_LOC_CFG0_OFF(t8) > + /* Make sure it's enabled */ > + bbit0 t9, 31, invalid_boot_vector > + dext t9, t9, 3, 24 > + dsll t9, t9, 7 > + /* Make address XKPHYS */ > + li t0, 1 > + dins t9, t0, 63, 1 > + > + ld t0, OCTEON_BOOT_MOVEABLE_MAGIC_OFFSET(t9) > + dli t1, OCTEON_BOOT_MOVEABLE_MAGIC1 > + bne t0, t1, invalid_boot_vector > + nop > + > + /* Load base address of boot vector table */ > + ld t0, OCTEON_BOOT_VECTOR_MOVEABLE_OFFSET(t9) > + /* Add offset for core */ > + daddu a1, t0, a0 > + > + mfc0 v0, CP0_STATUS > + move v1, v0 > + ins v1, zero, 19, 1 /* Clear NMI bit */ > + mtc0 v1, CP0_STATUS > + > + /* Get app start function address */ > + lw t9, 8(a1) > + beqz t9, invalid_boot_vector > + nop > + > + j t9 > + lw k0, 12(a1) /* Load global data (deprecated) */ > + > +invalid_boot_vector: > + wait > + b invalid_boot_vector > + nop > + > +__all_cores_are_equal: > + /* The following .word tells whether 'all_cores_are_equal' or core0 is special > + * By default (for the first execution) the core0 should be special, > +
* in order to behave like the old(existing not-modified) bootloader > + * and run the bootloader on core 0 to follow the existing design. > + * However after that we make 'all_cores_equal' which allows to run SE > + * applications on core0 like on any other core. NOTE that value written > + * to '__all_cores_are_equal' should not match any core ID. > + */ > + .word 0 > + > +core_0_cont1: > + li t0, 0xffffffff > + sw t0, __all_cores_are_equal > + /* From here on, only core 0 runs, other cores have branched > + * away. > + */ > +#ifdef CONFIG_MIPS_INIT_STACK_IN_SRAM > + /* Set up initial stack and global data */ > + setup_stack_gd > +# ifdef CONFIG_DEBUG_UART > + PTR_LA t9, debug_uart_init > + jalr t9 > + nop > +# endif > +#endif > + move a0, zero # a0 <-- boot_flags = 0 > + PTR_LA t9, board_init_f > + > + jr t9 > + move ra, zero > + END(_start) > + > + .balign 8 > + .globl single_tlb_setup > + .ent single_tlb_setup > + /* Sets up a single TLB entry. Virtual/physical addresses > + * must be properly aligned. > + * a0 Virtual address > + * a1 Physical address > + * a2 page (_not_ mapping) size > + */ > +single_tlb_setup: > + /* Determine the number of TLB entries available, and > + * use the top one. 
> + */ > + mfc0 a3, CP0_CONFIG1 > + dext a3, a3, 25, 6 /* a3 now has the max mmu entry index */ > + mfc0 a5, CP0_CONFIG3 /* Check if config4 reg present */ > + bbit0 a5, 31, single_tlb_setup_cont > + nop > + mfc0 a5, CP0_CONFIG4 > + bbit0 a5, 14, single_tlb_setup_cont /* check config4[MMUExtDef] */ > + nop > + /* append config4[MMUSizeExt] to most significant bit of > + * config1[MMUSize-1] > + */ > + dins a3, a5, 6, 8 > + and a3, a3, 0x3fff /* a3 now includes max entries for cn6xxx */ > + > +single_tlb_setup_cont: > + > + /* Format physical address for entry low */ > + nop > + dsrl a1, a1, 12 > + dsll a1, a1, 6 > + ori a1, a1, 0x7 /* set DVG bits */ > + > + move a4, a2 > + daddu a5, a4, a4 /* mapping size */ > + dsll a6, a4, 1 > + daddiu a6, a6, -1 /* pagemask */ > + dsrl a4, a4, 6 /* adjust for adding with entrylo */ > + > + /* Now set up mapping */ > + mtc0 a6, CP0_PAGEMASK > + mtc0 a3, CP0_INDEX > + > + dmtc0 a1, CP0_ENTRYLO0 > + daddu a1, a1, a4 > + > + dmtc0 a1, CP0_ENTRYLO1 > + daddu a1, a1, a4 > + > + dmtc0 a0, CP0_ENTRYHI > + daddu a0, a0, a5 > + > + ehb > + tlbwi > + jr ra > + nop > + .end single_tlb_setup > + > + > +/** > + * This code is moved to a movable boot bus region, > + * and it is responsible for changing the flash mappings and > + * jumping to run from the TLB mapped address. > + * > + * @param a0 New address for boot bus region 0 > + * @param a1 Address to branch to afterwards > + * @param a2 Address of MIO_BOOT_REG_CFG0 > + */ > + .balign 8 > +change_boot_mappings: > + sd a0, 0(a2) > + sync > + j a1 /* Jump to new TLB mapped location */ > + synci 0(zero) > + > +/* If we need a large stack, allocate it here. */ > +#if CONFIG_OCTEON_BIG_STACK_SIZE > + /* Allocate the stack here so it's in L2 cache or DRAM */ > + .balign 16 > +big_stack_end: > + .skip CONFIG_OCTEON_BIG_STACK_SIZE, 0 > +big_stack_start: > + .dword 0 > +#endif > -- - Daniel

