Module Name:    src
Committed By:   marty
Date:           Wed Nov 25 04:03:34 UTC 2015

Added Files:
        src/sys/arch/arm/cortex: cortex_init.S

Log Message:
something temporary that will go away once odroid xu4 works


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/arch/arm/cortex/cortex_init.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: src/sys/arch/arm/cortex/cortex_init.S
diff -u /dev/null src/sys/arch/arm/cortex/cortex_init.S:1.1
--- /dev/null	Wed Nov 25 04:03:34 2015
+++ src/sys/arch/arm/cortex/cortex_init.S	Wed Nov 25 04:03:34 2015
@@ -0,0 +1,780 @@
+/*	$NetBSD: cortex_init.S,v 1.1 2015/11/25 04:03:34 marty Exp $	*/
+/*-
+ * Copyright (c) 2012 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Matt Thomas of 3am Software Foundry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_cpuoptions.h"
+#include "opt_cputypes.h"
+#include "opt_multiprocessor.h"
+
+#include <arm/asm.h>
+#include <arm/armreg.h>
+#include <arm/cortex/scu_reg.h>
+#include "assym.h"
+
+#define A15	0xf
+//#define MPDEBUG
+
+// Macro to call routines in .text
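+// If the kernel is not yet running at its linked virtual address, the
+// non-KERNEL_BASES_EQUAL variant rebases the target symbol by
+// KERNEL_BASE_VOFFSET so the branch lands on its current physical location.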
+#if defined(KERNEL_BASES_EQUAL)
+#define CALL(f)		bl	_C_LABEL(f)
+#else
+#define	CALL(f)	\
+	movw	ip, #:lower16:_C_LABEL(f); \
+	movt	ip, #:upper16:_C_LABEL(f); \
+	sub	ip, ip, #KERNEL_BASE_VOFFSET; \
+	blx	ip
+#endif
+
+
+// We'll modify va and pa at run time so we can use relocatable addresses.
+#define MMU_INIT(va,pa,n_sec,attr) \
+	.word	((va) & 0xffffffff)|(n_sec)		    ; \
+	.word	((pa) & 0xffffffff)|(attr)		    ; \
+
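+// Because va and pa are section (1MB) aligned, their low bits are free:
+// each entry packs the section count into the va word and the section
+// attributes into the pa word; arm_boot_l1pt_init below unpacks them.
+// A caller's table might look like this (addresses and attributes are
+// hypothetical, shown only for illustration):
+//
+//	mmu_init_table:
+//		MMU_INIT(0x80000000, 0x40000000, 64, <section proto + cache bits>)
+//		MMU_INIT(0x40000000, 0x40000000, 64, <section proto + cache bits>)
+//		MMU_INIT(0, 0, 0, 0)	// an entry with n_sec == 0 ends the walk
+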
+// Set up a preliminary mapping in the MMU to allow us to run at KERNEL_BASE
+// with caches on.  If we are MULTIPROCESSOR, save the TTB address.
+//
+arm_boot_l1pt_init:
+#if defined(MULTIPROCESSOR)
+	movw	r3, #:lower16:cortex_mmuinfo
+	movt	r3, #:upper16:cortex_mmuinfo
+#if !defined(KERNEL_BASES_EQUAL)
+	sub	r3, r3, #KERNEL_BASE_VOFFSET
+#endif
+	str	r0, [r3]
+
+	// Make sure the info makes it into memory
+	mcr	p15, 0, r3, c7, c10, 1		// writeback the cache line
+	dsb
+#endif
+
+	mov	ip, r1			// save mmu table addr
+	// Build page table from scratch
+	mov	r1, r0			// Start address to clear memory.
+	// Zero the entire table so all virtual addresses are invalid.
+	add	r2, r1, #L1_TABLE_SIZE	// Ending address
+	mov	r4, #0
+	mov	r5, #0
+	mov	r6, #0
+	mov	r7, #0
+1:	stmia	r1!, {r4-r7}		// 16 bytes at a time
+	cmp	r1, r2
+	blt	1b
+
+	// Now create our entries per the mmu_init_table.
+	l1table	.req r0
+	va	.req r1
+	pa	.req r2
+	n_sec	.req r3
+	attr	.req r4
+	itable	.req r5
+
+	mov	attr, #0
+	mrc	p15, 0, r3, c0, c0, 5	// MPIDR read
+	cmp	r3, #0			// not zero?
+	movne	attr, #L1_S_V6_S	//    yes, shareable attribute
+	mov	itable, ip		// reclaim table address
+	b	3f
+
+2:	str	pa, [l1table, va, lsl #2]
+	add	va, va, #1
+	add	pa, pa, #(L1_S_SIZE)
+	subs	n_sec, n_sec, #1
+	bhi	2b
+
+3:	ldmia	itable!, {va, pa}
+	// Convert va to l1 offset:	va = 4 * (va >> L1_S_SHIFT)
+	ubfx	n_sec, va, #0, #L1_S_SHIFT
+	lsr	va, va, #L1_S_SHIFT
+
+	// Do we need to add sharing for this?
+	tst	pa, #(L1_S_C|L1_S_B)	// is this entry cacheable?
+	orrne	pa, pa, attr		// add sharing
+
+4:	cmp	n_sec, #0
+	bne	2b
+	bx	lr			// return
+
+	.unreq	va
+	.unreq	pa
+	.unreq	n_sec
+	.unreq	attr
+	.unreq	itable
+	.unreq	l1table
+
+//
+// Coprocessor register initialization values
+//
+#if defined(CPU_CORTEXA8)
+#undef CPU_CONTROL_SWP_ENABLE		// not present on A8
+#define CPU_CONTROL_SWP_ENABLE		0
+#endif
+#ifdef __ARMEL__
+#define CPU_CONTROL_EX_BEND_SET		0
+#else
+#define CPU_CONTROL_EX_BEND_SET		CPU_CONTROL_EX_BEND
+#endif
+#ifdef ARM32_DISABLE_ALIGNMENT_FAULTS
+#define CPU_CONTROL_AFLT_ENABLE_CLR	CPU_CONTROL_AFLT_ENABLE
+#define CPU_CONTROL_AFLT_ENABLE_SET	0
+#else
+#define CPU_CONTROL_AFLT_ENABLE_CLR	0
+#define CPU_CONTROL_AFLT_ENABLE_SET	CPU_CONTROL_AFLT_ENABLE
+#endif
+
+// bits to set in the Control Register
+//
+#define CPU_CONTROL_SET \
+	(CPU_CONTROL_MMU_ENABLE		|	\
+	 CPU_CONTROL_AFLT_ENABLE_SET	|	\
+	 CPU_CONTROL_DC_ENABLE		|	\
+	 CPU_CONTROL_SWP_ENABLE		|	\
+	 CPU_CONTROL_BPRD_ENABLE	|	\
+	 CPU_CONTROL_IC_ENABLE		|	\
+	 CPU_CONTROL_EX_BEND_SET	|	\
+	 CPU_CONTROL_UNAL_ENABLE)
+
+// bits to clear in the Control Register
+//
+#define CPU_CONTROL_CLR \
+	(CPU_CONTROL_AFLT_ENABLE_CLR)
+
+arm_cpuinit:
+	// Because the MMU may already be on do a typical sequence to set
+	// the Translation Table Base(s).
+	mov	ip, lr
+	mov	r10, r0			// save TTBR
+	mov	r1, #0
+
+	mcr     p15, 0, r1, c7, c5, 0	// invalidate I cache
+
+	mrc	p15, 0, r2, c1, c0, 0	// SCTLR read
+	movw	r1, #(CPU_CONTROL_DC_ENABLE|CPU_CONTROL_IC_ENABLE)
+	bic	r2, r2, r1		// clear I+D cache enable
+
+#ifdef __ARMEB__
+	// SCTLR.EE determines the endianness of translation table lookups.
+	// So we need to make sure it's set before starting to use the new
+	// translation tables (which are big endian).
+	//
+	orr	r2, r2, #CPU_CONTROL_EX_BEND
+	bic	r2, r2, #CPU_CONTROL_MMU_ENABLE
+	pli	[pc, #32]		// preload the next few cachelines
+	pli	[pc, #64]
+	pli	[pc, #96]
+	pli	[pc, #128]
+#endif
+
+	mcr	p15, 0, r2, c1, c0, 0	// SCTLR write
+
+	XPUTC(#'F')
+	dsb				// Drain the write buffers.
+
+	XPUTC(#'G')
+	mrc	p15, 0, r1, c0, c0, 5	// MPIDR read
+	cmp	r1, #0
+	orrlt	r10, r10, #TTBR_MPATTR	// MP, cacheable (Normal WB)
+	orrge	r10, r10, #TTBR_UPATTR	// Non-MP, cacheable, normal WB
+	XPUTC(#'0')
+	mcr	p15, 0, r10, c2, c0, 0	// TTBR0 write
+#if defined(ARM_MMU_EXTENDED)
+	// When using split TTBRs, we need to set both since the physical
+	// addresses we were/are using might be in either.
+	XPUTC(#'1')
+	mcr	p15, 0, r10, c2, c0, 1	// TTBR1 write
+#endif
+
+	XPUTC(#'H')
+#if defined(ARM_MMU_EXTENDED)
+	XPUTC(#'1')
+	mov	r1, #TTBCR_S_N_1	// make sure TTBCR_S_N is 1
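+	// With TTBCR.N == 1 the VA space is split at 0x80000000: table walks
+	// for the lower 2GB go through TTBR0, the upper 2GB through TTBR1.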
+#else
+	XPUTC(#'0')
+	mov	r1, #0			// make sure TTBCR is 0
+#endif
+	mcr	p15, 0, r1, c2, c0, 2	// TTBCR write
+
+	isb
+
+#if !defined(CPU_CORTEXA5)
+	XPUTC(#'I')
+	mov	r1, #0
+	mcr	p15, 0, r1, c8, c7, 0	// TLBIALL (just this core)
+	dsb
+	isb
+#endif
+
+	XPUTC(#'J')
+	mov	r1, #0			// get KERNEL_PID
+	mcr	p15, 0, r1, c13, c0, 1	// CONTEXTIDR write
+
+	// Set the Domain Access register.  Very important!
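+	// DOMAIN_CLIENT (0b01) makes accesses in the kernel domain subject to
+	// the access permission bits in the translation tables.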
+	XPUTC(#'K')
+	mov     r1, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
+	mcr	p15, 0, r1, c3, c0, 0	// DACR write
+
+	//
+	// Enable the MMU, etc.
+	//
+	XPUTC(#'L')
+	mrc	p15, 0, r1, c1, c0, 0	// SCTLR read
+
+	movw	r3, #:lower16:CPU_CONTROL_SET
+	movt	r3, #:upper16:CPU_CONTROL_SET
+	orr	r0, r1, r3
+#if defined(CPU_CONTROL_CLR) && (CPU_CONTROL_CLR != 0)
+	bic	r0, r0, #CPU_CONTROL_CLR
+#endif
+
+	pli	1f
+	dsb
+
+	// turn mmu on!
+	//
+	mov	r0, r0			// fetch instruction cacheline
+1:	mcr	p15, 0, r0, c1, c0, 0	// SCTLR write
+
+	// Ensure that the coprocessor has finished turning on the MMU.
+	//
+	mrc	p15, 0, r0, c0, c0, 0	// Read an arbitrary value.
+	mov	r0, r0			// Stall until read completes.
+	XPUTC(#'M')
+
+	bx	ip			// return
+
+	.p2align 2
+
+#if defined(VERBOSE_INIT_ARM) && defined(XPUTC_COM)
+#define TIMO		0x25000
+#ifndef COM_MULT
+#define COM_MULT	1
+#endif
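+// COM_MULT is the spacing, in bytes, between successive UART registers
+// (some SoCs place the 8250-style registers on 16- or 32-bit boundaries);
+// with COM_BSWAP the payload sits in the most significant byte of each
+// register, hence the shifts by (COM_MULT-1)*8 below.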
+xputc:
+	mov	r2, #TIMO
+#ifdef CONADDR
+	movw	r3, #:lower16:CONADDR
+	movt	r3, #:upper16:CONADDR
+#elif defined(CONSADDR)
+	movw	r3, #:lower16:CONSADDR
+	movt	r3, #:upper16:CONSADDR
+#endif
+1:
+#if COM_MULT == 1
+	ldrb	r1, [r3, #(COM_LSR*COM_MULT)]
+#else
+#if COM_MULT == 2
+	ldrh	r1, [r3, #(COM_LSR*COM_MULT)]
+#elif COM_MULT == 4
+	ldr	r1, [r3, #(COM_LSR*COM_MULT)]
+#endif
+#ifdef COM_BSWAP
+	lsr	r1, r1, #(COM_MULT-1)*8
+#endif
+#endif
+	tst	r1, #LSR_TXRDY
+	bne	2f
+	subs	r2, r2, #1
+	bne	1b
+2:
+#if COM_MULT == 1
+	strb	r0, [r3, #COM_DATA]
+#else
+#ifdef COM_BSWAP
+	lsl	r0, r0, #(COM_MULT-1)*8
+#endif
+#if COM_MULT == 2
+	strh	r0, [r3, #COM_DATA]
+#else
+	str	r0, [r3, #COM_DATA]
+#endif
+#endif
+
+	mov	r2, #TIMO
+3:
+#if COM_MULT == 1
+	ldrb	r1, [r3, #(COM_LSR*COM_MULT)]
+#else
+#if COM_MULT == 2
+	ldrh	r1, [r3, #(COM_LSR*COM_MULT)]
+#elif COM_MULT == 4
+	ldr	r1, [r3, #(COM_LSR*COM_MULT)]
+#endif
+#ifdef COM_BSWAP
+	lsr	r1, r1, #(COM_MULT-1)*8
+#endif
+#endif
+	tst	r1, #LSR_TSRE
+	bne	4f
+	subs	r2, r2, #1
+	bne	3b
+4:
+	bx	lr
+#endif /* VERBOSE_INIT_ARM && XPUTC_COM */
+
+//
+// Perform the initialization of the Cortex core required by NetBSD.
+//
+//
+cortex_init:
+	mov	r10, lr				// save lr
+
+	cpsid	if, #PSR_SVC32_MODE		// SVC32 with no interrupts
+	mov	r0, #0
+	msr	spsr_sxc, r0			// set SPSR[23:8] to known value
+
+	mrc	p15, 0, r0, c0, c0, 0		// MIDR read
+	ubfx	r0, r0, #4, #4			// extract cortex part.
+	mov	r5, r0				// Save it for use
+	XPUTC(#'@')
+	
+	mrc	p15, 0, r4, c1, c0, 0		// SCTLR read
+#if defined(CPU_CORTEXA7) || defined(CPU_CORTEXA15) || defined(CPU_CORTEXA17)
+	//
+	// Before turning on SMP, turn off the caches and the MMU.
+	//
+	dsb
+	movw	r1,#(CPU_CONTROL_IC_ENABLE|CPU_CONTROL_DC_ENABLE\
+			|CPU_CONTROL_MMU_ENABLE)
+	bic	r0, r4, r1			// disable icache/dcache/mmu
+	mcr	p15, 0, r0, c1, c0, 0		// SCTLR write
+	dsb
+	isb
+#endif
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		// toss i-cache
+
+#if defined(CPU_CORTEXA5) || defined(CPU_CORTEXA9)
+	//
+	// Step 1a, invalidate all the cache tags in all ways on the SCU.
+	//
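+	// The SCU invalidate-all register takes a 4-bit way mask for each CPU
+	// at bit position 4*cpu; writing 0xf there invalidates every way of
+	// this CPU's tag RAM.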
+	XPUTC(#'A')
+#if defined(ARM_CBAR)
+	movw	r3, #:lower16:ARM_CBAR
+	movt	r3, #:upper16:ARM_CBAR
+#else
+	mrc	p15, 4, r3, c15, c0, 0		// read cbar
+#endif
+#ifdef __ARMEB__
+	setend	le
+#endif
+	mrc	p15, 0, r0, c0, c0, 5		// MPIDR get
+	and	r0, r0, #3			// get our cpu number
+	lsl	r0, r0, #2			// adjust to cpu num shift
+	mov	r1, #0xf			// select all ways
+	lsl	r1, r1, r0			// shift into place
+	str	r1, [r3, #SCU_INV_ALL_REG]	// write scu invalidate all
+#ifdef __ARMEB__
+	setend	be
+#endif
+	dsb
+	isb
+#endif
+
+	//
+	// Step 1b, invalidate the data cache
+	//
+	XPUTC(#'B')
+	CALL(armv7_dcache_wbinv_all)
+	XPUTC(#'C')
+
+	//
+	// Check to see if we are really MP before enabling SMP mode
+	//
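+	// MPIDR[31] reads as 1 when the multiprocessing extensions are
+	// implemented and MPIDR[30] (the U bit) reads as 1 on a uniprocessor,
+	// so 0b10 in those two bits identifies a real MP configuration.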
+	mrc	p15, 0, r1, c0, c0, 5		// MPIDR get
+	ubfx	r1, r1, #30, #2			// get MP bits
+	cmp	r1, #2				// is it MP?
+	bxne	r10				//    no, return
+
+	XPUTC(#'D')
+#if !defined(CPU_CORTEXA7) && !defined(CPU_CORTEXA17)
+	//
+	// Step 2, disable the data cache
+	//
+	mrc	p15, 0, r2, c1, c0, 0		// SCTLR read
+	bic	r2, r2, #CPU_CONTROL_DC_ENABLE	// clear data cache enable
+	mcr	p15, 0, r2, c1, c0, 0		// SCTLR write
+	isb
+	XPUTC(#'1')
+#endif
+
+#if defined(CPU_CORTEXA5) || defined(CPU_CORTEXA9)
+	//
+	// Step 3, enable the SCU
+	//
+#if defined(ARM_CBAR)
+	movw	r3, #:lower16:ARM_CBAR
+	movt	r3, #:upper16:ARM_CBAR
+#else
+	mrc	p15, 4, r3, c15, c0, 0		// read cbar
+#endif
+#ifdef __ARMEB__
+	setend	le
+#endif
+	ldr	r1, [r3, #SCU_CTL]		// read scu control
+	orr	r1, r1, #SCU_CTL_SCU_ENA	// set scu enable flag
+	str	r1, [r3, #SCU_CTL]		// write scu control
+#ifdef __ARMEB__
+	setend	be
+#endif
+	dsb
+	isb
+	XPUTC(#'2')
+#endif /* CORTEXA5 || CORTEXA9 */
+
+#if defined(CPU_CORTEXA7) || defined(CPU_CORTEXA15) || defined(CPU_CORTEXA17)
+	//
+	// The MMU is off.  Make sure the TLB is invalidated before
+	// turning on SMP.
+	//
+	mov	r0, #0
+	mcr	p15, 0, r0, c8, c7, 0	// TLBIALL (just this core)
+#endif
+
+	// For the A7, SMP must be on for ldrex/strex to work.
+	//
+#if defined(MULTIPROCESSOR)
+#if defined(CPU_CORTEXA5) || defined(CPU_CORTEXA7) || defined(CPU_CORTEXA9) || defined(CPU_CORTEXA15) || defined(CPU_CORTEXA17)
+	//
+	// Step 4a, set ACTLR.SMP=1
+	//
+	mrc	p15, 0, r0, c1, c0, 1		// ACTLR read
+	orr	r0, r0, #CORTEXA9_AUXCTL_SMP	// enable SMP
+
+#if defined(CPU_CORTEXA15)
+	// The A15 requires snoop-delayed exclusive handling to be set
+	// if there are 3 or more CPUs.
+	cmp	r5, #A15	      		// make sure we've got an a15
+	bne	1f
+	mrc	p15, 1, r2, c9, c0, 2		// L2CTRL read
+	ubfx	r2, r2, #25, #1			// bit 25 is set when 3+ CPUs
+	bfi	r0, r2, #31, #1			// copy it to bit 31 in ACTLR
+1:
+#endif
+
+#if defined(TEGRAK1_PMAP_WORKAROUND)
+	orr	r0, r0, #CORTEXA15_ACTLR_IOBEU
+#endif
+
+#if defined(CPU_CORTEXA5) || defined(CPU_CORTEXA9)
+	//
+	// Step 4a (continued on A5/A9), set ACTLR.FW=1
+	//
+	orr	r0, r0, #CORTEXA9_AUXCTL_FW	// enable cache/tlb/coherency
+#endif	/* A5 || A9 */
+#if defined(CPU_CORTEXA9)
+	//
+	// Step 4b (continued on A9), set ACTLR.L2PE=1
+	//
+	orr	r0, r0, #CORTEXA9_AUXCTL_L2PE	// enable L2 cache prefetch
+#endif
+
+	mcr	p15, 0, r0, c1, c0, 1		// ACTLR write
+	isb
+	dsb
+#endif	/* A5 || A7 || A9 || A15 || A17 */
+#endif	/* MULTIPROCESSOR */
+
+	//
+	// Step 4b, restore SCTLR (enable the data cache)
+	//
+	orr	r4, r4, #CPU_CONTROL_IC_ENABLE	// enable icache
+	orr	r4, r4, #CPU_CONTROL_DC_ENABLE	// enable dcache
+	mcr	p15, 0, r4, c1, c0, 0		// SCTLR write
+
+	isb
+	XPUTC(#'-')
+
+	bx	r10
+ASEND(cortex_init)
+
+#ifdef MULTIPROCESSOR
+	.pushsection .data
+	.align	2
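+//
+// cortex_mmuinfo holds the address of the temporary L1 translation table
+// built by arm_boot_l1pt_init; secondary processors pick it up in
+// cortex_mpstart before they turn their MMUs on.
+//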
+	.globl	cortex_mmuinfo
+	.type	cortex_mmuinfo,%object
+cortex_mmuinfo:
+	.space	4
+//
+// If something goes wrong in the initial MP startup, catch and record it.
+//
+#ifdef MPDEBUG
+	.globl	cortex_mpfault
+	.type	cortex_mpfault,%object
+cortex_mpfault:
+	.space	16		// PC, LR, FSR, FAR
+#endif
+	.popsection
+#endif // MULTIPROCESSOR
+
+// Secondary processors come here after exiting the SKU ROM.
+// They switch to the kernel's endianness almost immediately.
+//
+
+	.global	cortex_mpstart
+	.type	cortex_mpstart,%object
+
+cortex_mpstart:
+#ifndef MULTIPROCESSOR
+	//
+	// If not MULTIPROCESSOR, drop CPU into power saving state.
+	//
+3:	wfi
+	b	3b
+#else
+#ifdef __ARMEB__
+	setend	be				// switch to BE now
+#endif
+
+	// We haven't used anything from memory yet so we can invalidate the
+	// L1 cache without fear of losing valuable data.  Afterwards, we can
+	// flush icache without worrying about anything getting written back
+	// to memory.
+	CALL(armv7_dcache_l1inv_all)		// toss-dcache
+	CALL(armv7_icache_inv_all)		// toss i-cache after d-cache
+
+#if 0
+	mrc	p15, 0, r0, c1, c1, 2		// NSACR read
+	// Allow non-secure access to ACTLR[SMP]
+	orr	r0, r0, #NSACR_SMP
+#ifdef FPU_VFP
+	// Allow non-secure access to VFP/Neon
+	orr	r0, r0, #NSACR_VFPCP
+#endif
+	mcr	p15, 0, r0, c1, c1, 2		// NSACR write
+
+	// Allow non-secure access to CPSR[A,F], go to non-secure mode
+	mrc	p15, 0, r0, c1, c1, 0		// SCR read
+	orr	r0, r0, #0x31
+	bic	r0, r4, #0x0e		// non monitor extabt, irq, fiq
+	mcr	p15, 0, r0, c1, c1, 0		// SCR write
+	isb
+#endif
+
+	bl	cortex_init
+
+	// We are in SMP mode now.
+	//
+
+	// Get our initial temporary TTB so we can switch to it.
+	movw	r7, #:lower16:_C_LABEL(cortex_mmuinfo)
+	movt	r7, #:upper16:_C_LABEL(cortex_mmuinfo)
+#if !defined(KERNEL_BASES_EQUAL)
+	sub	r7, r7, #KERNEL_BASE_VOFFSET
+#endif
+	dmb
+	ldr	r0, [r7]			// load saved TTB address
+
+	// After we turn on the MMU, we will return to do rest of the
+	// MP startup code in .text.
+	//
+	movw	lr, #:lower16:cortex_mpcontinuation
+	movt	lr, #:upper16:cortex_mpcontinuation
+	b	arm_cpuinit
+#endif // MULTIPROCESSOR
+ASEND(cortex_mpstart)
+
+#ifdef MULTIPROCESSOR
+	.pushsection .text
+cortex_mpcontinuation:
+#ifdef MPDEBUG
+	//
+	// Setup VBAR to catch errors
+	//
+	adr	r2, cortex_mpvector
+	mcr	p15, 0, r2, c12, c0, 0		// VBAR set
+	isb
+
+	mrc	p15, 0, r0, c1, c0, 0		// SCTLR read
+#ifdef MULTIPROCESSOR
+	bic	r0, r0, #CPU_CONTROL_VECRELOC	// use VBAR
+#endif
+	mcr	p15, 0, r0, c1, c0, 0		// SCTLR write
+	dsb
+	isb
+#endif
+
+#ifdef MPDEBUG
+	movw	r9, #:lower16:_C_LABEL(arm_cpu_marker)
+	movt	r9, #:upper16:_C_LABEL(arm_cpu_marker)
+	str	pc, [r9]
+	str	r2, [r9, #4]
+#endif
+
+	mrc	p15, 0, r4, c0, c0, 5		// MPIDR get
+	and	r4, r4, #7			// get our cpu number
+	mov	r5, #1				// make a bitmask of it
+	lsl	r5, r5, r4			// shift into position
+#ifdef MPDEBUG
+	str	pc, [r9]
+#endif
+
+	mov	r1, r5
+	movw	r0, #:lower16:_C_LABEL(arm_cpu_hatched)
+	movt	r0, #:upper16:_C_LABEL(arm_cpu_hatched)
+	bl	_C_LABEL(atomic_or_32)		// show we've hatched
+	sev
+
+	//
+	// Now we wait for cpu_boot_secondary_processors to kick us the
+	// first time.  This means the kernel L1PT is ready for us to use.
+	//
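+	// arm_cpu_mbox holds one bit per CPU: the boot processor sets our bit
+	// once the kernel L1PT is usable, we clear it (below) after switching
+	// to that table, and a second kick later releases us into cpu_hatch().
+	//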
+	movw	r6, #:lower16:_C_LABEL(arm_cpu_mbox)
+	movt	r6, #:upper16:_C_LABEL(arm_cpu_mbox)
+#ifdef MPDEBUG
+	str	pc, [r9]
+#endif
+3:	dmb					// make stores visible
+	ldr	r2, [r6]			// load mbox
+	tst	r2, r5				// is our bit set?
+#ifdef MPDEBUG
+	str	pc, [r9]
+	str	r2, [r9, #4]
+#endif
+	wfeeq					//   no, back to sleep
+	beq	3b				//   no, and try again
+
+#ifdef MPDEBUG
+	str	pc, [r9]
+#endif
+
+	movw	r0, #:lower16:_C_LABEL(kernel_l1pt)
+	movt	r0, #:upper16:_C_LABEL(kernel_l1pt)
+	ldr	r0, [r0, #PV_PA]		// now get the phys addr
+#ifdef MPDEBUG
+	str	pc, [r9]
+	str	r0, [r9, #4]
+#endif
+#ifdef ARM_MMU_EXTENDED
+	mov	r1, #0
+#endif
+	bl	_C_LABEL(armv7_setttb)		// set the TTB
+
+	mov	r0, #DOMAIN_DEFAULT
+	mcr	p15, 0, r0, c3, c0, 0		// DACR write
+
+	mov	r1, #0
+	mcr	p15, 0, r1, c8, c7, 0		// invalidate the TLB
+
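+	// With TTBCR.PD0 set, a TLB miss on a TTBR0-translated address raises
+	// a translation fault instead of walking a user table we do not yet
+	// have.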
+	mrc	p15, 0, r1, c2, c0, 2		// TTBCR get
+	orr	r1, r1, #TTBCR_S_PD0		// prevent lookups via TTBR0
+	mcr	p15, 0, r1, c2, c0, 2		// TTBCR set
+
+#ifdef MPDEBUG
+	str	pc, [r9]			// we've got this far
+	str	r4, [r9, #4]
+#endif
+
+	//
+	// Tell arm32_kvminit we've loaded the new TTB
+	//
+	mov	r0, r6
+	mvn	r1, r5				// pass inverted mask to clear
+	bl	_C_LABEL(atomic_and_32)
+	sev					// wake the master
+
+#ifdef MPDEBUG
+	str	pc, [r9]			// we've got this far
+#endif
+
+	// Wait for cpu_boot_secondary_processors the second time.
+	//
+4:	dmb					// data memory barrier
+	ldr	r2, [r6]			// load mbox
+	tst	r2, r5				// is our bit set?
+	wfeeq					//    no, back to waiting
+	beq	4b				//    no, and try again
+
+#ifdef MPDEBUG
+	str	pc, [r9]			// we've got this far
+#endif
+
+	movw	r0, #:lower16:cpu_info
+	movt	r0, #:upper16:cpu_info		// get pointer to cpu_infos
+	ldr	r5, [r0, r4, lsl #2]		// load our cpu_info
+	ldr	r6, [r5, #CI_IDLELWP]		// get the idlelwp
+	ldr	r7, [r6, #L_PCB]		// now get its pcb
+	ldr	sp, [r7, #PCB_KSP]		// finally, we can load our SP
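+	// TPIDRPRW (c13, c0, 4) is the privileged-only software thread ID
+	// register, which the kernel is free to use to stash curcpu() or
+	// curlwp.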
+#ifdef TPIDRPRW_IS_CURCPU
+	mcr	p15, 0, r5, c13, c0, 4		// squirrel away curcpu()
+#elif defined(TPIDRPRW_IS_CURLWP)
+	mcr	p15, 0, r6, c13, c0, 4		// squirrel away curlwp()
+#else
+#error either TPIDRPRW_IS_CURCPU or TPIDRPRW_IS_CURLWP must be defined
+#endif
+	str	r6, [r5, #CI_CURLWP]		// and note we are running on it
+
+#ifdef MPDEBUG
+	str	pc, [r9]			// r9 still has arm_cpu_marker
+#endif
+
+	mov	r0, r5				// pass cpu_info
+	mov	r1, r4				// pass cpu_id
+	movw	r2, #:lower16:MD_CPU_HATCH	// pass md_cpu_hatch
+	movt	r2, #:upper16:MD_CPU_HATCH	// pass md_cpu_hatch
+	bl	_C_LABEL(cpu_hatch)
+	b	_C_LABEL(idle_loop)		// never to return
+ASEND(cortex_mpcontinuation)
+
+#ifdef MPDEBUG
+// Our exception table.  We only care about prefetch/data/address aborts.
+//
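+// VBAR ignores its low five bits, so the vector table must be aligned to
+// a 32-byte boundary (hence the .p2align 5).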
+	.p2align 5
+cortex_mpvector:
+	b	.	@ reset
+	b	.	@ undefined
+	b	.	@ swi
+	b	xprefetch_abort
+	b	xdata_abort
+	b	xaddress_abort
+	b	.	@ irq
+	b	.	@ fiq
+
+xprefetch_abort:
+	adr	r10, xprefetch_abort
+	mrc	p15, 0, r11, c5, c0, 1		// IFSR
+	mrc	p15, 0, r12, c6, c0, 1		// IFAR
+	b	xcommon_abort
+xdata_abort:
+	adr	r10, xdata_abort
+	mrc	p15, 0, r11, c5, c0, 0		// DFSR
+	mrc	p15, 0, r12, c6, c0, 0		// DFAR
+	b	xcommon_abort
+xaddress_abort:
+	adr	r10, xaddress_abort
+	mrc	p15, 0, r11, c5, c0, 0		// DFSR
+	mrc	p15, 0, r12, c6, c0, 0		// DFAR
+xcommon_abort:
+	movw	r8, #:lower16:cortex_mpfault	// where we should be
+	movt	r8, #:upper16:cortex_mpfault	// where we should be
+	stmia	r8, {r10-r12,lr}		// save type, PC, FSR, FAR
+	b	.				// loop forever
+#endif
+	.popsection
+#endif // MULTIPROCESSOR
