On 22/06/2023 9:57 pm, Shawn Anastasio wrote:
> Update ppc64/head.S to set up an initial boot stack, zero the .bss
> section, and jump to C.
>
> Also refactor the endian fixup trampoline into its own macro, since it
> will need to be used in multiple places, including every time we make a
> call into firmware (see next commit).
>
> Signed-off-by: Shawn Anastasio <sanasta...@raptorengineering.com>

Thank you for making this change - it definitely simplifies things.

> ---
>  xen/arch/ppc/Makefile                |  1 +
>  xen/arch/ppc/include/asm/asm-defns.h | 36 ++++++++++++++++++
>  xen/arch/ppc/ppc64/head.S            | 55 ++++++++++++++++++----------
>  xen/arch/ppc/setup.c                 | 13 +++++++
>  4 files changed, 85 insertions(+), 20 deletions(-)
>  create mode 100644 xen/arch/ppc/include/asm/asm-defns.h
>  create mode 100644 xen/arch/ppc/setup.c
>
> diff --git a/xen/arch/ppc/Makefile b/xen/arch/ppc/Makefile
> index 98220648af..fdbcd730d0 100644
> --- a/xen/arch/ppc/Makefile
> +++ b/xen/arch/ppc/Makefile
> @@ -1,4 +1,5 @@
>  obj-$(CONFIG_PPC64) += ppc64/
> +obj-y += setup.o
>  
>  $(TARGET): $(TARGET)-syms
>       cp -f $< $@
> diff --git a/xen/arch/ppc/include/asm/asm-defns.h 
> b/xen/arch/ppc/include/asm/asm-defns.h
> new file mode 100644
> index 0000000000..3db2063a39
> --- /dev/null
> +++ b/xen/arch/ppc/include/asm/asm-defns.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ASM_PPC_ASM_DEFNS_H
> +#define _ASM_PPC_ASM_DEFNS_H
> +
> +/*
> + * Load a 64-bit immediate value into the specified GPR.
> + */
> +#define LOAD_IMM64(reg, val)                                                 
> \
> +    lis reg, (val) @highest;                                                 
> \
> +    ori reg, reg, (val) @higher;                                             
> \
> +    rldicr reg, reg, 32, 31;                                                 
> \
> +    oris reg, reg, (val) @h;                                                 
> \
> +    ori reg, reg, (val) @l;
> +
> +/*
> + * Depending on how we were booted, the CPU could be running in either
> + * Little Endian or Big Endian mode. The following trampoline from Linux
> + * cleverly uses an instruction that encodes to a NOP if the CPU's
> + * endianness matches the assumption of the assembler (LE, in our case)
> + * or a branch to code that performs the endian switch in the other case.
> + */
> +#define FIXUP_ENDIAN                                                         
>   \
> +    tdi 0, 0, 0x48;   /* Reverse endian of b . + 8          */               
>   \
> +    b . + 44;         /* Skip trampoline if endian is good  */               
>   \
> +    .long 0xa600607d; /* mfmsr r11                          */               
>   \
> +    .long 0x01006b69; /* xori r11,r11,1                     */               
>   \
> +    .long 0x00004039; /* li r10,0                           */               
>   \
> +    .long 0x6401417d; /* mtmsrd r10,1                       */               
>   \
> +    .long 0x05009f42; /* bcl 20,31,$+4                      */               
>   \
> +    .long 0xa602487d; /* mflr r10                           */               
>   \
> +    .long 0x14004a39; /* addi r10,r10,20                    */               
>   \
> +    .long 0xa6035a7d; /* mtsrr0 r10                         */               
>   \
> +    .long 0xa6037b7d; /* mtsrr1 r11                         */               
>   \
> +    .long 0x2400004c  /* rfid                               */
> +
> +#endif /* _ASM_PPC_ASM_DEFNS_H */
> diff --git a/xen/arch/ppc/ppc64/head.S b/xen/arch/ppc/ppc64/head.S
> index 2fcefb40d8..a7db5b7de2 100644
> --- a/xen/arch/ppc/ppc64/head.S
> +++ b/xen/arch/ppc/ppc64/head.S
> @@ -1,30 +1,45 @@
>  /* SPDX-License-Identifier: GPL-2.0-or-later */
>  
> +#include <asm/asm-defns.h>
> +
>      .section .text.header, "ax", %progbits
>  
>  ENTRY(start)
>      /*
> -     * Depending on how we were booted, the CPU could be running in either
> -     * Little Endian or Big Endian mode. The following trampoline from Linux
> -     * cleverly uses an instruction that encodes to a NOP if the CPU's
> -     * endianness matches the assumption of the assembler (LE, in our case)
> -     * or a branch to code that performs the endian switch in the other case.
> +     * NOTE: argument registers (r3-r9) must be preserved until the C 
> entrypoint
>       */
> -    tdi 0, 0, 0x48    /* Reverse endian of b . + 8          */
> -    b . + 44          /* Skip trampoline if endian is good  */
> -    .long 0xa600607d  /* mfmsr r11                          */
> -    .long 0x01006b69  /* xori r11,r11,1                     */
> -    .long 0x00004039  /* li r10,0                           */
> -    .long 0x6401417d  /* mtmsrd r10,1                       */
> -    .long 0x05009f42  /* bcl 20,31,$+4                      */
> -    .long 0xa602487d  /* mflr r10                           */
> -    .long 0x14004a39  /* addi r10,r10,20                    */
> -    .long 0xa6035a7d  /* mtsrr0 r10                         */
> -    .long 0xa6037b7d  /* mtsrr1 r11                         */
> -    .long 0x2400004c  /* rfid                               */
> -
> -    /* Now that the endianness is confirmed, continue */
> -1:  b 1b
> +    FIXUP_ENDIAN
> +
> +    /* set up the initial stack */
> +    LOAD_IMM64(%r1, cpu0_boot_stack)
> +    li %r11, 0
> +    std %r11, 0(%r1)

I've done a bit of reading around, and it's rather sad that things prior
to Power10 don't have PC-relative addressing.  So the LOAD_IMM64()'s do
look necessary for the stack and bss.  I guess that means we can't
sensibly be position independent in head.S?

Also, why store 0 onto the stack ?

> +
> +    /* clear .bss */
> +    LOAD_IMM64(%r14, __bss_start)
> +    LOAD_IMM64(%r15, __bss_end)
> +1:
> +    std %r11, 0(%r14)
> +    addi %r14, %r14, 8
> +    cmpld %r14, %r15
> +    blt 1b

This loop is correct, except for the corner case of this patch alone,
where the BSS is empty and this will write one word out-of-bounds.

For RISC-V, we put a temporary "char bss_tmp;" in setup.c, and I suggest
you do the same here, deleting it at a later point when there's real
data in the bss.

> +
> +    /* call the C entrypoint */
> +    LOAD_IMM64(%r12, start_xen)
> +    mtctr %r12
> +    bctrl

Why is this a LOAD_IMM64(), and not just:

    b start_xen

?  From the same reading around, PPC64 seems to have +/- 32M addressing
for branches, and the entire x86 hypervisor (.init included) is within 3M.

> +
> +    /* should never return */
> +    trap
>  
>      .size start, . - start
>      .type start, %function
> +
> +    .section .init.data, "aw", @progbits
> +
> +    /* Early init stack */
> +    .p2align 4
> +cpu0_boot_stack_bottom:
> +    .space STACK_SIZE
> +cpu0_boot_stack:
> +    .space STACK_FRAME_OVERHEAD

Why the extra space beyond the stack?

Also, I'd put cpu0_boot_stack in C, and just LOAD_IMM64(x,
cpu0_boot_stack + STACK_SIZE)

> diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c
> new file mode 100644
> index 0000000000..4d1b72fa23
> --- /dev/null
> +++ b/xen/arch/ppc/setup.c
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#include <xen/init.h>
> +
> +void __init noreturn start_xen(unsigned long r3, unsigned long r4,
> +                               unsigned long r5, unsigned long r6,
> +                               unsigned long r7)
> +{
> +    for ( ;; )
> +        /* Set current hardware thread to very low priority */
> +        asm volatile("or %r31, %r31, %r31");

Is there something magic about the OR instruction, or something magic
about %r31?

For a RISC architecture, this seems subtle.

~Andrew

Reply via email to