Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
On 10/14/2016 03:14 PM, Konrad Rzeszutek Wilk wrote:
>
>> +
>> +memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>> +
>> +memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
>> +set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
>> +if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
>> +xen_raw_console_write("XENMEM_memory_map failed\n");
> Should we print the error value at least?

I will have to check again but IIRC there was something about not being
able to format strings properly this early. But if we can --- sure.

>> +BUG();
>> +}
>> +
>> +pvh_bootparams.e820_map[memmap.nr_entries].addr =
>> +ISA_START_ADDRESS;
> What if nr_entries is 128? Should we double-check for that?
>

OK.



>> + */
>> +void __init xen_prepare_pvh(void)
>> +{
>> +u32 eax, ecx, edx, msr;
> msr = 0 ?

Won't cpuid() (or cpuid_ebx()) overwrite it anyway?

>> +u64 pfn;
>> +
>> +xen_pvh = 1;
>> +
>> +cpuid(xen_cpuid_base() + 2, &eax, &msr, &ecx, &edx);
> cpuid_ebx? That way you don't have to have ecx and edx?



>> +cli
>> +cld
>> +
>> +mov $_pa(gdt), %eax
>> +lgdt (%eax)
>> +
>> +movl $(__BOOT_DS),%eax
>> +movl %eax,%ds
>> +movl %eax,%es
>> +movl %eax,%ss
>> +
>> +/* Stash hvm_start_info */
>> +mov $_pa(pvh_start_info), %edi
>> +mov %ebx, %esi
> Should we dereference the first byte or such to check for the magic
> string? Actually, I am not even seeing the check in the C code?


Yes, good idea.


>> +.code64
>> +1:
>> +call xen_prepare_pvh
>> +
>> +/* startup_64 expects boot_params in %rsi */
> ..
>> +mov $_pa(pvh_bootparams), %rsi
>> +movq $_pa(startup_64), %rax
>> +jmp *%rax
>> +
>> +#else /* CONFIG_X86_64 */
>> +
>> +call setup_pgtable_32
>> +
>> +mov $_pa(initial_page_table), %eax
>> +movl %eax, %cr3
>> +
>> +movl %cr0, %eax
>> +orl $(X86_CR0_PG | X86_CR0_PE), %eax
>> +movl %eax, %cr0
>> +
>> +ljmp $__BOOT_CS,$1f
>> +1:
>> +call xen_prepare_pvh
>> +mov $_pa(pvh_bootparams), %esi
>> +
>> +/* startup_32 doesn't expect paging and PAE to be on */
> Should 'startup_32' be documented with this?

It is documented in Documentation/x86/boot.txt and in the startup_64 code.


-boris



Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
On 10/14/2016 03:14 PM, Konrad Rzeszutek Wilk wrote:
>
>> +
>> +memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>> +
>> +memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
>> +set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
>> +if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
>> +xen_raw_console_write("XENMEM_memory_map failed\n");
> Should we print the error value at least?

I will have to check again but IIRC there was something about not being
able to format strings properly this early. But if we can --- sure.

>> +BUG();
>> +}
>> +
>> +pvh_bootparams.e820_map[memmap.nr_entries].addr =
>> +ISA_START_ADDRESS;
> What if nr_entries is 128? Should we double-check for that?
>

OK.



>> + */
>> +void __init xen_prepare_pvh(void)
>> +{
>> +u32 eax, ecx, edx, msr;
> msr = 0 ?

Won't cpuid() (or cpuid_ebx()) overwrite it anyway?

>> +u64 pfn;
>> +
>> +xen_pvh = 1;
>> +
>> +cpuid(xen_cpuid_base() + 2, &eax, &msr, &ecx, &edx);
> cpuid_ebx? That way you don't have to have ecx and edx?



>> +cli
>> +cld
>> +
>> +mov $_pa(gdt), %eax
>> +lgdt (%eax)
>> +
>> +movl $(__BOOT_DS),%eax
>> +movl %eax,%ds
>> +movl %eax,%es
>> +movl %eax,%ss
>> +
>> +/* Stash hvm_start_info */
>> +mov $_pa(pvh_start_info), %edi
>> +mov %ebx, %esi
> Should we dereference the first byte or such to check for the magic
> string? Actually, I am not even seeing the check in the C code?


Yes, good idea.


>> +.code64
>> +1:
>> +call xen_prepare_pvh
>> +
>> +/* startup_64 expects boot_params in %rsi */
> ..
>> +mov $_pa(pvh_bootparams), %rsi
>> +movq $_pa(startup_64), %rax
>> +jmp *%rax
>> +
>> +#else /* CONFIG_X86_64 */
>> +
>> +call setup_pgtable_32
>> +
>> +mov $_pa(initial_page_table), %eax
>> +movl %eax, %cr3
>> +
>> +movl %cr0, %eax
>> +orl $(X86_CR0_PG | X86_CR0_PE), %eax
>> +movl %eax, %cr0
>> +
>> +ljmp $__BOOT_CS,$1f
>> +1:
>> +call xen_prepare_pvh
>> +mov $_pa(pvh_bootparams), %esi
>> +
>> +/* startup_32 doesn't expect paging and PAE to be on */
> Should 'startup_32' be documented with this?

It is documented in Documentation/x86/boot.txt and in the startup_64 code.


-boris



Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Konrad Rzeszutek Wilk
On Fri, Oct 14, 2016 at 02:05:14PM -0400, Boris Ostrovsky wrote:
> Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall
> page, initialize boot_params, enable early page tables.
> 
> Since this stub is executed before kernel entry point we cannot use
> variables in .bss which is cleared by kernel. We explicitly place
> variables that are initialized here into .data.
> 
> Signed-off-by: Boris Ostrovsky 
> Signed-off-by: Matt Fleming 
> ---
>  arch/x86/xen/Kconfig |   2 +-
>  arch/x86/xen/Makefile|   1 +
>  arch/x86/xen/enlighten.c |  87 +++-
>  arch/x86/xen/xen-pvh.S   | 143 
> +++
>  include/xen/xen.h|   5 ++
>  5 files changed, 236 insertions(+), 2 deletions(-)
>  create mode 100644 arch/x86/xen/xen-pvh.S
> 
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index c7b15f3..76b6dbd 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -53,5 +53,5 @@ config XEN_DEBUG_FS
>  
>  config XEN_PVH
>   bool "Support for running as a PVH guest"
> - depends on X86_64 && XEN && XEN_PVHVM
> + depends on XEN && XEN_PVHVM && ACPI
>   def_bool n
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index e47e527..cb0164a 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)  += debugfs.o
>  obj-$(CONFIG_XEN_DOM0)   += vga.o
>  obj-$(CONFIG_SWIOTLB_XEN)+= pci-swiotlb-xen.o
>  obj-$(CONFIG_XEN_EFI)+= efi.o
> +obj-$(CONFIG_XEN_PVH)+= xen-pvh.o
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index dc4ed0c..d38d568 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -45,6 +45,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -121,7 +122,8 @@
>  DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
>  EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
>  
> -enum xen_domain_type xen_domain_type = XEN_NATIVE;
> +enum xen_domain_type xen_domain_type
> + __attribute__((section(".data"))) = XEN_NATIVE;
>  EXPORT_SYMBOL_GPL(xen_domain_type);
>  
>  unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
> @@ -176,6 +178,17 @@ struct tls_descs {
>   */
>  static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
>  
> +#ifdef CONFIG_XEN_PVH
> +/*
> + * PVH variables. These need to live in data segment since they are
> + * initialized before startup_{32|64}, which clear .bss, are invoked.
> + */
> +int xen_pvh __attribute__((section(".data"))) = 0;

unsigned int?
> +struct hvm_start_info pvh_start_info __attribute__((section(".data")));
> +uint pvh_start_info_sz = sizeof(pvh_start_info);

unsigned int please. Typedefs in Linux are frowned upon.

> +struct boot_params pvh_bootparams __attribute__((section(".data")));
> +#endif
> +
>  static void clamp_max_cpus(void)
>  {
>  #ifdef CONFIG_SMP
> @@ -1669,6 +1682,78 @@ asmlinkage __visible void __init xen_start_kernel(void)
>  #endif
>  }
>  
> +#ifdef CONFIG_XEN_PVH
> +static void __init init_pvh_bootparams(void)
> +{
> + struct xen_memory_map memmap;
> + int i;

unsigned int?
> +
> + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
> +
> + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
> + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
> + if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
> + xen_raw_console_write("XENMEM_memory_map failed\n");

Should we print the error value at least?
> + BUG();
> + }
> +
> + pvh_bootparams.e820_map[memmap.nr_entries].addr =
> + ISA_START_ADDRESS;

What if nr_entries is 128? Should we double-check for that?

> + pvh_bootparams.e820_map[memmap.nr_entries].size =
> + ISA_END_ADDRESS - ISA_START_ADDRESS;
> + pvh_bootparams.e820_map[memmap.nr_entries++].type =
> + E820_RESERVED;
> +
> + sanitize_e820_map(pvh_bootparams.e820_map,
> +   ARRAY_SIZE(pvh_bootparams.e820_map),
> +   &memmap.nr_entries);
> +
> + pvh_bootparams.e820_entries = memmap.nr_entries;
> + for (i = 0; i < pvh_bootparams.e820_entries; i++)
> + e820_add_region(pvh_bootparams.e820_map[i].addr,
> + pvh_bootparams.e820_map[i].size,
> + pvh_bootparams.e820_map[i].type);
> +
> + pvh_bootparams.hdr.cmd_line_ptr =
> + pvh_start_info.cmdline_paddr;
> +
> + /* The first module is always ramdisk */

Could you add a period at the end, please?
> + if (pvh_start_info.nr_modules) {
> + struct hvm_modlist_entry *modaddr =
> + __va(pvh_start_info.modlist_paddr);
> + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
> + pvh_bootparams.hdr.ramdisk_size = modaddr->size;
> + }
> +
> + /*
> +  * See 

Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Konrad Rzeszutek Wilk
On Fri, Oct 14, 2016 at 02:05:14PM -0400, Boris Ostrovsky wrote:
> Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall
> page, initialize boot_params, enable early page tables.
> 
> Since this stub is executed before kernel entry point we cannot use
> variables in .bss which is cleared by kernel. We explicitly place
> variables that are initialized here into .data.
> 
> Signed-off-by: Boris Ostrovsky 
> Signed-off-by: Matt Fleming 
> ---
>  arch/x86/xen/Kconfig |   2 +-
>  arch/x86/xen/Makefile|   1 +
>  arch/x86/xen/enlighten.c |  87 +++-
>  arch/x86/xen/xen-pvh.S   | 143 
> +++
>  include/xen/xen.h|   5 ++
>  5 files changed, 236 insertions(+), 2 deletions(-)
>  create mode 100644 arch/x86/xen/xen-pvh.S
> 
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index c7b15f3..76b6dbd 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -53,5 +53,5 @@ config XEN_DEBUG_FS
>  
>  config XEN_PVH
>   bool "Support for running as a PVH guest"
> - depends on X86_64 && XEN && XEN_PVHVM
> + depends on XEN && XEN_PVHVM && ACPI
>   def_bool n
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index e47e527..cb0164a 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)  += debugfs.o
>  obj-$(CONFIG_XEN_DOM0)   += vga.o
>  obj-$(CONFIG_SWIOTLB_XEN)+= pci-swiotlb-xen.o
>  obj-$(CONFIG_XEN_EFI)+= efi.o
> +obj-$(CONFIG_XEN_PVH)+= xen-pvh.o
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index dc4ed0c..d38d568 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -45,6 +45,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -121,7 +122,8 @@
>  DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
>  EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
>  
> -enum xen_domain_type xen_domain_type = XEN_NATIVE;
> +enum xen_domain_type xen_domain_type
> + __attribute__((section(".data"))) = XEN_NATIVE;
>  EXPORT_SYMBOL_GPL(xen_domain_type);
>  
>  unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
> @@ -176,6 +178,17 @@ struct tls_descs {
>   */
>  static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
>  
> +#ifdef CONFIG_XEN_PVH
> +/*
> + * PVH variables. These need to live in data segment since they are
> + * initialized before startup_{32|64}, which clear .bss, are invoked.
> + */
> +int xen_pvh __attribute__((section(".data"))) = 0;

unsigned int?
> +struct hvm_start_info pvh_start_info __attribute__((section(".data")));
> +uint pvh_start_info_sz = sizeof(pvh_start_info);

unsigned int please. Typedefs in Linux are frowned upon.

> +struct boot_params pvh_bootparams __attribute__((section(".data")));
> +#endif
> +
>  static void clamp_max_cpus(void)
>  {
>  #ifdef CONFIG_SMP
> @@ -1669,6 +1682,78 @@ asmlinkage __visible void __init xen_start_kernel(void)
>  #endif
>  }
>  
> +#ifdef CONFIG_XEN_PVH
> +static void __init init_pvh_bootparams(void)
> +{
> + struct xen_memory_map memmap;
> + int i;

unsigned int?
> +
> + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
> +
> + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
> + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
> + if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
> + xen_raw_console_write("XENMEM_memory_map failed\n");

Should we print the error value at least?
> + BUG();
> + }
> +
> + pvh_bootparams.e820_map[memmap.nr_entries].addr =
> + ISA_START_ADDRESS;

What if nr_entries is 128? Should we double-check for that?

> + pvh_bootparams.e820_map[memmap.nr_entries].size =
> + ISA_END_ADDRESS - ISA_START_ADDRESS;
> + pvh_bootparams.e820_map[memmap.nr_entries++].type =
> + E820_RESERVED;
> +
> + sanitize_e820_map(pvh_bootparams.e820_map,
> +   ARRAY_SIZE(pvh_bootparams.e820_map),
> +   &memmap.nr_entries);
> +
> + pvh_bootparams.e820_entries = memmap.nr_entries;
> + for (i = 0; i < pvh_bootparams.e820_entries; i++)
> + e820_add_region(pvh_bootparams.e820_map[i].addr,
> + pvh_bootparams.e820_map[i].size,
> + pvh_bootparams.e820_map[i].type);
> +
> + pvh_bootparams.hdr.cmd_line_ptr =
> + pvh_start_info.cmdline_paddr;
> +
> + /* The first module is always ramdisk */

Could you add a period at the end, please?
> + if (pvh_start_info.nr_modules) {
> + struct hvm_modlist_entry *modaddr =
> + __va(pvh_start_info.modlist_paddr);
> + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
> + pvh_bootparams.hdr.ramdisk_size = modaddr->size;
> + }
> +
> + /*
> +  * See Documentation/x86/boot.txt.
> +  *
> +  * 

Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Andrew Cooper
On 14/10/16 19:55, Boris Ostrovsky wrote:
> On 10/14/2016 02:38 PM, Andrew Cooper wrote:
>>> +   jmp *%rax
>>> +
>>> +#else /* CONFIG_X86_64 */
>>> +
>>> +   call setup_pgtable_32
>>> +
>>> +   mov $_pa(initial_page_table), %eax
>>> +   movl %eax, %cr3
>>> +
>>> +   movl %cr0, %eax
>>> +   orl $(X86_CR0_PG | X86_CR0_PE), %eax
>>> +   movl %eax, %cr0
>>> +
>>> +   ljmp $__BOOT_CS,$1f
>>> +1:
>>> +   call xen_prepare_pvh
>> Why does xen_prepare_pvh need paging?  I can't spot anything which
>> should need it, and it feels conceptually wrong.
> xen_prepare_pvh() deals with virtual addresses. How can we run without paging?

Ah yes - with a high-half kernel, that way around doesn't work.  Sorry
for the noise - I have been spending too long working with virtual
addresses down round 0, where that specifically can be solved by setting
%ds with a suitable non-zero base.

~Andrew


Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Andrew Cooper
On 14/10/16 19:55, Boris Ostrovsky wrote:
> On 10/14/2016 02:38 PM, Andrew Cooper wrote:
>>> +   jmp *%rax
>>> +
>>> +#else /* CONFIG_X86_64 */
>>> +
>>> +   call setup_pgtable_32
>>> +
>>> +   mov $_pa(initial_page_table), %eax
>>> +   movl %eax, %cr3
>>> +
>>> +   movl %cr0, %eax
>>> +   orl $(X86_CR0_PG | X86_CR0_PE), %eax
>>> +   movl %eax, %cr0
>>> +
>>> +   ljmp $__BOOT_CS,$1f
>>> +1:
>>> +   call xen_prepare_pvh
>> Why does xen_prepare_pvh need paging?  I can't spot anything which
>> should need it, and it feels conceptually wrong.
> xen_prepare_pvh() deals with virtual addresses. How can we run without paging?

Ah yes - with a high-half kernel, that way around doesn't work.  Sorry
for the noise - I have been spending too long working with virtual
addresses down round 0, where that specifically can be solved by setting
%ds with a suitable non-zero base.

~Andrew


Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
On 10/14/2016 02:38 PM, Andrew Cooper wrote:
>> +jmp *%rax
>> +
>> +#else /* CONFIG_X86_64 */
>> +
>> +call setup_pgtable_32
>> +
>> +mov $_pa(initial_page_table), %eax
>> +movl %eax, %cr3
>> +
>> +movl %cr0, %eax
>> +orl $(X86_CR0_PG | X86_CR0_PE), %eax
>> +movl %eax, %cr0
>> +
>> +ljmp $__BOOT_CS,$1f
>> +1:
>> +call xen_prepare_pvh
> Why does xen_prepare_pvh need paging?  I can't spot anything which
> should need it, and it feels conceptually wrong.

xen_prepare_pvh() deals with virtual addresses. How can we run without paging?

(Also, startup_64, which is where we jump to from here in 64-bit mode, expects
paging to be on.)

-boris




Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
On 10/14/2016 02:38 PM, Andrew Cooper wrote:
>> +jmp *%rax
>> +
>> +#else /* CONFIG_X86_64 */
>> +
>> +call setup_pgtable_32
>> +
>> +mov $_pa(initial_page_table), %eax
>> +movl %eax, %cr3
>> +
>> +movl %cr0, %eax
>> +orl $(X86_CR0_PG | X86_CR0_PE), %eax
>> +movl %eax, %cr0
>> +
>> +ljmp $__BOOT_CS,$1f
>> +1:
>> +call xen_prepare_pvh
> Why does xen_prepare_pvh need paging?  I can't spot anything which
> should need it, and it feels conceptually wrong.

xen_prepare_pvh() deals with virtual addresses. How can we run without paging?

(Also, startup_64, which is where we jump to from here in 64-bit mode, expects
paging to be on.)

-boris




Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Andrew Cooper
On 14/10/16 19:05, Boris Ostrovsky wrote:
> diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
> new file mode 100644
> index 000..58c477b
> --- /dev/null
> +++ b/arch/x86/xen/xen-pvh.S
> @@ -0,0 +1,143 @@
> +/*
> + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> + .code32
> + .text
> +#define _pa(x)  ((x) - __START_KERNEL_map)
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> + __HEAD
> + .code32

Duplicated .code32

> +
> +/* Entry point for PVH guests */
> +ENTRY(pvh_start_xen)
> + cli
> + cld

The ABI states that these will be clear.

> +
> + mov $_pa(gdt), %eax
> + lgdt (%eax)

I am fairly sure you can express this without an intermediate in %eax.

> +
> + movl $(__BOOT_DS),%eax
> + movl %eax,%ds
> + movl %eax,%es
> + movl %eax,%ss
> +
> + /* Stash hvm_start_info */
> + mov $_pa(pvh_start_info), %edi
> + mov %ebx, %esi
> + mov $_pa(pvh_start_info_sz), %ecx
> + mov (%ecx), %ecx

No need for an intermediate.

> + rep
> + movsb

Surely we can guarantee the size is a multiple of 4? movsl would be better.

> +
> + movl $_pa(early_stack_end), %eax
> + movl %eax, %esp

You can mov straight into %esp.

> +
> + /* Enable PAE mode */
> + movl %cr4, %eax
> + orl $X86_CR4_PAE, %eax
> + movl %eax, %cr4
> +
> +#ifdef CONFIG_X86_64
> + /* Enable Long mode */
> + movl $MSR_EFER, %ecx
> + rdmsr
> + btsl $_EFER_LME, %eax
> + wrmsr
> +
> + /* Enable pre-constructed page tables */
> + mov $_pa(init_level4_pgt), %eax
> + movl %eax, %cr3
> + movl $(X86_CR0_PG | X86_CR0_PE), %eax
> + movl %eax, %cr0
> +
> + /* Jump to 64-bit mode. */
> + pushl $__KERNEL_CS
> + leal _pa(1f), %eax
> + pushl %eax
> + lret

You are still in compat mode, so you can ljmp $__KERNEL_CS, $_pa(1f)

> +
> + /* 64-bit entry point */
> + .code64
> +1:
> + call xen_prepare_pvh
> +
> + /* startup_64 expects boot_params in %rsi */
> + mov $_pa(pvh_bootparams), %rsi
> + movq $_pa(startup_64), %rax

You seem to have an inconsistent mix of writing the explicit suffixes
when they aren't required.

> + jmp *%rax
> +
> +#else /* CONFIG_X86_64 */
> +
> + call setup_pgtable_32
> +
> + mov $_pa(initial_page_table), %eax
> + movl %eax, %cr3
> +
> + movl %cr0, %eax
> + orl $(X86_CR0_PG | X86_CR0_PE), %eax
> + movl %eax, %cr0
> +
> + ljmp $__BOOT_CS,$1f
> +1:
> + call xen_prepare_pvh

Why does xen_prepare_pvh need paging?  I can't spot anything which
should need it, and it feels conceptually wrong.

~Andrew

> + mov $_pa(pvh_bootparams), %esi
> +
> + /* startup_32 doesn't expect paging and PAE to be on */
> + ljmp $__BOOT_CS,$_pa(2f)
> +2:
> + movl %cr0, %eax
> + andl $~X86_CR0_PG, %eax
> + movl %eax, %cr0
> + movl %cr4, %eax
> + andl $~X86_CR4_PAE, %eax
> + movl %eax, %cr4
> +
> + ljmp $0x10, $_pa(startup_32)
> +#endif
> +
> + .data
> +gdt:
> + .word   gdt_end - gdt
> + .long   _pa(gdt)
> + .word   0
> + .quad   0x0000000000000000 /* NULL descriptor */
> +#ifdef CONFIG_X86_64
> + .quad   0x00af9a000000ffff /* __KERNEL_CS */
> +#else
> + .quad   0x00cf9a000000ffff /* __KERNEL_CS */
> +#endif
> + .quad   0x00cf92000000ffff /* __KERNEL_DS */
> +gdt_end:
> +
> + .bss
> + .balign 4
> +early_stack:
> + .fill 16, 1, 0
> +early_stack_end:
> +
> + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
> +  _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
>



Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Andrew Cooper
On 14/10/16 19:05, Boris Ostrovsky wrote:
> diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
> new file mode 100644
> index 000..58c477b
> --- /dev/null
> +++ b/arch/x86/xen/xen-pvh.S
> @@ -0,0 +1,143 @@
> +/*
> + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> + .code32
> + .text
> +#define _pa(x)  ((x) - __START_KERNEL_map)
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> + __HEAD
> + .code32

Duplicated .code32

> +
> +/* Entry point for PVH guests */
> +ENTRY(pvh_start_xen)
> + cli
> + cld

The ABI states that these will be clear.

> +
> + mov $_pa(gdt), %eax
> + lgdt (%eax)

I am fairly sure you can express this without an intermediate in %eax.

> +
> + movl $(__BOOT_DS),%eax
> + movl %eax,%ds
> + movl %eax,%es
> + movl %eax,%ss
> +
> + /* Stash hvm_start_info */
> + mov $_pa(pvh_start_info), %edi
> + mov %ebx, %esi
> + mov $_pa(pvh_start_info_sz), %ecx
> + mov (%ecx), %ecx

No need for an intermediate.

> + rep
> + movsb

Surely we can guarantee the size is a multiple of 4? movsl would be better.

> +
> + movl $_pa(early_stack_end), %eax
> + movl %eax, %esp

You can mov straight into %esp.

> +
> + /* Enable PAE mode */
> + movl %cr4, %eax
> + orl $X86_CR4_PAE, %eax
> + movl %eax, %cr4
> +
> +#ifdef CONFIG_X86_64
> + /* Enable Long mode */
> + movl $MSR_EFER, %ecx
> + rdmsr
> + btsl $_EFER_LME, %eax
> + wrmsr
> +
> + /* Enable pre-constructed page tables */
> + mov $_pa(init_level4_pgt), %eax
> + movl %eax, %cr3
> + movl $(X86_CR0_PG | X86_CR0_PE), %eax
> + movl %eax, %cr0
> +
> + /* Jump to 64-bit mode. */
> + pushl $__KERNEL_CS
> + leal _pa(1f), %eax
> + pushl %eax
> + lret

You are still in compat mode, so you can ljmp $__KERNEL_CS, $_pa(1f)

> +
> + /* 64-bit entry point */
> + .code64
> +1:
> + call xen_prepare_pvh
> +
> + /* startup_64 expects boot_params in %rsi */
> + mov $_pa(pvh_bootparams), %rsi
> + movq $_pa(startup_64), %rax

You seem to have an inconsistent mix of writing the explicit suffixes
when they aren't required.

> + jmp *%rax
> +
> +#else /* CONFIG_X86_64 */
> +
> + call setup_pgtable_32
> +
> + mov $_pa(initial_page_table), %eax
> + movl %eax, %cr3
> +
> + movl %cr0, %eax
> + orl $(X86_CR0_PG | X86_CR0_PE), %eax
> + movl %eax, %cr0
> +
> + ljmp $__BOOT_CS,$1f
> +1:
> + call xen_prepare_pvh

Why does xen_prepare_pvh need paging?  I can't spot anything which
should need it, and it feels conceptually wrong.

~Andrew

> + mov $_pa(pvh_bootparams), %esi
> +
> + /* startup_32 doesn't expect paging and PAE to be on */
> + ljmp $__BOOT_CS,$_pa(2f)
> +2:
> + movl %cr0, %eax
> + andl $~X86_CR0_PG, %eax
> + movl %eax, %cr0
> + movl %cr4, %eax
> + andl $~X86_CR4_PAE, %eax
> + movl %eax, %cr4
> +
> + ljmp $0x10, $_pa(startup_32)
> +#endif
> +
> + .data
> +gdt:
> + .word   gdt_end - gdt
> + .long   _pa(gdt)
> + .word   0
> + .quad   0x0000000000000000 /* NULL descriptor */
> +#ifdef CONFIG_X86_64
> + .quad   0x00af9a000000ffff /* __KERNEL_CS */
> +#else
> + .quad   0x00cf9a000000ffff /* __KERNEL_CS */
> +#endif
> + .quad   0x00cf92000000ffff /* __KERNEL_DS */
> +gdt_end:
> +
> + .bss
> + .balign 4
> +early_stack:
> + .fill 16, 1, 0
> +early_stack_end:
> +
> + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
> +  _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
>