Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
On 10/14/2016 03:14 PM, Konrad Rzeszutek Wilk wrote:
>
>> +
>> +memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>> +
>> +memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
>> +set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
>> +if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
>> +xen_raw_console_write("XENMEM_memory_map failed\n");
> Should we print the error value at least?

I will have to check again but IIRC there was something about not being
able to format strings properly this early. But if we can --- sure.

>> +BUG();
>> +}
>> +
>> +pvh_bootparams.e820_map[memmap.nr_entries].addr =
>> +ISA_START_ADDRESS;
> What if nr_entries is 128? Should we double-check for that?
>

OK.



>> + */
>> +void __init xen_prepare_pvh(void)
>> +{
>> +u32 eax, ecx, edx, msr;
> msr = 0 ?

Won't cpuid() (or cpuid_ebx()) overwrite it anyway?

>> +u64 pfn;
>> +
>> +xen_pvh = 1;
>> +
>> +cpuid(xen_cpuid_base() + 2, &eax, &msr, &ecx, &edx);
> cpuid_ebx ? And that way you don't have to have ecx and edx?



>> +cli
>> +cld
>> +
>> +mov $_pa(gdt), %eax
>> +lgdt (%eax)
>> +
>> +movl $(__BOOT_DS),%eax
>> +movl %eax,%ds
>> +movl %eax,%es
>> +movl %eax,%ss
>> +
>> +/* Stash hvm_start_info */
>> +mov $_pa(pvh_start_info), %edi
>> +mov %ebx, %esi
> Should we dereference the first byte or such to check for the magic
> string? Actually I am not even seeing the check in the C code?


Yes, good idea.


>> +.code64
>> +1:
>> +call xen_prepare_pvh
>> +
>> +/* startup_64 expects boot_params in %rsi */
> ..
>> +mov $_pa(pvh_bootparams), %rsi
>> +movq $_pa(startup_64), %rax
>> +jmp *%rax
>> +
>> +#else /* CONFIG_X86_64 */
>> +
>> +call setup_pgtable_32
>> +
>> +mov $_pa(initial_page_table), %eax
>> +movl %eax, %cr3
>> +
>> +movl %cr0, %eax
>> +orl $(X86_CR0_PG | X86_CR0_PE), %eax
>> +movl %eax, %cr0
>> +
>> +ljmp $__BOOT_CS,$1f
>> +1:
>> +call xen_prepare_pvh
>> +mov $_pa(pvh_bootparams), %esi
>> +
>> +/* startup_32 doesn't expect paging and PAE to be on */
> Should 'startup_32' be documented with this?

It is documented in Documentation/x86/boot.txt and in the startup_64 code.


-boris

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Konrad Rzeszutek Wilk
On Fri, Oct 14, 2016 at 02:05:14PM -0400, Boris Ostrovsky wrote:
> Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall
> page, initialize boot_params, enable early page tables.
> 
> Since this stub is executed before kernel entry point we cannot use
> variables in .bss which is cleared by kernel. We explicitly place
> variables that are initialized here into .data.
> 
> Signed-off-by: Boris Ostrovsky 
> Signed-off-by: Matt Fleming 
> ---
>  arch/x86/xen/Kconfig |   2 +-
>  arch/x86/xen/Makefile|   1 +
>  arch/x86/xen/enlighten.c |  87 +++-
>  arch/x86/xen/xen-pvh.S   | 143 
> +++
>  include/xen/xen.h|   5 ++
>  5 files changed, 236 insertions(+), 2 deletions(-)
>  create mode 100644 arch/x86/xen/xen-pvh.S
> 
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index c7b15f3..76b6dbd 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -53,5 +53,5 @@ config XEN_DEBUG_FS
>  
>  config XEN_PVH
>   bool "Support for running as a PVH guest"
> - depends on X86_64 && XEN && XEN_PVHVM
> + depends on XEN && XEN_PVHVM && ACPI
>   def_bool n
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index e47e527..cb0164a 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)  += debugfs.o
>  obj-$(CONFIG_XEN_DOM0)   += vga.o
>  obj-$(CONFIG_SWIOTLB_XEN)+= pci-swiotlb-xen.o
>  obj-$(CONFIG_XEN_EFI)+= efi.o
> +obj-$(CONFIG_XEN_PVH)+= xen-pvh.o
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index dc4ed0c..d38d568 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -45,6 +45,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -121,7 +122,8 @@
>  DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
>  EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
>  
> -enum xen_domain_type xen_domain_type = XEN_NATIVE;
> +enum xen_domain_type xen_domain_type
> + __attribute__((section(".data"))) = XEN_NATIVE;
>  EXPORT_SYMBOL_GPL(xen_domain_type);
>  
>  unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
> @@ -176,6 +178,17 @@ struct tls_descs {
>   */
>  static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
>  
> +#ifdef CONFIG_XEN_PVH
> +/*
> + * PVH variables. These need to live in data segment since they are
> + * initialized before startup_{32|64}, which clear .bss, are invoked.
> + */
> +int xen_pvh __attribute__((section(".data"))) = 0;

unsigned int?
> +struct hvm_start_info pvh_start_info __attribute__((section(".data")));
> +uint pvh_start_info_sz = sizeof(pvh_start_info);

unsigned int please. Typedefs in Linux are frowned upon.

> +struct boot_params pvh_bootparams __attribute__((section(".data")));
> +#endif
> +
>  static void clamp_max_cpus(void)
>  {
>  #ifdef CONFIG_SMP
> @@ -1669,6 +1682,78 @@ asmlinkage __visible void __init xen_start_kernel(void)
>  #endif
>  }
>  
> +#ifdef CONFIG_XEN_PVH
> +static void __init init_pvh_bootparams(void)
> +{
> + struct xen_memory_map memmap;
> + int i;

unsigned int?
> +
> + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
> +
> + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
> + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
> + if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
> + xen_raw_console_write("XENMEM_memory_map failed\n");

Should we print the error value at least?
> + BUG();
> + }
> +
> + pvh_bootparams.e820_map[memmap.nr_entries].addr =
> + ISA_START_ADDRESS;

What if nr_entries is 128? Should we double-check for that?

> + pvh_bootparams.e820_map[memmap.nr_entries].size =
> + ISA_END_ADDRESS - ISA_START_ADDRESS;
> + pvh_bootparams.e820_map[memmap.nr_entries++].type =
> + E820_RESERVED;
> +
> + sanitize_e820_map(pvh_bootparams.e820_map,
> +   ARRAY_SIZE(pvh_bootparams.e820_map),
> +   &memmap.nr_entries);
> +
> + pvh_bootparams.e820_entries = memmap.nr_entries;
> + for (i = 0; i < pvh_bootparams.e820_entries; i++)
> + e820_add_region(pvh_bootparams.e820_map[i].addr,
> + pvh_bootparams.e820_map[i].size,
> + pvh_bootparams.e820_map[i].type);
> +
> + pvh_bootparams.hdr.cmd_line_ptr =
> + pvh_start_info.cmdline_paddr;
> +
> + /* The first module is always ramdisk */

Could you add a period at the end please?
> + if (pvh_start_info.nr_modules) {
> + struct hvm_modlist_entry *modaddr =
> + __va(pvh_start_info.modlist_paddr);
> + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
> + pvh_bootparams.hdr.ramdisk_size = modaddr->size;
> + }
> +
> + /*
> +  * See 

Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Andrew Cooper
On 14/10/16 19:55, Boris Ostrovsky wrote:
> On 10/14/2016 02:38 PM, Andrew Cooper wrote:
>>> +   jmp *%rax
>>> +
>>> +#else /* CONFIG_X86_64 */
>>> +
>>> +   call setup_pgtable_32
>>> +
>>> +   mov $_pa(initial_page_table), %eax
>>> +   movl %eax, %cr3
>>> +
>>> +   movl %cr0, %eax
>>> +   orl $(X86_CR0_PG | X86_CR0_PE), %eax
>>> +   movl %eax, %cr0
>>> +
>>> +   ljmp $__BOOT_CS,$1f
>>> +1:
>>> +   call xen_prepare_pvh
>> Why does xen_prepare_pvh need paging?  I can't spot anything which
>> should need it, and it feels conceptually wrong.
> xen_prepare_pvh() deals with virtual addresses. How can we run without paging?

Ah yes - with a high-half kernel, that way around doesn't work.  Sorry
for the noise - I have been spending too long working with virtual
addresses down round 0, where that specifically can be solved by setting
%ds with a suitable non-zero base.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
On 10/14/2016 02:38 PM, Andrew Cooper wrote:
>> +jmp *%rax
>> +
>> +#else /* CONFIG_X86_64 */
>> +
>> +call setup_pgtable_32
>> +
>> +mov $_pa(initial_page_table), %eax
>> +movl %eax, %cr3
>> +
>> +movl %cr0, %eax
>> +orl $(X86_CR0_PG | X86_CR0_PE), %eax
>> +movl %eax, %cr0
>> +
>> +ljmp $__BOOT_CS,$1f
>> +1:
>> +call xen_prepare_pvh
> Why does xen_prepare_pvh need paging?  I can't spot anything which
> should need it, and it feels conceptually wrong.

xen_prepare_pvh() deals with virtual addresses. How can we run without paging?

(Also, startup_64, which is where we jump from here in 64-bit mode expects 
paging to be on).

-boris



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Andrew Cooper
On 14/10/16 19:05, Boris Ostrovsky wrote:
> diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
> new file mode 100644
> index 0000000..58c477b
> --- /dev/null
> +++ b/arch/x86/xen/xen-pvh.S
> @@ -0,0 +1,143 @@
> +/*
> + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> + .code32
> + .text
> +#define _pa(x)  ((x) - __START_KERNEL_map)
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> + __HEAD
> + .code32

Duplicated .code32

> +
> +/* Entry point for PVH guests */
> +ENTRY(pvh_start_xen)
> + cli
> + cld

The ABI states that these will be clear.

> +
> + mov $_pa(gdt), %eax
> + lgdt (%eax)

I am fairly sure you can express this without an intermediate in %eax.

> +
> + movl $(__BOOT_DS),%eax
> + movl %eax,%ds
> + movl %eax,%es
> + movl %eax,%ss
> +
> + /* Stash hvm_start_info */
> + mov $_pa(pvh_start_info), %edi
> + mov %ebx, %esi
> + mov $_pa(pvh_start_info_sz), %ecx
> + mov (%ecx), %ecx

No need for an intermediate.

> + rep
> + movsb

Surely we can guarantee the size is a multiple of 4? movsl would be better.

> +
> + movl $_pa(early_stack_end), %eax
> + movl %eax, %esp

You can mov straight into %esp.

> +
> + /* Enable PAE mode */
> + movl %cr4, %eax
> + orl $X86_CR4_PAE, %eax
> + movl %eax, %cr4
> +
> +#ifdef CONFIG_X86_64
> + /* Enable Long mode */
> + movl $MSR_EFER, %ecx
> + rdmsr
> + btsl $_EFER_LME, %eax
> + wrmsr
> +
> + /* Enable pre-constructed page tables */
> + mov $_pa(init_level4_pgt), %eax
> + movl %eax, %cr3
> + movl $(X86_CR0_PG | X86_CR0_PE), %eax
> + movl %eax, %cr0
> +
> + /* Jump to 64-bit mode. */
> + pushl $__KERNEL_CS
> + leal _pa(1f), %eax
> + pushl %eax
> + lret

You are still in compat mode, so can ljmp $__KERNEL_CS, $_pa(1f)

> +
> + /* 64-bit entry point */
> + .code64
> +1:
> + call xen_prepare_pvh
> +
> + /* startup_64 expects boot_params in %rsi */
> + mov $_pa(pvh_bootparams), %rsi
> + movq $_pa(startup_64), %rax

You seem to have an inconsistent mix of writing the explicit suffixes
when they aren't required.

> + jmp *%rax
> +
> +#else /* CONFIG_X86_64 */
> +
> + call setup_pgtable_32
> +
> + mov $_pa(initial_page_table), %eax
> + movl %eax, %cr3
> +
> + movl %cr0, %eax
> + orl $(X86_CR0_PG | X86_CR0_PE), %eax
> + movl %eax, %cr0
> +
> + ljmp $__BOOT_CS,$1f
> +1:
> + call xen_prepare_pvh

Why does xen_prepare_pvh need paging?  I can't spot anything which
should need it, and it feels conceptually wrong.

~Andrew

> + mov $_pa(pvh_bootparams), %esi
> +
> + /* startup_32 doesn't expect paging and PAE to be on */
> + ljmp $__BOOT_CS,$_pa(2f)
> +2:
> + movl %cr0, %eax
> + andl $~X86_CR0_PG, %eax
> + movl %eax, %cr0
> + movl %cr4, %eax
> + andl $~X86_CR4_PAE, %eax
> + movl %eax, %cr4
> +
> + ljmp $0x10, $_pa(startup_32)
> +#endif
> +
> + .data
> +gdt:
> + .word   gdt_end - gdt
> + .long   _pa(gdt)
> + .word   0
> + .quad   0x0000000000000000 /* NULL descriptor */
> +#ifdef CONFIG_X86_64
> + .quad   0x00af9a000000ffff /* __KERNEL_CS */
> +#else
> + .quad   0x00cf9a000000ffff /* __KERNEL_CS */
> +#endif
> + .quad   0x00cf92000000ffff /* __KERNEL_DS */
> +gdt_end:
> +
> + .bss
> + .balign 4
> +early_stack:
> + .fill 16, 1, 0
> +early_stack_end:
> +
> + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
> +  _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
>


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 4/8] xen/pvh: Bootstrap PVH guest

2016-10-14 Thread Boris Ostrovsky
Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall
page, initialize boot_params, enable early page tables.

Since this stub is executed before kernel entry point we cannot use
variables in .bss which is cleared by kernel. We explicitly place
variables that are initialized here into .data.

Signed-off-by: Boris Ostrovsky 
Signed-off-by: Matt Fleming 
---
 arch/x86/xen/Kconfig |   2 +-
 arch/x86/xen/Makefile|   1 +
 arch/x86/xen/enlighten.c |  87 +++-
 arch/x86/xen/xen-pvh.S   | 143 +++
 include/xen/xen.h|   5 ++
 5 files changed, 236 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/xen/xen-pvh.S

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index c7b15f3..76b6dbd 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -53,5 +53,5 @@ config XEN_DEBUG_FS
 
 config XEN_PVH
bool "Support for running as a PVH guest"
-   depends on X86_64 && XEN && XEN_PVHVM
+   depends on XEN && XEN_PVHVM && ACPI
def_bool n
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index e47e527..cb0164a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS)+= debugfs.o
 obj-$(CONFIG_XEN_DOM0) += vga.o
 obj-$(CONFIG_SWIOTLB_XEN)  += pci-swiotlb-xen.o
 obj-$(CONFIG_XEN_EFI)  += efi.o
+obj-$(CONFIG_XEN_PVH)  += xen-pvh.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index dc4ed0c..d38d568 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -121,7 +122,8 @@
 DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
-enum xen_domain_type xen_domain_type = XEN_NATIVE;
+enum xen_domain_type xen_domain_type
+   __attribute__((section(".data"))) = XEN_NATIVE;
 EXPORT_SYMBOL_GPL(xen_domain_type);
 
 unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
@@ -176,6 +178,17 @@ struct tls_descs {
  */
 static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 
+#ifdef CONFIG_XEN_PVH
+/*
+ * PVH variables. These need to live in data segment since they are
+ * initialized before startup_{32|64}, which clear .bss, are invoked.
+ */
+int xen_pvh __attribute__((section(".data"))) = 0;
+struct hvm_start_info pvh_start_info __attribute__((section(".data")));
+uint pvh_start_info_sz = sizeof(pvh_start_info);
+struct boot_params pvh_bootparams __attribute__((section(".data")));
+#endif
+
 static void clamp_max_cpus(void)
 {
 #ifdef CONFIG_SMP
@@ -1669,6 +1682,78 @@ asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 }
 
+#ifdef CONFIG_XEN_PVH
+static void __init init_pvh_bootparams(void)
+{
+   struct xen_memory_map memmap;
+   int i;
+
+   memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
+
+   memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map);
+   set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map);
+   if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap)) {
+   xen_raw_console_write("XENMEM_memory_map failed\n");
+   BUG();
+   }
+
+   pvh_bootparams.e820_map[memmap.nr_entries].addr =
+   ISA_START_ADDRESS;
+   pvh_bootparams.e820_map[memmap.nr_entries].size =
+   ISA_END_ADDRESS - ISA_START_ADDRESS;
+   pvh_bootparams.e820_map[memmap.nr_entries++].type =
+   E820_RESERVED;
+
+   sanitize_e820_map(pvh_bootparams.e820_map,
+ ARRAY_SIZE(pvh_bootparams.e820_map),
+ &memmap.nr_entries);
+
+   pvh_bootparams.e820_entries = memmap.nr_entries;
+   for (i = 0; i < pvh_bootparams.e820_entries; i++)
+   e820_add_region(pvh_bootparams.e820_map[i].addr,
+   pvh_bootparams.e820_map[i].size,
+   pvh_bootparams.e820_map[i].type);
+
+   pvh_bootparams.hdr.cmd_line_ptr =
+   pvh_start_info.cmdline_paddr;
+
+   /* The first module is always ramdisk */
+   if (pvh_start_info.nr_modules) {
+   struct hvm_modlist_entry *modaddr =
+   __va(pvh_start_info.modlist_paddr);
+   pvh_bootparams.hdr.ramdisk_image = modaddr->paddr;
+   pvh_bootparams.hdr.ramdisk_size = modaddr->size;
+   }
+
+   /*
+* See Documentation/x86/boot.txt.
+*
+* Version 2.12 supports Xen entry point but we will use default x86/PC
+* environment (i.e. hardware_subarch 0).
+*/
+   pvh_bootparams.hdr.version = 0x212;
+   pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+}
+
+/*
+ * This routine (and those that it might call) should not use
+ * anything that lives in .bss since that segment will be cleared later
+ */
+void __init xen_prepare_pvh(void)
+{
+