Module Name: src Committed By: imil Date: Tue Feb 18 10:16:04 UTC 2025
Modified Files: src/sys/arch/i386/i386: genassym.cf locore.S machdep.c Added Files: src/sys/arch/i386/conf: MICROVM Log Message: Add support for non-Xen PVH guests to i386, PR kern/57813 Tested on QEMU by me, Xen PV & PVH by bouyer@. To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/sys/arch/i386/conf/MICROVM cvs rdiff -u -r1.135 -r1.136 src/sys/arch/i386/i386/genassym.cf cvs rdiff -u -r1.198 -r1.199 src/sys/arch/i386/i386/locore.S cvs rdiff -u -r1.842 -r1.843 src/sys/arch/i386/i386/machdep.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/i386/i386/genassym.cf diff -u src/sys/arch/i386/i386/genassym.cf:1.135 src/sys/arch/i386/i386/genassym.cf:1.136 --- src/sys/arch/i386/i386/genassym.cf:1.135 Wed Oct 4 20:28:05 2023 +++ src/sys/arch/i386/i386/genassym.cf Tue Feb 18 10:16:03 2025 @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.135 2023/10/04 20:28:05 ad Exp $ +# $NetBSD: genassym.cf,v 1.136 2025/02/18 10:16:03 imil Exp $ # # Copyright (c) 1998, 2006, 2007, 2008, 2023 The NetBSD Foundation, Inc. @@ -377,6 +377,7 @@ define L2_FRAME L2_FRAME define VM_GUEST_XENPV VM_GUEST_XENPV define VM_GUEST_XENPVH VM_GUEST_XENPVH +define VM_GUEST_GENPVH VM_GUEST_GENPVH ifdef XEN define CPU_INFO_VCPU offsetof(struct cpu_info, ci_vcpu) @@ -391,7 +392,12 @@ define START_INFO_STORE_MFN offsetof(str define SIF_INITDOMAIN SIF_INITDOMAIN define EVTCHN_UPCALL_PENDING offsetof(struct vcpu_info, evtchn_upcall_pending) define EVTCHN_UPCALL_MASK offsetof(struct vcpu_info, evtchn_upcall_mask) - +define HVM_START_INFO_SIZE sizeof(struct hvm_start_info) +define START_INFO_VERSION offsetof(struct hvm_start_info, version) +define MMAP_PADDR offsetof(struct hvm_start_info, memmap_paddr) +define MMAP_ENTRIES offsetof(struct hvm_start_info, memmap_entries) +define MMAP_ENTRY_SIZE sizeof(struct hvm_memmap_table_entry) +define CMDLINE_PADDR offsetof(struct hvm_start_info, cmdline_paddr) define HYPERVISOR_sched_op __HYPERVISOR_sched_op define SCHEDOP_yield SCHEDOP_yield endif /* XEN */ Index: src/sys/arch/i386/i386/locore.S diff -u src/sys/arch/i386/i386/locore.S:1.198 src/sys/arch/i386/i386/locore.S:1.199 --- src/sys/arch/i386/i386/locore.S:1.198 Wed Jul 31 20:05:28 2024 +++ src/sys/arch/i386/i386/locore.S Tue Feb 18 10:16:03 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $ */ +/* $NetBSD: locore.S,v 1.199 2025/02/18 10:16:03 imil Exp $ */ /* * Copyright-o-rama! 
@@ -128,7 +128,7 @@ */ #include <machine/asm.h> -__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $"); +__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.199 2025/02/18 10:16:03 imil Exp $"); #include "opt_copy_symtab.h" #include "opt_ddb.h" @@ -244,11 +244,12 @@ __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1 #ifdef XEN #define __ASSEMBLY__ +#include <xen/include/public/arch-x86/cpuid.h> #include <xen/include/public/elfnote.h> #include <xen/include/public/xen.h> #define ELFNOTE(name, type, desctype, descdata...) \ -.pushsection .note.name ; \ +.pushsection .note.name, "a", @note ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ @@ -272,7 +273,7 @@ __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, start) #else ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0) - ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_xenpvh)) + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_pvh)) #endif /* XENPV */ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) @@ -1193,7 +1194,7 @@ END(start) #if defined(XEN) #ifndef XENPV /* entry point for Xen PVH */ -ENTRY(start_xenpvh) +ENTRY(start_pvh) /* Xen doesn't start us with a valid gdt */ movl $RELOC(gdtdesc_xenpvh), %eax lgdt (%eax) @@ -1217,6 +1218,93 @@ ENTRY(start_xenpvh) stosb /* + * Here, we have 2 cases : + * + * 1) We have been started by Xen + * 2) We have been started by another VMM (Qemu, Firecracker, ...) + * + * The main difference is that, when we are started by Xen, + * %ebx (addr of the hvm_start_info structure) is pointing to a + * location that will be mapped correctly later. 
+ * + * In the second case, we have to copy this structure (and all + * the information contained in it) to a location that will be + * mapped later : __kernel_end + * + * To distinguish between the 2 cases, we'll use the 'cpuid' instruction + */ + + push %ebx + xorl %eax, %eax + cpuid + cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */ + jb .start_genpvh + xorl %eax, %eax + inc %eax + cpuid + shr $31, %ecx + testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */ + jz .start_genpvh + xorl %eax, %eax + inc %eax + shl $30, %eax + cpuid /* Calling cpuid with eax=0x40000000 */ + cmp $XEN_CPUID_SIGNATURE_EBX, %ebx /* "VneX" */ + je .start_xen + + /* We have been started by a VMM that is *not* Xen */ + +.start_genpvh: + + /* First, copy the hvm_start_info structure to __kernel_end */ + pop %ebx + movl %ebx, %esi + movl $RELOC(__kernel_end), %edi + movl $HVM_START_INFO_SIZE, %ecx + shrl $2, %ecx + rep movsl + + /* Copy cmdline_paddr after hvm_start_info */ + movl CMDLINE_PADDR(%ebx), %esi + movl $RELOC(__kernel_end), %ecx + movl %edi, CMDLINE_PADDR(%ecx) /* Set new cmdline_paddr in hvm_start_info */ + .cmdline_copy: + movb (%esi), %al + movsb + cmp $0, %al + jne .cmdline_copy + + /* Copy memmap_paddr after cmdline (only if hvm_start_info->version != 0) */ + xorl %eax, %eax + cmpl START_INFO_VERSION(%ebx), %eax + je .reload_ebx + movl MMAP_PADDR(%ebx), %esi + movl $RELOC(__kernel_end), %ecx + movl %edi, MMAP_PADDR(%ecx) /* Set new memmap_paddr in hvm_start_info */ + movl MMAP_ENTRIES(%ebx), %eax /* Get memmap_entries */ + movl $MMAP_ENTRY_SIZE, %ebx + mull %ebx /* eax * ebx => edx:eax */ + movl %eax, %ecx + shrl $2, %ecx + rep movsl + +.reload_ebx: + movl $RELOC(__kernel_end), %ebx + + /* announce ourself */ + movl $VM_GUEST_GENPVH, RELOC(vm_guest) + + jmp .save_hvm_start_paddr + +.start_xen: + pop %ebx + movl $VM_GUEST_XENPVH, RELOC(vm_guest) + +.save_hvm_start_paddr: + /* + * save addr of the hvm_start_info structure. 
This is also the end + * of the symbol table + /* * save addr of the hvm_start_info structure. This is also the end * of the symbol table */ @@ -1226,22 +1314,25 @@ ENTRY(start_xenpvh) movl $RELOC(esym),%ebp movl %eax,(%ebp) /* get a page for HYPERVISOR_shared_info */ + /* this is only needed if we are running on Xen */ + cmpl $VM_GUEST_XENPVH, RELOC(vm_guest) + jne .add_hvm_start_info_page addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx andl $~PGOFSET,%ebx movl $RELOC(HYPERVISOR_shared_info_pa),%ebp movl %ebx,(%ebp) /* XXX assume hvm_start_info+dependant structure fits in a single page */ +.add_hvm_start_info_page: addl $PAGE_SIZE, %ebx addl $PGOFSET,%ebx andl $~PGOFSET,%ebx addl $KERNBASE,%ebx movl $RELOC(eblob),%ebp movl %ebx,(%ebp) - /* announce ourself */ - movl $VM_GUEST_XENPVH, RELOC(vm_guest) + jmp .Lstart_common -END(start_xenpvh) +END(start_pvh) .align 8 gdtdesc_xenpvh: .word gdt_xenpvhend - gdt_xenpvh Index: src/sys/arch/i386/i386/machdep.c diff -u src/sys/arch/i386/i386/machdep.c:1.842 src/sys/arch/i386/i386/machdep.c:1.843 --- src/sys/arch/i386/i386/machdep.c:1.842 Thu Jun 27 23:58:46 2024 +++ src/sys/arch/i386/i386/machdep.c Tue Feb 18 10:16:03 2025 @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.842 2024/06/27 23:58:46 riastradh Exp $ */ +/* $NetBSD: machdep.c,v 1.843 2025/02/18 10:16:03 imil Exp $ */ /* * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017 @@ -67,7 +67,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.842 2024/06/27 23:58:46 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.843 2025/02/18 10:16:03 imil Exp $"); #include "opt_beep.h" #include "opt_compat_freebsd.h" @@ -1105,6 +1105,11 @@ init386_ksyms(void) return; #endif + if (vm_guest == VM_GUEST_GENPVH) { + ksyms_addsyms_elf(0, ((int *)&end) + 1, esym); + return; + } + if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) { ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym); return; @@ -1184,7 +1189,7 @@ init386(paddr_t first_avail) 
#endif #ifdef XEN - if (vm_guest == VM_GUEST_XENPVH) + if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH) xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); #endif Added files: Index: src/sys/arch/i386/conf/MICROVM diff -u /dev/null src/sys/arch/i386/conf/MICROVM:1.1 --- /dev/null Tue Feb 18 10:16:04 2025 +++ src/sys/arch/i386/conf/MICROVM Tue Feb 18 10:16:03 2025 @@ -0,0 +1,143 @@ +# $NetBSD: MICROVM,v 1.1 2025/02/18 10:16:03 imil Exp $ +# +# MICROVM kernel configuration, for use with Qemu microvm machine type +# or Firecracker. +# Stripped-down configuration with no PCI, use VirtIO over MMIO virtual +# bus instead. ACPI is disabled as Firecracker doesn't support it, +# use legacy MP tables instead. +# +# Example qemu usage on a Linux host to boot a NetBSD guest: +# +# qemu-system-x86_64 \ +# -M microvm,x-option-roms=off,rtc=on,acpi=off,pic=off,accel=kvm \ +# -m 256 -cpu host -kernel ${KERNEL} \ +# -append "root=ld0a console=com rw -z" \ +# -display none -device virtio-blk-device,drive=hd0 \ +# -drive file=${IMG},format=raw,id=hd0 \ +# -device virtio-net-device,netdev=net0 \ +# -netdev user,id=net0,ipv6=off,hostfwd=::2200-:22 \ +# -global virtio-mmio.force-legacy=false -serial stdio + +machine i386 x86 xen +include "conf/std" # MI standard options +include "arch/xen/conf/std.xenversion" + +options CPU_IN_CKSUM +options EXEC_ELF32 # exec ELF binaries +options EXEC_SCRIPT # exec #! scripts +options MTRR +options MULTIPROCESSOR + +options CHILD_MAX=1024 # 160 is too few +options OPEN_MAX=1024 # 128 is too few + +mainbus0 at root +cpu* at mainbus? +ioapic* at mainbus? apid ? + +options INCLUDE_CONFIG_FILE # embed config file in kernel binary +maxusers 8 # estimated number of users + +options INSECURE # disable kernel security levels - X needs this + +options RTC_OFFSET=0 # hardware clock is this many mins. 
west of GMT + +options PIPE_SOCKETPAIR # smaller, but slower pipe(2) + +# Xen PV support for PVH and HVM guests, needed for PVH boot +options XENPVHVM +options XEN +hypervisor* at mainbus? # Xen hypervisor +xenbus* at hypervisor? # Xen virtual bus +xencons* at hypervisor? # Xen virtual console + +# Include NetBSD 10 compatibility +options COMPAT_100 +# +# Because gcc omits the frame pointer for any -O level, the line below +# is needed to make backtraces in DDB work. +# +makeoptions COPTS="-O2 -fno-omit-frame-pointer" + +# File systems +#include "conf/filesystems.config" +file-system FFS +file-system EXT2FS +file-system KERNFS +file-system MFS +file-system TMPFS +file-system PTYFS +file-system MSDOSFS +file-system PROCFS + +options DKWEDGE_AUTODISCOVER +options DKWEDGE_METHOD_GPT +# File system options +# ffs +options FFS_NO_SNAPSHOT # No FFS snapshot support +options WAPBL # File system journaling support + +# Networking options +#options GATEWAY # packet forwarding +options INET # IP + ICMP + TCP + UDP +options INET6 # IPV6 + +# Kernel root file system and dump configuration. +config netbsd root on ? type ? + +# +# Device configuration +# + +# ACPI is not configured in this kernel (acpi0 is commented out below); MPBIOS is used instead +options MPBIOS # configure CPUs and APICs using MPBIOS +# Provide bug-for-bug compatibility with Linux in MP Table searching +# and parsing. Firecracker relies on these bugs. +options MPTABLE_LINUX_BUG_COMPAT + +#pci* at mainbus? bus ? +#acpi0 at mainbus0 + +# ISA bus support +isa0 at mainbus? + +# ISA serial interfaces +com0 at isa? port 0x3f8 irq 4 # Standard PC serial ports + +# Virtual bus for non-PCI devices +pv* at pvbus? + +## Virtio devices +# Use MMIO by default +virtio* at pv? +#virtio* at acpi? +#virtio* at pci? dev ? function ? # Virtio PCI device +#viomb* at virtio? # Virtio memory balloon device + +ld* at virtio? # Virtio disk device +vioif* at virtio? # Virtio network device +viornd* at virtio? # Virtio entropy device +viocon* at virtio? 
+ +vio9p* at virtio? # Virtio 9P device +#vioscsi* at virtio? +#scsibus* at vioscsi? + +pseudo-device md # memory disk device (ramdisk) +#options MEMORY_DISK_HOOKS # enable md specific hooks +#options MEMORY_DISK_DYNAMIC # enable dynamic resizing +# +pseudo-device vnd # disk-like interface to files +#options VND_COMPRESSION # compressed vnd(4) + +## network pseudo-devices +pseudo-device bpfilter # Berkeley packet filter +pseudo-device loop # network loopback + +## miscellaneous pseudo-devices +pseudo-device pty # pseudo-terminals +# userland interface to drivers, including autoconf and properties retrieval +pseudo-device drvctl + +file-system PUFFS +pseudo-device putter