Module Name:    src
Committed By:   imil
Date:           Tue Feb 18 10:16:04 UTC 2025

Modified Files:
        src/sys/arch/i386/i386: genassym.cf locore.S machdep.c
Added Files:
        src/sys/arch/i386/conf: MICROVM

Log Message:
Add support for non-Xen PVH guests to i386, PR kern/57813
Tested on QEMU by me, Xen PV & PVH by bouyer@.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/arch/i386/conf/MICROVM
cvs rdiff -u -r1.135 -r1.136 src/sys/arch/i386/i386/genassym.cf
cvs rdiff -u -r1.198 -r1.199 src/sys/arch/i386/i386/locore.S
cvs rdiff -u -r1.842 -r1.843 src/sys/arch/i386/i386/machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/i386/i386/genassym.cf
diff -u src/sys/arch/i386/i386/genassym.cf:1.135 src/sys/arch/i386/i386/genassym.cf:1.136
--- src/sys/arch/i386/i386/genassym.cf:1.135	Wed Oct  4 20:28:05 2023
+++ src/sys/arch/i386/i386/genassym.cf	Tue Feb 18 10:16:03 2025
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.135 2023/10/04 20:28:05 ad Exp $
+#	$NetBSD: genassym.cf,v 1.136 2025/02/18 10:16:03 imil Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008, 2023 The NetBSD Foundation, Inc.
@@ -377,6 +377,7 @@ define	L2_FRAME		L2_FRAME
 
 define	VM_GUEST_XENPV		VM_GUEST_XENPV
 define	VM_GUEST_XENPVH		VM_GUEST_XENPVH
+define	VM_GUEST_GENPVH		VM_GUEST_GENPVH
 
 ifdef XEN
 define CPU_INFO_VCPU		offsetof(struct cpu_info, ci_vcpu)
@@ -391,7 +392,12 @@ define START_INFO_STORE_MFN	offsetof(str
 define SIF_INITDOMAIN		SIF_INITDOMAIN
 define EVTCHN_UPCALL_PENDING	offsetof(struct vcpu_info, evtchn_upcall_pending)
 define EVTCHN_UPCALL_MASK	offsetof(struct vcpu_info, evtchn_upcall_mask)
-
+define HVM_START_INFO_SIZE	sizeof(struct hvm_start_info)
+define START_INFO_VERSION	offsetof(struct hvm_start_info, version)
+define MMAP_PADDR		offsetof(struct hvm_start_info, memmap_paddr)
+define MMAP_ENTRIES		offsetof(struct hvm_start_info, memmap_entries)
+define MMAP_ENTRY_SIZE		sizeof(struct hvm_memmap_table_entry)
+define CMDLINE_PADDR		offsetof(struct hvm_start_info, cmdline_paddr)
 define HYPERVISOR_sched_op	__HYPERVISOR_sched_op
 define SCHEDOP_yield		SCHEDOP_yield
 endif /* XEN */

Index: src/sys/arch/i386/i386/locore.S
diff -u src/sys/arch/i386/i386/locore.S:1.198 src/sys/arch/i386/i386/locore.S:1.199
--- src/sys/arch/i386/i386/locore.S:1.198	Wed Jul 31 20:05:28 2024
+++ src/sys/arch/i386/i386/locore.S	Tue Feb 18 10:16:03 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $	*/
+/*	$NetBSD: locore.S,v 1.199 2025/02/18 10:16:03 imil Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -128,7 +128,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.198 2024/07/31 20:05:28 andvar Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.199 2025/02/18 10:16:03 imil Exp $");
 
 #include "opt_copy_symtab.h"
 #include "opt_ddb.h"
@@ -244,11 +244,12 @@ __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1
 
 #ifdef XEN
 #define __ASSEMBLY__
+#include <xen/include/public/arch-x86/cpuid.h>
 #include <xen/include/public/elfnote.h>
 #include <xen/include/public/xen.h>
 
 #define ELFNOTE(name, type, desctype, descdata...) \
-.pushsection .note.name			;	\
+.pushsection .note.name, "a", @note	;	\
   .align 4				;	\
   .long 2f - 1f		/* namesz */	;	\
   .long 4f - 3f		/* descsz */	;	\
@@ -272,7 +273,7 @@ __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  start)
 #else
 	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  0)
-	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   .long,  RELOC(start_xenpvh))
+	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   .long,  RELOC(start_pvh))
 #endif /* XENPV */
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
@@ -1193,7 +1194,7 @@ END(start)
 #if defined(XEN)
 #ifndef XENPV
 /* entry point for Xen PVH */
-ENTRY(start_xenpvh)
+ENTRY(start_pvh)
 	/* Xen doesn't start us with a valid gdt */
 	movl    $RELOC(gdtdesc_xenpvh), %eax
 	lgdt    (%eax)
@@ -1217,6 +1218,93 @@ ENTRY(start_xenpvh)
 	stosb
 
 	/*
+	 * Here, we have 2 cases :
+	 *
+	 *  1) We have been started by Xen
+	 *  2) We have been started by another VMM (Qemu, Firecracker, ...)
+	 *
+	 * The main difference is that, when we are started by Xen,
+	 * %ebx (addr of the hvm_start_info structure) is pointing to a
+	 * location that will be mapped correctly later.
+	 *
+	 * In the second case, we have to copy this structure (and the
+	 * command line and memory map it points to) to a location that
+	 * will be mapped later: __kernel_end
+	 *
+	 * To distinguish between the 2 cases, we'll use the 'cpuid' instruction
+	 */
+
+	push %ebx
+	xorl %eax, %eax
+	cpuid
+	cmpl $0x1, %eax		/* Check if we can call CPUID with eax=1 */
+	jb .start_genpvh
+	xorl %eax, %eax
+	inc %eax
+	cpuid
+	shr $31, %ecx
+	testb $1, %cl		/* Check if bit 31 of ECX (hypervisor) is set */
+	jz .start_genpvh
+	xorl %eax, %eax
+	inc %eax
+	shl $30, %eax
+	cpuid			/* Calling cpuid with eax=0x40000000 */
+	cmp $XEN_CPUID_SIGNATURE_EBX, %ebx	/* "VneX" */
+	je .start_xen
+
+	/* We have been started by a VMM that is *not* Xen */
+
+.start_genpvh:
+
+	/* First, copy the hvm_start_info structure to __kernel_end */
+	pop %ebx
+	movl %ebx, %esi
+	movl $RELOC(__kernel_end), %edi
+	movl $HVM_START_INFO_SIZE, %ecx
+	shrl $2, %ecx
+	rep movsl
+
+	/* Copy the cmdline string (at cmdline_paddr) after hvm_start_info */
+	movl CMDLINE_PADDR(%ebx), %esi
+	movl $RELOC(__kernel_end), %ecx
+	movl %edi, CMDLINE_PADDR(%ecx)	/* Set new cmdline_paddr in hvm_start_info */
+	.cmdline_copy:
+	movb (%esi), %al
+	movsb
+	cmp $0, %al
+	jne .cmdline_copy
+
+	/* Copy the memmap table after cmdline (only if hvm_start_info->version != 0) */
+	xorl %eax, %eax
+	cmpl START_INFO_VERSION(%ebx), %eax
+	je .reload_ebx
+	movl MMAP_PADDR(%ebx), %esi
+	movl $RELOC(__kernel_end), %ecx
+	movl %edi, MMAP_PADDR(%ecx)	/* Set new memmap_paddr in hvm_start_info */
+	movl MMAP_ENTRIES(%ebx), %eax	/* Get memmap_entries */
+	movl $MMAP_ENTRY_SIZE, %ebx
+	mull %ebx			/* eax * ebx => edx:eax */
+	movl %eax, %ecx
+	shrl $2, %ecx
+	rep movsl
+
+.reload_ebx:
+	movl $RELOC(__kernel_end), %ebx
+
+	/* announce ourself */
+	movl	$VM_GUEST_GENPVH, RELOC(vm_guest)
+
+	jmp .save_hvm_start_paddr
+
+.start_xen:
+	pop %ebx
+	movl	$VM_GUEST_XENPVH, RELOC(vm_guest)
+
+.save_hvm_start_paddr:
+	/*
+	 * %ebx: paddr of hvm_start_info (Xen's original, or our copy)
+	 */
+	/*
 	 * save addr of the hvm_start_info structure. This is also the end
 	 * of the symbol table
 	 */
@@ -1226,22 +1314,25 @@ ENTRY(start_xenpvh)
 	movl	$RELOC(esym),%ebp
 	movl	%eax,(%ebp)
 	/* get a page for HYPERVISOR_shared_info */
+	/* this is only needed if we are running on Xen */
+	cmpl	$VM_GUEST_XENPVH, RELOC(vm_guest)
+	jne	.add_hvm_start_info_page
 	addl	$PAGE_SIZE, %ebx
 	addl	$PGOFSET,%ebx
 	andl	$~PGOFSET,%ebx
 	movl	$RELOC(HYPERVISOR_shared_info_pa),%ebp
 	movl	%ebx,(%ebp)
 	/* XXX assume hvm_start_info+dependant structure fits in a single page */
+.add_hvm_start_info_page:
 	addl	$PAGE_SIZE, %ebx
 	addl	$PGOFSET,%ebx
 	andl	$~PGOFSET,%ebx
 	addl	$KERNBASE,%ebx
 	movl	$RELOC(eblob),%ebp
 	movl	%ebx,(%ebp)
-	/* announce ourself */
-	movl	$VM_GUEST_XENPVH, RELOC(vm_guest)
+
 	jmp	.Lstart_common
-END(start_xenpvh)
+END(start_pvh)
 	.align 8
 gdtdesc_xenpvh:
 	.word	gdt_xenpvhend - gdt_xenpvh

Index: src/sys/arch/i386/i386/machdep.c
diff -u src/sys/arch/i386/i386/machdep.c:1.842 src/sys/arch/i386/i386/machdep.c:1.843
--- src/sys/arch/i386/i386/machdep.c:1.842	Thu Jun 27 23:58:46 2024
+++ src/sys/arch/i386/i386/machdep.c	Tue Feb 18 10:16:03 2025
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.842 2024/06/27 23:58:46 riastradh Exp $	*/
+/*	$NetBSD: machdep.c,v 1.843 2025/02/18 10:16:03 imil Exp $	*/
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.842 2024/06/27 23:58:46 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.843 2025/02/18 10:16:03 imil Exp $");
 
 #include "opt_beep.h"
 #include "opt_compat_freebsd.h"
@@ -1105,6 +1105,11 @@ init386_ksyms(void)
 		return;
 #endif
 
+	if (vm_guest == VM_GUEST_GENPVH) {
+		ksyms_addsyms_elf(0, ((int *)&end) + 1, esym);
+		return;
+	}
+
 	if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) {
 		ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym);
 		return;
@@ -1184,7 +1189,7 @@ init386(paddr_t first_avail)
 #endif
 
 #ifdef XEN
-	if (vm_guest == VM_GUEST_XENPVH)
+	if (vm_guest == VM_GUEST_XENPVH || vm_guest == VM_GUEST_GENPVH)
 		xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
 #endif
 

Added files:

Index: src/sys/arch/i386/conf/MICROVM
diff -u /dev/null src/sys/arch/i386/conf/MICROVM:1.1
--- /dev/null	Tue Feb 18 10:16:04 2025
+++ src/sys/arch/i386/conf/MICROVM	Tue Feb 18 10:16:03 2025
@@ -0,0 +1,143 @@
+# $NetBSD: MICROVM,v 1.1 2025/02/18 10:16:03 imil Exp $
+#
+# MICROVM kernel configuration, for use with Qemu microvm machine type
+# or Firecracker.
+# Stripped-down configuration with no PCI, use VirtIO over MMIO virtual
+# bus instead. ACPI is disabled as Firecracker doesn't support it,
+# use legacy MP tables instead.
+#
+# Example qemu usage on a Linux host to boot a NetBSD guest:
+#
+# qemu-system-x86_64							\
+# 	-M microvm,x-option-roms=off,rtc=on,acpi=off,pic=off,accel=kvm	\
+# 	-m 256 -cpu host -kernel ${KERNEL} 				\
+# 	-append "root=ld0a console=com rw -z"				\
+# 	-display none -device virtio-blk-device,drive=hd0		\
+# 	-drive file=${IMG},format=raw,id=hd0				\
+# 	-device virtio-net-device,netdev=net0 				\
+# 	-netdev user,id=net0,ipv6=off,hostfwd=::2200-:22		\
+# 	-global virtio-mmio.force-legacy=false -serial stdio
+
+machine i386 x86 xen
+include         "conf/std"      # MI standard options
+include         "arch/xen/conf/std.xenversion"
+
+options         CPU_IN_CKSUM
+options         EXEC_ELF32      # exec ELF binaries
+options         EXEC_SCRIPT     # exec #! scripts
+options         MTRR
+options         MULTIPROCESSOR
+
+options         CHILD_MAX=1024  # 160 is too few
+options         OPEN_MAX=1024   # 128 is too few
+
+mainbus0 at root
+cpu* at mainbus?
+ioapic* at mainbus? apid ?
+
+options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
+maxusers	8		# estimated number of users
+
+options 	INSECURE	# disable kernel security levels - X needs this
+
+options 	RTC_OFFSET=0	# hardware clock is this many mins. west of GMT
+
+options 	PIPE_SOCKETPAIR	# smaller, but slower pipe(2)
+
+# Xen PV support for PVH and HVM guests, needed for PVH boot
+options 	XENPVHVM
+options 	XEN
+hypervisor*	at mainbus?		# Xen hypervisor
+xenbus*	 	at hypervisor?		# Xen virtual bus
+xencons*	at hypervisor?		# Xen virtual console
+
+# Include NetBSD 10 compatibility
+options		COMPAT_100
+#
+# Because gcc omits the frame pointer for any -O level, the line below
+# is needed to make backtraces in DDB work.
+#
+makeoptions	COPTS="-O2 -fno-omit-frame-pointer"
+
+# File systems
+#include "conf/filesystems.config"
+file-system FFS
+file-system EXT2FS
+file-system KERNFS
+file-system MFS
+file-system TMPFS
+file-system PTYFS
+file-system MSDOSFS
+file-system PROCFS
+
+options		DKWEDGE_AUTODISCOVER
+options		DKWEDGE_METHOD_GPT
+# File system options
+# ffs
+options 	FFS_NO_SNAPSHOT	# No FFS snapshot support
+options 	WAPBL		# File system journaling support
+
+# Networking options
+#options 	GATEWAY		# packet forwarding
+options 	INET		# IP + ICMP + TCP + UDP
+options 	INET6		# IPV6
+
+# Kernel root file system and dump configuration.
+config		netbsd	root on ? type ?
+
+#
+# Device configuration
+#
+
+# No ACPI in this kernel (acpi0 is commented out below); use MPBIOS instead
+options 	MPBIOS			# configure CPUs and APICs using MPBIOS
+# Provide bug-for-bug compatibility with Linux in MP Table searching
+# and parsing.  Firecracker relies on these bugs.
+options		MPTABLE_LINUX_BUG_COMPAT
+
+#pci*	at mainbus? bus ?
+#acpi0	at mainbus0
+
+# ISA bus support
+isa0	at mainbus?
+
+# ISA serial interfaces
+com0	at isa? port 0x3f8 irq 4	# Standard PC serial ports
+
+# Virtual bus for non-PCI devices
+pv* at pvbus?
+
+## Virtio devices
+# Use MMIO by default
+virtio* at pv?
+#virtio* at acpi?
+#virtio* at pci? dev ? function ?	# Virtio PCI device
+#viomb*	at virtio?			# Virtio memory balloon device
+
+ld*	at virtio?			# Virtio disk device
+vioif*	at virtio?			# Virtio network device
+viornd* at virtio?			# Virtio entropy device
+viocon* at virtio?
+
+vio9p*	at virtio?			# Virtio 9P device
+#vioscsi* at virtio?
+#scsibus* at vioscsi?
+
+pseudo-device	md			# memory disk device (ramdisk)
+#options 	MEMORY_DISK_HOOKS	# enable md specific hooks
+#options 	MEMORY_DISK_DYNAMIC	# enable dynamic resizing
+#
+pseudo-device	vnd			# disk-like interface to files
+#options 	VND_COMPRESSION		# compressed vnd(4)
+
+## network pseudo-devices
+pseudo-device	bpfilter		# Berkeley packet filter
+pseudo-device	loop			# network loopback
+
+## miscellaneous pseudo-devices
+pseudo-device	pty			# pseudo-terminals
+# userland interface to drivers, including autoconf and properties retrieval
+pseudo-device	drvctl
+
+file-system	PUFFS
+pseudo-device	putter

Reply via email to