On 13.01.23 03:57, Marek Marczykowski-Górecki wrote:
Hi,

6.1.3 as PV dom0 crashes when attempting to suspend. 6.1.1 works. The
crash:

     [  348.284004] PM: suspend entry (deep)
     [  348.289532] Filesystems sync: 0.005 seconds
     [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
     [  348.292457] OOM killer disabled.
     [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
     [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
     [  348.749228] PM: suspend devices took 0.352 seconds
     [  348.769713] ACPI: EC: interrupt blocked
     [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
     [  348.816080] #PF: supervisor read access in kernel mode
     [  348.816081] #PF: error_code(0x0000) - not-present page
     [  348.816083] PGD 0 P4D 0
     [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
     [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
     [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
     [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20
     [  348.816100] Code: 44 00 00 48 8b 05 04 a3 82 02 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 48 8b 05 fc 9d 82 02 <8b> 40 1c c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 0f 1f
     [  348.816103] RSP: e02b:ffffc90042537d08 EFLAGS: 00010246
     [  348.816105] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 20c49ba5e353f7cf
     [  348.816106] RDX: 000000000000cd19 RSI: 000000000002ee9a RDI: 002a051ed42d7694
     [  348.816108] RBP: 0000000000000003 R08: ffffc90042537ca0 R09: ffffffff82c5e468
     [  348.816110] R10: 0000000000007ff0 R11: 0000000000000000 R12: 0000000000000000
     [  348.816111] R13: fffffffffffffff2 R14: ffff88812206e6c0 R15: ffff88812206e6e0
     [  348.816121] FS:  00007cb49b01eb80(0000) GS:ffff888189400000(0000) knlGS:0000000000000000
     [  348.816123] CS:  e030 DS: 0000 ES: 0000 CR0: 0000000080050033
     [  348.816124] CR2: 000000000000001c CR3: 000000012231a000 CR4: 0000000000050660
     [  348.816131] Call Trace:
     [  348.816133]  <TASK>
     [  348.816134]  acpi_pm_prepare+0x1a/0x50
     [  348.816141]  suspend_enter+0x94/0x360
     [  348.816146]  suspend_devices_and_enter+0x198/0x2b0
     [  348.816150]  enter_state+0x18d/0x1f5
     [  348.816155]  pm_suspend.cold+0x20/0x6b
     [  348.816159]  state_store+0x27/0x60
     [  348.816163]  kernfs_fop_write_iter+0x125/0x1c0
     [  348.816169]  new_sync_write+0x105/0x190
     [  348.816176]  vfs_write+0x211/0x2a0
     [  348.816180]  ksys_write+0x67/0xe0
     [  348.816183]  do_syscall_64+0x59/0x90
     [  348.816188]  ? do_syscall_64+0x69/0x90
     [  348.816192]  ? exc_page_fault+0x76/0x170
     [  348.816195]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
     [  348.816200] RIP: 0033:0x7cb49c1412f7
     [  348.816203] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 
0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 
ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
     [  348.816204] RSP: 002b:00007ffc125f63f8 EFLAGS: 00000246 ORIG_RAX: 
0000000000000001
     [  348.816206] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 
00007cb49c1412f7
     [  348.816208] RDX: 0000000000000004 RSI: 00007ffc125f64e0 RDI: 
0000000000000004
     [  348.816209] RBP: 00007ffc125f64e0 R08: 00005c83d772bca0 R09: 
000000000000000d
     [  348.816210] R10: 00005c83d7727eb0 R11: 0000000000000246 R12: 
0000000000000004
     [  348.816211] R13: 00005c83d77272d0 R14: 0000000000000004 R15: 
00007cb49c213700
     [  348.816213]  </TASK>
     [  348.816214] Modules linked in: loop vfat fat snd_hda_codec_hdmi 
snd_sof_pci_intel_tgl snd_sof_intel_hda_common soundwire_intel 
soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda snd_sof_pci 
snd_sof_xtensa_dsp snd_sof snd_sof_utils snd_soc_hdac_hda snd_hda_ext_core 
snd_soc_acpi_intel_match snd_soc_acpi soundwire_bus snd_hda_codec_realtek 
snd_hda_codec_generic ledtrig_audio snd_soc_core snd_compress ac97_bus 
snd_pcm_dmaengine snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi iTCO_wdt 
intel_pmc_bxt ee1004 iTCO_vendor_support intel_rapl_msr snd_hda_codec 
snd_hda_core snd_hwdep snd_seq snd_seq_device iwlwifi snd_pcm pcspkr joydev 
processor_thermal_device_pci_legacy processor_thermal_device snd_timer snd 
cfg80211 processor_thermal_rfim i2c_i801 processor_thermal_mbox i2c_smbus 
idma64 rfkill processor_thermal_rapl soundcore intel_rapl_common 
int340x_thermal_zone intel_soc_dts_iosf igen6_edac intel_hid intel_pmc_core 
intel_scu_pltdrv sparse_keymap fuse xenfs ip_tables dm_thin_pool
     ic#2 Part1
     [  348.816259]  dm_persistent_data dm_bio_prison dm_crypt i915 
crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic 
drm_buddy nvme video wmi drm_display_helper nvme_core xhci_pci xhci_pci_renesas 
ghash_clmulni_intel hid_multitouch sha512_ssse3 serio_raw nvme_common cec 
xhci_hcd ttm i2c_hid_acpi i2c_hid pinctrl_tigerlake xen_acpi_processor 
xen_privcmd xen_pciback xen_blkback xen_gntalloc xen_gntdev xen_evtchn uinput
     [  348.816281] CR2: 000000000000001c
     [  348.816283] ---[ end trace 0000000000000000 ]---
     [  348.867991] RIP: e030:acpi_get_wakeup_address+0xc/0x20
     [  348.867996] Code: 44 00 00 48 8b 05 04 a3 82 02 c3 cc cc cc cc cc cc cc cc cc 
cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 48 8b 05 fc 9d 82 02 <8b> 40 1c c3 
cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 0f 1f
     [  348.867998] RSP: e02b:ffffc90042537d08 EFLAGS: 00010246
     [  348.867999] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 
20c49ba5e353f7cf
     [  348.868000] RDX: 000000000000cd19 RSI: 000000000002ee9a RDI: 
002a051ed42d7694
     [  348.868001] RBP: 0000000000000003 R08: ffffc90042537ca0 R09: 
ffffffff82c5e468
     [  348.868001] R10: 0000000000007ff0 R11: 0000000000000000 R12: 
0000000000000000
     [  348.868002] R13: fffffffffffffff2 R14: ffff88812206e6c0 R15: 
ffff88812206e6e0
     [  348.868008] FS:  00007cb49b01eb80(0000) GS:ffff888189400000(0000) 
knlGS:0000000000000000
     [  348.868009] CS:  e030 DS: 0000 ES: 0000 CR0: 0000000080050033
     [  348.868009] CR2: 000000000000001c CR3: 000000012231a000 CR4: 
0000000000050660
     [  348.868014] Kernel panic - not syncing: Fatal exception
     [  348.868031] Kernel Offset: disabled

Looking at git log between those two versions, and the
acpi_get_wakeup_address() function, I suspect it's this change (but I
have _not_ tested it):

commit b1898793777fe10a31c160bb8bc385d6eea640c6
Author: Juergen Gross <[email protected]>
Date:   Wed Nov 23 12:45:23 2022 +0100

     x86/boot: Skip realmode init code when running as Xen PV guest

     [ Upstream commit f1e525009493cbd569e7c8dd7d58157855f8658d ]

Yes, you are right.

Could you please test the attached patch? It is for upstream, but I think it
should apply to 6.1.3, too.


Juergen


From 40833b6701026a37243bda90bbd053c58963844d Mon Sep 17 00:00:00 2001
From: Juergen Gross <[email protected]>
To: [email protected]
To: [email protected]
To: [email protected]
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Stefano Stabellini <[email protected]>
Cc: Oleksandr Tyshchenko <[email protected]>
Cc: [email protected]
Date: Fri, 13 Jan 2023 08:37:45 +0100
Subject: [PATCH] x86/acpi: fix suspend with Xen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
Xen PV guest") missed one code path accessing real_mode_header, leading
to dereferencing NULL when suspending the system under Xen:

    [  348.284004] PM: suspend entry (deep)
    [  348.289532] Filesystems sync: 0.005 seconds
    [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
    [  348.292457] OOM killer disabled.
    [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
    [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
    [  348.749228] PM: suspend devices took 0.352 seconds
    [  348.769713] ACPI: EC: interrupt blocked
    [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
    [  348.816080] #PF: supervisor read access in kernel mode
    [  348.816081] #PF: error_code(0x0000) - not-present page
    [  348.816083] PGD 0 P4D 0
    [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
    [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
    [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
    [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20

Fix that by adding an indirection for acpi_get_wakeup_address() which
Xen PV dom0 can use to return a dummy non-zero wakeup address (this
address won't ever be used, as the real suspend handling is done by the
hypervisor).

Fixes: f1e525009493 ("x86/boot: Skip realmode init code when running as Xen PV guest")
Reported-by: Marek Marczykowski-Górecki <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
 arch/x86/include/asm/acpi.h  | 2 +-
 arch/x86/kernel/acpi/sleep.c | 3 ++-
 include/xen/acpi.h           | 9 +++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 65064d9f7fa6..137259ff8f03 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -61,7 +61,7 @@ static inline void acpi_disable_pci(void)
 extern int (*acpi_suspend_lowlevel)(void);
 
 /* Physical address to resume after wakeup */
-unsigned long acpi_get_wakeup_address(void);
+extern unsigned long (*acpi_get_wakeup_address)(void);
 
 /*
  * Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 3b7f4cdbf2e0..1a3cd5e24cd0 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -33,10 +33,11 @@ static char temp_stack[4096];
  * Returns the physical address where the kernel should be resumed after the
  * system awakes from S3, e.g. for programming into the firmware waking vector.
  */
-unsigned long acpi_get_wakeup_address(void)
+static unsigned long x86_acpi_get_wakeup_address(void)
 {
 	return ((unsigned long)(real_mode_header->wakeup_start));
 }
+unsigned long (*acpi_get_wakeup_address)(void) = x86_acpi_get_wakeup_address;
 
 /**
  * x86_acpi_enter_sleep_state - enter sleep state
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
index b1e11863144d..7e1e5dbfb77c 100644
--- a/include/xen/acpi.h
+++ b/include/xen/acpi.h
@@ -56,6 +56,12 @@ static inline int xen_acpi_suspend_lowlevel(void)
 	return 0;
 }
 
+static inline unsigned long xen_acpi_get_wakeup_address(void)
+{
+	/* Just return a dummy non-zero value, it will never be used. */
+	return 1;
+}
+
 static inline void xen_acpi_sleep_register(void)
 {
 	if (xen_initial_domain()) {
@@ -65,6 +71,9 @@ static inline void xen_acpi_sleep_register(void)
 			&xen_acpi_notify_hypervisor_extended_sleep);
 
 		acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
+#ifdef CONFIG_ACPI_SLEEP
+		acpi_get_wakeup_address = xen_acpi_get_wakeup_address;
+#endif
 	}
 }
 #else
-- 
2.35.3

Attachment: OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key

Attachment: OpenPGP_signature
Description: OpenPGP digital signature

Reply via email to