Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package xen for openSUSE:Factory checked in at 2025-05-20 09:31:24 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/xen (Old) and /work/SRC/openSUSE:Factory/.xen.new.30101 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "xen" Tue May 20 09:31:24 2025 rev:364 rq:1277871 version:4.20.0_12 Changes: -------- --- /work/SRC/openSUSE:Factory/xen/xen.changes 2025-04-29 16:40:43.935076725 +0200 +++ /work/SRC/openSUSE:Factory/.xen.new.30101/xen.changes 2025-05-20 09:31:28.324652341 +0200 @@ -1,0 +2,30 @@ +Tue May 13 13:48:00 CEST 2025 - jbeul...@suse.com + +- bsc#1243117 - VUL-0: CVE-2024-28956: xen: Intel CPU: Indirect + Target Selection (ITS) (XSA-469) + 68221f20-x86-alternative-when-feature-not-present.patch + 68221f21-x86-guest-remove-Xen-hypercall_page.patch + 68221f22-x86-misalign-__x86_indirect_thunk.patch + 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch + 68221f24-x86-stubs-introduce-place_ret.patch + 68221f25-x86-build-with-Return-Thunks.patch + 68221f26-x86-spec-ctrl-synthesise-ITS_NO.patch + +------------------------------------------------------------------- +Mon Apr 28 12:55:00 CEST 2025 - jbeul...@suse.com + +- Upstream bug fixes (bsc#1027519) + 67dada68-x86-mm-IS_ALIGNED-in-IS_LnE_ALIGNED.patch + 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch + 67ea428e-percpu-dont-init-on-resume.patch + 67f8ecda-rangeset-incorrect-subtraction.patch + 6800b54f-x86-HVM-update-repeat-count-upon.patch + 68076044-x86emul-clip-rep-count-for-STOS.patch + 6808f549-x86-Intel-work-around-MONITOR-MWAIT-errata.patch + +------------------------------------------------------------------- +Mon Mar 31 14:37:13 MDT 2025 - carn...@suse.com + +- Drop build-python3-conversion.patch + +------------------------------------------------------------------- @@ -34,0 +65,43 @@ + +------------------------------------------------------------------- +Fri Feb 28 09:09:09 UTC 2025 - oher...@suse.de + +- refresh replace-obsolete-network-configuration-commands-in-s.patch + to not accidently enter untested brctl code paths +- bsc#985503 - vif-route.patch is obsolete since Xen 4.15 +- bsc#1035231 - remove SUSE specific changes for save/restore/migrate + to reduce future maintainence overhead. 
The bottleneck during + migration is the overhead of mapping HVM domU pages into dom0, + which was not addressed by these changes. + The options --abort_if_busy --max_iters --min_remaining will not + be recognized anymore by xl or virsh. + libxc-bitmap-long.patch + libxc-sr-xl-migration-debug.patch + libxc-sr-readv_exact.patch + libxc-sr-save-show_transfer_rate.patch + libxc-sr-save-mfns.patch + libxc-sr-save-types.patch + libxc-sr-save-errors.patch + libxc-sr-save-iov.patch + libxc-sr-save-rec_pfns.patch + libxc-sr-save-guest_data.patch + libxc-sr-save-local_pages.patch + libxc-sr-restore-pfns.patch + libxc-sr-restore-types.patch + libxc-sr-restore-mfns.patch + libxc-sr-restore-map_errs.patch + libxc-sr-restore-populate_pfns-pfns.patch + libxc-sr-restore-populate_pfns-mfns.patch + libxc-sr-restore-read_record.patch + libxc-sr-restore-handle_buffered_page_data.patch + libxc-sr-restore-handle_incoming_page_data.patch + libxc-sr-LIBXL_HAVE_DOMAIN_SUSPEND_PROPS.patch + libxc-sr-precopy_policy.patch + libxc-sr-max_iters.patch + libxc-sr-min_remaining.patch + libxc-sr-abort_if_busy.patch + libxc-sr-xg_sr_bitmap.patch + libxc-sr-xg_sr_bitmap-populated_pfns.patch + libxc-sr-restore-hvm-legacy-superpage.patch + libxc-sr-track-migration-time.patch + libxc-sr-number-of-iterations.patch Old: ---- build-python3-conversion.patch libxc-bitmap-long.patch libxc-sr-LIBXL_HAVE_DOMAIN_SUSPEND_PROPS.patch libxc-sr-abort_if_busy.patch libxc-sr-max_iters.patch libxc-sr-min_remaining.patch libxc-sr-number-of-iterations.patch libxc-sr-precopy_policy.patch libxc-sr-readv_exact.patch libxc-sr-restore-handle_buffered_page_data.patch libxc-sr-restore-handle_incoming_page_data.patch libxc-sr-restore-hvm-legacy-superpage.patch libxc-sr-restore-map_errs.patch libxc-sr-restore-mfns.patch libxc-sr-restore-pfns.patch libxc-sr-restore-populate_pfns-mfns.patch libxc-sr-restore-populate_pfns-pfns.patch libxc-sr-restore-read_record.patch libxc-sr-restore-types.patch libxc-sr-save-errors.patch 
libxc-sr-save-guest_data.patch libxc-sr-save-iov.patch libxc-sr-save-local_pages.patch libxc-sr-save-mfns.patch libxc-sr-save-rec_pfns.patch libxc-sr-save-show_transfer_rate.patch libxc-sr-save-types.patch libxc-sr-track-migration-time.patch libxc-sr-xg_sr_bitmap-populated_pfns.patch libxc-sr-xg_sr_bitmap.patch libxc-sr-xl-migration-debug.patch vif-route.patch New: ---- 67dada68-x86-mm-IS_ALIGNED-in-IS_LnE_ALIGNED.patch 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch 67ea428e-percpu-dont-init-on-resume.patch 67f8ecda-rangeset-incorrect-subtraction.patch 6800b54f-x86-HVM-update-repeat-count-upon.patch 68076044-x86emul-clip-rep-count-for-STOS.patch 6808f549-x86-Intel-work-around-MONITOR-MWAIT-errata.patch 68221f20-x86-alternative-when-feature-not-present.patch 68221f21-x86-guest-remove-Xen-hypercall_page.patch 68221f22-x86-misalign-__x86_indirect_thunk.patch 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch 68221f24-x86-stubs-introduce-place_ret.patch 68221f25-x86-build-with-Return-Thunks.patch 68221f26-x86-spec-ctrl-synthesise-ITS_NO.patch BETA DEBUG BEGIN: Old: - Drop build-python3-conversion.patch Old: be recognized anymore by xl or virsh. 
libxc-bitmap-long.patch libxc-sr-xl-migration-debug.patch Old: libxc-sr-restore-handle_incoming_page_data.patch libxc-sr-LIBXL_HAVE_DOMAIN_SUSPEND_PROPS.patch libxc-sr-precopy_policy.patch Old: libxc-sr-min_remaining.patch libxc-sr-abort_if_busy.patch libxc-sr-xg_sr_bitmap.patch Old: libxc-sr-precopy_policy.patch libxc-sr-max_iters.patch libxc-sr-min_remaining.patch Old: libxc-sr-max_iters.patch libxc-sr-min_remaining.patch libxc-sr-abort_if_busy.patch Old: libxc-sr-track-migration-time.patch libxc-sr-number-of-iterations.patch Old: libxc-sr-LIBXL_HAVE_DOMAIN_SUSPEND_PROPS.patch libxc-sr-precopy_policy.patch libxc-sr-max_iters.patch Old: libxc-sr-xl-migration-debug.patch libxc-sr-readv_exact.patch libxc-sr-save-show_transfer_rate.patch Old: libxc-sr-restore-read_record.patch libxc-sr-restore-handle_buffered_page_data.patch libxc-sr-restore-handle_incoming_page_data.patch Old: libxc-sr-restore-handle_buffered_page_data.patch libxc-sr-restore-handle_incoming_page_data.patch libxc-sr-LIBXL_HAVE_DOMAIN_SUSPEND_PROPS.patch Old: libxc-sr-xg_sr_bitmap-populated_pfns.patch libxc-sr-restore-hvm-legacy-superpage.patch libxc-sr-track-migration-time.patch Old: libxc-sr-restore-mfns.patch libxc-sr-restore-map_errs.patch libxc-sr-restore-populate_pfns-pfns.patch Old: libxc-sr-restore-types.patch libxc-sr-restore-mfns.patch libxc-sr-restore-map_errs.patch Old: libxc-sr-save-local_pages.patch libxc-sr-restore-pfns.patch libxc-sr-restore-types.patch Old: libxc-sr-restore-populate_pfns-pfns.patch libxc-sr-restore-populate_pfns-mfns.patch libxc-sr-restore-read_record.patch Old: libxc-sr-restore-map_errs.patch libxc-sr-restore-populate_pfns-pfns.patch libxc-sr-restore-populate_pfns-mfns.patch Old: libxc-sr-restore-populate_pfns-mfns.patch libxc-sr-restore-read_record.patch libxc-sr-restore-handle_buffered_page_data.patch Old: libxc-sr-restore-pfns.patch libxc-sr-restore-types.patch libxc-sr-restore-mfns.patch Old: libxc-sr-save-types.patch libxc-sr-save-errors.patch 
libxc-sr-save-iov.patch Old: libxc-sr-save-rec_pfns.patch libxc-sr-save-guest_data.patch libxc-sr-save-local_pages.patch Old: libxc-sr-save-errors.patch libxc-sr-save-iov.patch libxc-sr-save-rec_pfns.patch Old: libxc-sr-save-guest_data.patch libxc-sr-save-local_pages.patch libxc-sr-restore-pfns.patch Old: libxc-sr-save-show_transfer_rate.patch libxc-sr-save-mfns.patch libxc-sr-save-types.patch Old: libxc-sr-save-iov.patch libxc-sr-save-rec_pfns.patch libxc-sr-save-guest_data.patch Old: libxc-sr-readv_exact.patch libxc-sr-save-show_transfer_rate.patch libxc-sr-save-mfns.patch Old: libxc-sr-save-mfns.patch libxc-sr-save-types.patch libxc-sr-save-errors.patch Old: libxc-sr-restore-hvm-legacy-superpage.patch libxc-sr-track-migration-time.patch libxc-sr-number-of-iterations.patch Old: libxc-sr-xg_sr_bitmap.patch libxc-sr-xg_sr_bitmap-populated_pfns.patch libxc-sr-restore-hvm-legacy-superpage.patch Old: libxc-sr-abort_if_busy.patch libxc-sr-xg_sr_bitmap.patch libxc-sr-xg_sr_bitmap-populated_pfns.patch Old: libxc-bitmap-long.patch libxc-sr-xl-migration-debug.patch libxc-sr-readv_exact.patch Old: to not accidently enter untested brctl code paths - bsc#985503 - vif-route.patch is obsolete since Xen 4.15 - bsc#1035231 - remove SUSE specific changes for save/restore/migrate BETA DEBUG END: BETA DEBUG BEGIN: New:- Upstream bug fixes (bsc#1027519) 67dada68-x86-mm-IS_ALIGNED-in-IS_LnE_ALIGNED.patch 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch New: 67dada68-x86-mm-IS_ALIGNED-in-IS_LnE_ALIGNED.patch 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch 67ea428e-percpu-dont-init-on-resume.patch New: 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch 67ea428e-percpu-dont-init-on-resume.patch 67f8ecda-rangeset-incorrect-subtraction.patch New: 67ea428e-percpu-dont-init-on-resume.patch 67f8ecda-rangeset-incorrect-subtraction.patch 6800b54f-x86-HVM-update-repeat-count-upon.patch New: 67f8ecda-rangeset-incorrect-subtraction.patch 
6800b54f-x86-HVM-update-repeat-count-upon.patch 68076044-x86emul-clip-rep-count-for-STOS.patch New: 6800b54f-x86-HVM-update-repeat-count-upon.patch 68076044-x86emul-clip-rep-count-for-STOS.patch 6808f549-x86-Intel-work-around-MONITOR-MWAIT-errata.patch New: 68076044-x86emul-clip-rep-count-for-STOS.patch 6808f549-x86-Intel-work-around-MONITOR-MWAIT-errata.patch New: Target Selection (ITS) (XSA-469) 68221f20-x86-alternative-when-feature-not-present.patch 68221f21-x86-guest-remove-Xen-hypercall_page.patch New: 68221f20-x86-alternative-when-feature-not-present.patch 68221f21-x86-guest-remove-Xen-hypercall_page.patch 68221f22-x86-misalign-__x86_indirect_thunk.patch New: 68221f21-x86-guest-remove-Xen-hypercall_page.patch 68221f22-x86-misalign-__x86_indirect_thunk.patch 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch New: 68221f22-x86-misalign-__x86_indirect_thunk.patch 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch 68221f24-x86-stubs-introduce-place_ret.patch New: 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch 68221f24-x86-stubs-introduce-place_ret.patch 68221f25-x86-build-with-Return-Thunks.patch New: 68221f24-x86-stubs-introduce-place_ret.patch 68221f25-x86-build-with-Return-Thunks.patch 68221f26-x86-spec-ctrl-synthesise-ITS_NO.patch New: 68221f25-x86-build-with-Return-Thunks.patch 68221f26-x86-spec-ctrl-synthesise-ITS_NO.patch BETA DEBUG END: ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ xen.spec ++++++ --- /var/tmp/diff_new_pack.io0R8x/_old 2025-05-20 09:31:30.372736609 +0200 +++ /var/tmp/diff_new_pack.io0R8x/_new 2025-05-20 09:31:30.372736609 +0200 @@ -125,7 +125,7 @@ BuildRequires: python-rpm-macros Provides: installhint(reboot-needed) -Version: 4.20.0_10 +Version: 4.20.0_12 Release: 0 Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) License: GPL-2.0-only @@ -170,38 +170,21 @@ Patch8: 67d17edd-x86-expose-MSR_FAM10H_MMIO_CONF_BASE-on-AMD.patch Patch9: 
67d17ede-VT-x-PI-usage-of-msi_desc-msg-field.patch Patch10: 67d2a3fe-libxl-avoid-infinite-loop-in-libxl__remove_directory.patch +Patch11: 67dada68-x86-mm-IS_ALIGNED-in-IS_LnE_ALIGNED.patch +Patch12: 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch +Patch13: 67ea428e-percpu-dont-init-on-resume.patch +Patch14: 67f8ecda-rangeset-incorrect-subtraction.patch +Patch15: 6800b54f-x86-HVM-update-repeat-count-upon.patch +Patch16: 68076044-x86emul-clip-rep-count-for-STOS.patch +Patch17: 6808f549-x86-Intel-work-around-MONITOR-MWAIT-errata.patch +Patch18: 68221f20-x86-alternative-when-feature-not-present.patch +Patch19: 68221f21-x86-guest-remove-Xen-hypercall_page.patch +Patch20: 68221f22-x86-misalign-__x86_indirect_thunk.patch +Patch21: 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch +Patch22: 68221f24-x86-stubs-introduce-place_ret.patch +Patch23: 68221f25-x86-build-with-Return-Thunks.patch +Patch24: 68221f26-x86-spec-ctrl-synthesise-ITS_NO.patch # EMBARGOED security fixes -# libxc -Patch301: libxc-bitmap-long.patch -Patch302: libxc-sr-xl-migration-debug.patch -Patch303: libxc-sr-readv_exact.patch -Patch304: libxc-sr-save-show_transfer_rate.patch -Patch305: libxc-sr-save-mfns.patch -Patch306: libxc-sr-save-types.patch -Patch307: libxc-sr-save-errors.patch -Patch308: libxc-sr-save-iov.patch -Patch309: libxc-sr-save-rec_pfns.patch -Patch310: libxc-sr-save-guest_data.patch -Patch311: libxc-sr-save-local_pages.patch -Patch312: libxc-sr-restore-pfns.patch -Patch313: libxc-sr-restore-types.patch -Patch314: libxc-sr-restore-mfns.patch -Patch315: libxc-sr-restore-map_errs.patch -Patch316: libxc-sr-restore-populate_pfns-pfns.patch -Patch317: libxc-sr-restore-populate_pfns-mfns.patch -Patch318: libxc-sr-restore-read_record.patch -Patch319: libxc-sr-restore-handle_buffered_page_data.patch -Patch320: libxc-sr-restore-handle_incoming_page_data.patch -Patch321: libxc-sr-LIBXL_HAVE_DOMAIN_SUSPEND_PROPS.patch -Patch322: libxc-sr-precopy_policy.patch -Patch323: 
libxc-sr-max_iters.patch -Patch324: libxc-sr-min_remaining.patch -Patch325: libxc-sr-abort_if_busy.patch -Patch326: libxc-sr-xg_sr_bitmap.patch -Patch327: libxc-sr-xg_sr_bitmap-populated_pfns.patch -Patch328: libxc-sr-restore-hvm-legacy-superpage.patch -Patch329: libxc-sr-track-migration-time.patch -Patch330: libxc-sr-number-of-iterations.patch # Our platform specific patches Patch400: xen-destdir.patch Patch401: vif-bridge-no-iptables.patch @@ -214,7 +197,6 @@ Patch408: ignore-ip-command-script-errors.patch # Needs to go upstream Patch420: suspend_evtchn_lock.patch -Patch421: vif-route.patch # Other bug fixes or features Patch450: xen.sysconfig-fillup.patch Patch451: xenconsole-no-multiple-connections.patch @@ -228,8 +210,6 @@ Patch465: xenstore-run-in-studomain.patch Patch466: libxl.helper_done-crash.patch Patch467: libxl.LIBXL_HOTPLUG_TIMEOUT.patch -# python3 conversion patches -Patch500: build-python3-conversion.patch # Hypervisor and PV driver Patches Patch600: xen.bug1026236.suse_vtsc_tolerance.patch Patch601: x86-ioapic-ack-default.patch ++++++ 67dada68-x86-mm-IS_ALIGNED-in-IS_LnE_ALIGNED.patch ++++++ # Commit b07c7d63f9b587e4df5d71f6da9eaa433512c974 # Date 2025-03-19 14:53:28 +0000 # Author Andrew Cooper <andrew.coop...@citrix.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/mm: Fix IS_ALIGNED() check in IS_LnE_ALIGNED() The current CI failures turn out to be a latent bug triggered by a narrow set of properties of the initrd and the host memory map, which CI encountered by chance. One step during boot involves constructing directmap mappings for modules. With some probing at the point of creation, it is observed that there's a 4k mapping missing towards the end of the initrd. 
(XEN) === Mapped Mod1 [0000000394001000, 00000003be1ff6dc] to Directmap (XEN) Probing paddr 394001000, va ffff830394001000 (XEN) Probing paddr 3be1ff6db, va ffff8303be1ff6db (XEN) Probing paddr 3bdffffff, va ffff8303bdffffff (XEN) Probing paddr 3be001000, va ffff8303be001000 (XEN) Probing paddr 3be000000, va ffff8303be000000 (XEN) Early fatal page fault at e008:ffff82d04032014c (cr2=ffff8303be000000, ec=0000) The conditions for this bug appear to be map_pages_to_xen() call with a start address of exactly 4k beyond a 2M boundary, some number of full 2M pages, then a tail needing 4k pages. Anyway, the condition for spotting superpage boundaries in map_pages_to_xen() is wrong. The IS_ALIGNED() macro expects a power of two for the alignment argument, and subtracts 1 itself. Fixing this causes the failing case to now boot. Fixes: 97fb6fcf26e8 ("x86/mm: introduce helpers to detect super page alignment") Debugged-by: Marek Marczykowski-Górecki <marma...@invisiblethingslab.com> Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Tested-by: Marek Marczykowski-Górecki <marma...@invisiblethingslab.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -5502,7 +5502,7 @@ int map_pages_to_xen( \ ASSERT(!mfn_eq(m_, INVALID_MFN)); \ IS_ALIGNED(PFN_DOWN(v) | mfn_x(m_), \ - (1UL << (PAGETABLE_ORDER * ((n) - 1))) - 1); \ + 1UL << (PAGETABLE_ORDER * ((n) - 1))); \ }) #define IS_L2E_ALIGNED(v, m) IS_LnE_ALIGNED(v, m, 2) #define IS_L3E_ALIGNED(v, m) IS_LnE_ALIGNED(v, m, 3) ++++++ 67ea4268-x86-P2M-sync-fast-slow-p2m_get_page_from_gfn.patch ++++++ # Commit a8325f981ce4ff8ac8bcc73735f357846b0a0fbb # Date 2025-03-31 09:21:12 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86/P2M: synchronize fast and slow paths of p2m_get_page_from_gfn() Handling of both grants and foreign pages was different between the two paths. 
While permitting access to grants would be desirable, doing so would require more involved handling; undo that for the time being. In particular the page reference obtained would prevent the owning domain from changing e.g. the page's type (after the grantee has released the last reference of the grant). Instead perhaps another reference on the grant would need obtaining. Which in turn would require determining which grant that was. Foreign pages in any event need permitting on both paths. Introduce a helper function to be used on both paths, such that respective checking differs in just the extra "to be unshared" condition on the fast path. While there adjust the sanity check for foreign pages: Don't leak the reference on release builds when on a debug build the assertion would have triggered. (Thanks to Roger for the suggestion.) Fixes: 80ea7af17269 ("x86/mm: Introduce get_page_from_gfn()") Fixes: 50fe6e737059 ("pvh dom0: add and remove foreign pages") Fixes: cbbca7be4aaa ("x86/p2m: make p2m_get_page_from_gfn() handle grant case correctly") Signed-off-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Roger Pau Monné <roger....@citrix.com> --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -328,12 +328,46 @@ void p2m_put_gfn(struct p2m_domain *p2m, gfn_unlock(p2m, gfn_x(gfn), 0); } +static struct page_info *get_page_from_mfn_and_type( + const struct domain *d, mfn_t mfn, p2m_type_t t) +{ + struct page_info *page; + + if ( !mfn_valid(mfn) ) + return NULL; + + page = mfn_to_page(mfn); + + if ( p2m_is_ram(t) ) + { + if ( p2m_is_shared(t) ) + d = dom_cow; + + if ( get_page(page, d) ) + return page; + } + else if ( unlikely(p2m_is_foreign(t)) ) + { + const struct domain *fdom = page_get_owner_and_reference(page); + + if ( fdom ) + { + if ( likely(fdom != d) ) + return page; + ASSERT_UNREACHABLE(); + put_page(page); + } + } + + return NULL; +} + /* Atomically look up a GFN and take a reference count on the backing page. 
*/ struct page_info *p2m_get_page_from_gfn( struct p2m_domain *p2m, gfn_t gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q) { - struct page_info *page = NULL; + struct page_info *page; p2m_access_t _a; p2m_type_t _t; mfn_t mfn; @@ -347,26 +381,9 @@ struct page_info *p2m_get_page_from_gfn( /* Fast path: look up and get out */ p2m_read_lock(p2m); mfn = p2m_get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0); - if ( p2m_is_any_ram(*t) && mfn_valid(mfn) - && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) ) - { - page = mfn_to_page(mfn); - if ( unlikely(p2m_is_foreign(*t)) || unlikely(p2m_is_grant(*t)) ) - { - struct domain *fdom = page_get_owner_and_reference(page); - - ASSERT(!p2m_is_foreign(*t) || fdom != p2m->domain); - if ( fdom == NULL ) - page = NULL; - } - else - { - struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow; - - if ( !get_page(page, d) ) - page = NULL; - } - } + page = !(q & P2M_UNSHARE) || !p2m_is_shared(*t) + ? get_page_from_mfn_and_type(p2m->domain, mfn, *t) + : NULL; p2m_read_unlock(p2m); if ( page ) @@ -380,14 +397,7 @@ struct page_info *p2m_get_page_from_gfn( /* Slow path: take the write lock and do fixups */ mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL); - if ( p2m_is_ram(*t) && mfn_valid(mfn) ) - { - struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow; - - page = mfn_to_page(mfn); - if ( !get_page(page, d) ) - page = NULL; - } + page = get_page_from_mfn_and_type(p2m->domain, mfn, *t); put_gfn(p2m->domain, gfn_x(gfn)); return page; ++++++ 67ea428e-percpu-dont-init-on-resume.patch ++++++ # Commit 282fa3fdb6a19c8ca56f79cdb9e6c1d1047d8e0a # Date 2025-03-31 09:21:50 +0200 # Author Mykyta Poturai <mykyta_potu...@epam.com> # Committer Jan Beulich <jbeul...@suse.com> xen/percpu: don't initialize percpu on resume Invocation of the CPU_UP_PREPARE notification on ARM64 during resume causes a crash: (XEN) [ 315.807606] Error bringing CPU1 up: -16 (XEN) [ 315.811926] Xen BUG at common/cpu.c:258 [...] 
(XEN) [ 316.142765] Xen call trace: (XEN) [ 316.146048] [<00000a0000202264>] enable_nonboot_cpus+0x128/0x1ac (PC) (XEN) [ 316.153219] [<00000a000020225c>] enable_nonboot_cpus+0x120/0x1ac (LR) (XEN) [ 316.160391] [<00000a0000278180>] suspend.c#system_suspend+0x4c/0x1a0 (XEN) [ 316.167476] [<00000a0000206b70>] domain.c#continue_hypercall_tasklet_handler+0x54/0xd0 (XEN) [ 316.176117] [<00000a0000226538>] tasklet.c#do_tasklet_work+0xb8/0x100 (XEN) [ 316.183288] [<00000a0000226920>] do_tasklet+0x68/0xb0 (XEN) [ 316.189077] [<00000a000026e120>] domain.c#idle_loop+0x7c/0x194 (XEN) [ 316.195644] [<00000a0000277638>] shutdown.c#halt_this_cpu+0/0x14 (XEN) [ 316.202383] [<0000000000000008>] 0000000000000008 Freeing per-CPU areas and setting __per_cpu_offset to INVALID_PERCPU_AREA only occur when !park_offline_cpus and system_state is not SYS_STATE_suspend. On ARM64, park_offline_cpus is always false, so setting __per_cpu_offset to INVALID_PERCPU_AREA depends solely on the system state. If the system is suspended, this area is not freed, and during resume, an error occurs in init_percpu_area, causing a crash because INVALID_PERCPU_AREA is not set and park_offline_cpus remains 0: if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) return park_offline_cpus ? 0 : -EBUSY; The same crash can occur on x86 if park_offline_cpus is set to 0 during Xen resume. Fixes: f75780d26b2f ("xen: move per-cpu area management into common code") Signed-off-by: Mykyta Poturai <mykyta_potu...@epam.com> Signed-off-by: Mykola Kvach <mykola_kv...@epam.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- a/xen/common/percpu.c +++ b/xen/common/percpu.c @@ -30,7 +30,9 @@ static int init_percpu_area(unsigned int char *p; if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) - return park_offline_cpus ? 0 : -EBUSY; + return park_offline_cpus || system_state == SYS_STATE_resume + ? 
0 + : -EBUSY; if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) return -ENOMEM; ++++++ 67f8ecda-rangeset-incorrect-subtraction.patch ++++++ # Commit e118fc98e7ae652a188d227bd7ea22f132724150 # Date 2025-04-11 12:20:10 +0200 # Author Roger Pau Monne <roger....@citrix.com> # Committer Roger Pau Monne <roger....@citrix.com> xen/rangeset: fix incorrect subtraction Given the following rangeset operation: { [0, 1], [4, 5] } - { [3, 4] } The current rangeset logic will output a rangeset: { [0, 2], [5, 5] } This is incorrect, and also has the undesirable property of being bogus in a way that the resulting rangeset is expanded. Fix this by making sure the bounds are correctly checked before modifying the previous range. Fixes: 484a058c4828 ('Add auto-destructing per-domain rangeset data structure...') Signed-off-by: Roger Pau Monné <roger....@citrix.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- a/xen/common/rangeset.c +++ b/xen/common/rangeset.c @@ -227,7 +227,8 @@ int rangeset_remove_range( if ( x->s < s ) { - x->e = s - 1; + if ( x->e >= s ) + x->e = s - 1; x = next_range(r, x); } ++++++ 6800b54f-x86-HVM-update-repeat-count-upon.patch ++++++ # Commit c07b16fd6e47782ebf1ee767cd07c1e2b4140f47 # Date 2025-04-17 10:01:19 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86/HVM: update repeat count upon nested lin->phys failure For the X86EMUL_EXCEPTION case the repeat count must be correctly propagated back. Since for the recursive invocation we use a local helper variable, its value needs copying to the caller's one. While there also correct the off-by-1 range in the comment ahead of the function (strictly speaking for the "DF set" case we'd need to put another, different range there as well). 
Fixes: 53f87c03b4ea ("x86emul: generalize exception handling for rep_* hooks") Reported-by: Manuel Andreas <manuel.andr...@tum.de> Signed-off-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -825,7 +825,7 @@ static void hvmemul_unmap_linear_addr( /* * Convert addr from linear to physical form, valid over the range - * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to + * [addr, addr + *reps * bytes_per_rep). *reps is adjusted according to * the valid computed range. It is always >0 when X86EMUL_OKAY is returned. * @pfec indicates the access checks to be performed during page-table walks. */ @@ -865,7 +865,10 @@ static int hvmemul_linear_to_phys( int rc = hvmemul_linear_to_phys( addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt); if ( rc != X86EMUL_OKAY ) + { + *reps = one_rep; return rc; + } pfn = _paddr >> PAGE_SHIFT; } else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == gfn_x(INVALID_GFN) ) ++++++ 68076044-x86emul-clip-rep-count-for-STOS.patch ++++++ # Commit 8c5636b6c87777e6c2e4ffae28bffe1cfc189bfd # Date 2025-04-22 11:24:20 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86emul: also clip repetition count for STOS Like MOVS, INS, and OUTS, STOS also has a special purpose hook, where the hook function may legitimately have the same expectation as to the request not straddling address space start/end. 
Fixes: 5dfe4aa4eeb6 ("x86_emulate: Do not request emulation of REP instructions beyond the") Reported-by: Fabian Specht <f.spe...@tum.de> Signed-off-by: Jan Beulich <jbeul...@suse.com> Acked-by: Andrew Cooper <andrew.coop...@citrix.com> --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -2221,7 +2221,7 @@ x86_emulate( dst.bytes = src.bytes; dst.mem.seg = x86_seg_es; - dst.mem.off = truncate_ea(_regs.r(di)); + dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes); if ( (nr_reps == 1) || !ops->rep_stos || ((rc = ops->rep_stos(&src.val, dst.mem.seg, dst.mem.off, dst.bytes, ++++++ 6808f549-x86-Intel-work-around-MONITOR-MWAIT-errata.patch ++++++ # Commit 4aae4452efeee3d3bba092b875e37d1e7c8f6db9 # Date 2025-04-23 16:12:25 +0200 # Author Roger Pau Monne <roger....@citrix.com> # Committer Roger Pau Monne <roger....@citrix.com> x86/intel: workaround several MONITOR/MWAIT errata There are several errata on Intel regarding the usage of the MONITOR/MWAIT instructions, all having in common that stores to the monitored region might not wake up the CPU. Fix them by forcing the sending of an IPI for the affected models. The Ice Lake issue has been reproduced internally on XenServer hardware, and the fix does seem to prevent it. The symptom was APs getting stuck in the idle loop immediately after bring up, which in turn prevented the BSP from making progress. This would happen before the watchdog was initialized, and hence the whole system would get stuck. Signed-off-by: Roger Pau Monné <roger....@citrix.com> Acked-by: Jan Beulich <jbeul...@suse.com> Acked-by: Andrew Cooper <andrew.coop...@citrix.com> --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -441,8 +441,14 @@ void cpuidle_wakeup_mwait(cpumask_t *mas cpumask_andnot(mask, mask, &target); } +/* Force sending of a wakeup IPI regardless of mwait usage. 
*/ +bool __ro_after_init force_mwait_ipi_wakeup; + bool arch_skip_send_event_check(unsigned int cpu) { + if ( force_mwait_ipi_wakeup ) + return false; + /* * This relies on softirq_pending() and mwait_wakeup() to access data * on the same cache line. --- a/xen/arch/x86/cpu/intel.c +++ b/xen/arch/x86/cpu/intel.c @@ -8,6 +8,7 @@ #include <asm/intel-family.h> #include <asm/processor.h> #include <asm/msr.h> +#include <asm/mwait.h> #include <asm/uaccess.h> #include <asm/mpspec.h> #include <asm/apic.h> @@ -368,7 +369,6 @@ static void probe_c3_errata(const struct INTEL_FAM6_MODEL(0x25), { } }; -#undef INTEL_FAM6_MODEL /* Serialized by the AP bringup code. */ if ( max_cstate > 1 && (c->apicid & (c->x86_num_siblings - 1)) && @@ -381,6 +381,38 @@ static void probe_c3_errata(const struct } /* + * APL30: One use of the MONITOR/MWAIT instruction pair is to allow a logical + * processor to wait in a sleep state until a store to the armed address range + * occurs. Due to this erratum, stores to the armed address range may not + * trigger MWAIT to resume execution. + * + * ICX143: Under complex microarchitectural conditions, a monitor that is armed + * with the MWAIT instruction may not be triggered, leading to a processor + * hang. + * + * LNL030: Problem P-cores may not exit power state Core C6 on monitor hit. + * + * Force the sending of an IPI in those cases. + */ +static void __init probe_mwait_errata(void) +{ + static const struct x86_cpu_id __initconst models[] = { + INTEL_FAM6_MODEL(INTEL_FAM6_ATOM_GOLDMONT), /* APL30 */ + INTEL_FAM6_MODEL(INTEL_FAM6_ICELAKE_X), /* ICX143 */ + INTEL_FAM6_MODEL(INTEL_FAM6_LUNARLAKE_M), /* LNL030 */ + { } + }; +#undef INTEL_FAM6_MODEL + + if ( boot_cpu_has(X86_FEATURE_MONITOR) && x86_match_cpu(models) ) + { + printk(XENLOG_WARNING + "Forcing IPI MWAIT wakeup due to CPU erratum\n"); + force_mwait_ipi_wakeup = true; + } +} + +/* * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. 
* @@ -406,6 +438,8 @@ static void Intel_errata_workarounds(str __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability); probe_c3_errata(c); + if (system_state < SYS_STATE_smp_boot) + probe_mwait_errata(); } --- a/xen/arch/x86/include/asm/mwait.h +++ b/xen/arch/x86/include/asm/mwait.h @@ -13,6 +13,9 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 +/* Force sending of a wakeup IPI regardless of mwait usage. */ +extern bool force_mwait_ipi_wakeup; + void mwait_idle_with_hints(unsigned int eax, unsigned int ecx); #ifdef CONFIG_INTEL bool mwait_pc10_supported(void); ++++++ 68221f20-x86-alternative-when-feature-not-present.patch ++++++ # Commit 328ed39c59e0af06d594f5e64a52b57aa0b02340 # Date 2025-05-12 17:17:38 +0100 # Author Andrew Cooper <andrew.coop...@citrix.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/alternative: Support replacements when a feature is not present Use the top bit of a->cpuid to express inverted polarity. This requires stripping the top bit back out when performing the sanity checks. Despite only being used once, create a replace boolean to express the decision more clearly in _apply_alternatives(). 
Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- a/xen/arch/x86/alternative.c +++ b/xen/arch/x86/alternative.c @@ -197,6 +197,8 @@ static int init_or_livepatch _apply_alte uint8_t *repl = ALT_REPL_PTR(a); uint8_t buf[MAX_PATCH_LEN]; unsigned int total_len = a->orig_len + a->pad_len; + unsigned int feat = a->cpuid & ~ALT_FLAG_NOT; + bool inv = a->cpuid & ALT_FLAG_NOT, replace; if ( a->repl_len > total_len ) { @@ -214,11 +216,11 @@ static int init_or_livepatch _apply_alte return -ENOSPC; } - if ( a->cpuid >= NCAPINTS * 32 ) + if ( feat >= NCAPINTS * 32 ) { printk(XENLOG_ERR "Alt for %ps, feature %#x outside of featureset range %#x\n", - ALT_ORIG_PTR(a), a->cpuid, NCAPINTS * 32); + ALT_ORIG_PTR(a), feat, NCAPINTS * 32); return -ERANGE; } @@ -243,8 +245,14 @@ static int init_or_livepatch _apply_alte continue; } + /* + * Should a replacement be performed? Most replacements have positive + * polarity, but we support negative polarity too. + */ + replace = boot_cpu_has(feat) ^ inv; + /* If there is no replacement to make, see about optimising the nops. */ - if ( !boot_cpu_has(a->cpuid) ) + if ( !replace ) { /* Origin site site already touched? Don't nop anything. */ if ( base->priv ) --- a/xen/arch/x86/include/asm/alternative-asm.h +++ b/xen/arch/x86/include/asm/alternative-asm.h @@ -12,7 +12,7 @@ * instruction. See apply_alternatives(). */ .macro altinstruction_entry orig, repl, feature, orig_len, repl_len, pad_len - .if \feature >= NCAPINTS * 32 + .if ((\feature) & ~ALT_FLAG_NOT) >= NCAPINTS * 32 .error "alternative feature outside of featureset range" .endif .long \orig - . --- a/xen/arch/x86/include/asm/alternative.h +++ b/xen/arch/x86/include/asm/alternative.h @@ -1,6 +1,13 @@ #ifndef __X86_ALTERNATIVE_H__ #define __X86_ALTERNATIVE_H__ +/* + * Common to both C and ASM. Express a replacement when a feature is not + * available. 
+ */ +#define ALT_FLAG_NOT (1 << 15) +#define ALT_NOT(x) (ALT_FLAG_NOT | (x)) + #ifdef __ASSEMBLY__ #include <asm/alternative-asm.h> #else @@ -12,7 +19,7 @@ struct __packed alt_instr { int32_t orig_offset; /* original instruction */ int32_t repl_offset; /* offset to replacement instruction */ - uint16_t cpuid; /* cpuid bit set for replacement */ + uint16_t cpuid; /* cpuid bit set for replacement (top bit is polarity) */ uint8_t orig_len; /* length of original instruction */ uint8_t repl_len; /* length of new instruction */ uint8_t pad_len; /* length of build-time padding */ @@ -60,7 +67,7 @@ extern void alternative_branches(void); alt_repl_len(n2)) "-" alt_orig_len) #define ALTINSTR_ENTRY(feature, num) \ - " .if " STR(feature) " >= " STR(NCAPINTS * 32) "\n" \ + " .if (" STR(feature & ~ALT_FLAG_NOT) ") >= " STR(NCAPINTS * 32) "\n" \ " .error \"alternative feature outside of featureset range\"\n" \ " .endif\n" \ " .long .LXEN%=_orig_s - .\n" /* label */ \ ++++++ 68221f21-x86-guest-remove-Xen-hypercall_page.patch ++++++ # Commit ef30ffe0a0f79313c00720793c475c45a9e490ff # Date 2025-05-12 17:17:38 +0100 # Author Andrew Cooper <andrew.coop...@citrix.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/guest: Remove use of the Xen hypercall_page In order to protect against ITS, Xen needs to start using return thunks. Therefore the advice in XSA-466 becomes relevant, and the hypercall_page needs to be removed. Implement early_hypercall(), with infrastructure to figure out the correct instruction on first use. Use ALTERNATIVE()s to result in inline hypercalls, including the ALT_NOT() form so we only need a single synthetic feature bit. No overall change. 
This is part of XSA-469 / CVE-2024-28956 Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Roger Pau Monné <roger....@citrix.com> --- a/xen/arch/x86/guest/xen/Makefile +++ b/xen/arch/x86/guest/xen/Makefile @@ -1,4 +1,4 @@ -obj-y += hypercall_page.o +obj-bin-y += hypercall.init.o obj-y += xen.o obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o --- /dev/null +++ b/xen/arch/x86/guest/xen/hypercall.S @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <xen/linkage.h> + + .section .init.text, "ax", @progbits + + /* + * Used during early boot, before alternatives have run and inlined + * the appropriate instruction. Called using the hypercall ABI. + */ +FUNC(early_hypercall) + cmpb $0, early_hypercall_insn(%rip) + jl .L_setup + je 1f + + vmmcall + ret + +1: vmcall + ret + +.L_setup: + /* + * When setting up the first time around, all registers need + * preserving. Save the non-callee-saved ones. + */ + push %r11 + push %r10 + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + call early_hypercall_setup + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + + jmp early_hypercall +END(early_hypercall) --- a/xen/arch/x86/guest/xen/hypercall_page.S +++ /dev/null @@ -1,76 +0,0 @@ -#include <asm/page.h> -#include <asm/asm_defns.h> -#include <public/xen.h> - - .section ".text.page_aligned", "ax", @progbits - -DATA(hypercall_page, PAGE_SIZE) - /* Poisoned with `ret` for safety before hypercalls are set up. */ - .fill PAGE_SIZE, 1, 0xc3 -END(hypercall_page) - -/* - * Identify a specific hypercall in the hypercall page - * @param name Hypercall name. 
- */ -#define DECLARE_HYPERCALL(name) \ - .globl HYPERCALL_ ## name; \ - .type HYPERCALL_ ## name, STT_FUNC; \ - .size HYPERCALL_ ## name, 32; \ - .set HYPERCALL_ ## name, hypercall_page + __HYPERVISOR_ ## name * 32 - -DECLARE_HYPERCALL(set_trap_table) -DECLARE_HYPERCALL(mmu_update) -DECLARE_HYPERCALL(set_gdt) -DECLARE_HYPERCALL(stack_switch) -DECLARE_HYPERCALL(set_callbacks) -DECLARE_HYPERCALL(fpu_taskswitch) -DECLARE_HYPERCALL(sched_op_compat) -DECLARE_HYPERCALL(platform_op) -DECLARE_HYPERCALL(set_debugreg) -DECLARE_HYPERCALL(get_debugreg) -DECLARE_HYPERCALL(update_descriptor) -DECLARE_HYPERCALL(memory_op) -DECLARE_HYPERCALL(multicall) -DECLARE_HYPERCALL(update_va_mapping) -DECLARE_HYPERCALL(set_timer_op) -DECLARE_HYPERCALL(event_channel_op_compat) -DECLARE_HYPERCALL(xen_version) -DECLARE_HYPERCALL(console_io) -DECLARE_HYPERCALL(physdev_op_compat) -DECLARE_HYPERCALL(grant_table_op) -DECLARE_HYPERCALL(vm_assist) -DECLARE_HYPERCALL(update_va_mapping_otherdomain) -DECLARE_HYPERCALL(iret) -DECLARE_HYPERCALL(vcpu_op) -DECLARE_HYPERCALL(set_segment_base) -DECLARE_HYPERCALL(mmuext_op) -DECLARE_HYPERCALL(xsm_op) -DECLARE_HYPERCALL(nmi_op) -DECLARE_HYPERCALL(sched_op) -DECLARE_HYPERCALL(callback_op) -DECLARE_HYPERCALL(xenoprof_op) -DECLARE_HYPERCALL(event_channel_op) -DECLARE_HYPERCALL(physdev_op) -DECLARE_HYPERCALL(hvm_op) -DECLARE_HYPERCALL(sysctl) -DECLARE_HYPERCALL(domctl) -DECLARE_HYPERCALL(kexec_op) -DECLARE_HYPERCALL(argo_op) -DECLARE_HYPERCALL(xenpmu_op) - -DECLARE_HYPERCALL(arch_0) -DECLARE_HYPERCALL(arch_1) -DECLARE_HYPERCALL(arch_2) -DECLARE_HYPERCALL(arch_3) -DECLARE_HYPERCALL(arch_4) -DECLARE_HYPERCALL(arch_5) -DECLARE_HYPERCALL(arch_6) -DECLARE_HYPERCALL(arch_7) - -/* - * Local variables: - * tab-width: 8 - * indent-tabs-mode: nil - * End: - */ --- a/xen/arch/x86/guest/xen/xen.c +++ b/xen/arch/x86/guest/xen/xen.c @@ -26,7 +26,6 @@ bool __read_mostly xen_guest; uint32_t __read_mostly xen_cpuid_base; -extern char hypercall_page[]; static struct rangeset *mem; 
DEFINE_PER_CPU(unsigned int, vcpu_id); @@ -35,6 +34,50 @@ static struct vcpu_info *vcpu_info; static unsigned long vcpu_info_mapped[BITS_TO_LONGS(NR_CPUS)]; DEFINE_PER_CPU(struct vcpu_info *, vcpu_info); +/* + * Which instruction to use for early hypercalls: + * < 0 setup + * 0 vmcall + * > 0 vmmcall + */ +int8_t __initdata early_hypercall_insn = -1; + +/* + * Called once during the first hypercall to figure out which instruction to + * use. Error handling options are limited. + */ +void asmlinkage __init early_hypercall_setup(void) +{ + BUG_ON(early_hypercall_insn != -1); + + if ( !boot_cpu_data.x86_vendor ) + { + unsigned int eax, ebx, ecx, edx; + + cpuid(0, &eax, &ebx, &ecx, &edx); + + boot_cpu_data.x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); + } + + switch ( boot_cpu_data.x86_vendor ) + { + case X86_VENDOR_INTEL: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_SHANGHAI: + early_hypercall_insn = 0; + setup_force_cpu_cap(X86_FEATURE_USE_VMCALL); + break; + + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + early_hypercall_insn = 1; + break; + + default: + BUG(); + } +} + static void __init find_xen_leaves(void) { uint32_t eax, ebx, ecx, edx, base; @@ -337,9 +380,6 @@ const struct hypervisor_ops *__init xg_p if ( !xen_cpuid_base ) return NULL; - /* Fill the hypercall page. */ - wrmsrl(cpuid_ebx(xen_cpuid_base + 2), __pa(hypercall_page)); - xen_guest = true; return &ops; --- a/xen/arch/x86/include/asm/cpufeatures.h +++ b/xen/arch/x86/include/asm/cpufeatures.h @@ -42,6 +42,7 @@ XEN_CPUFEATURE(XEN_SHSTK, X86_SY XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SYNTH(28)) /* MSR_PRED_CMD used by Xen for PV */ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */ +XEN_CPUFEATURE(USE_VMCALL, X86_SYNTH(30)) /* Use VMCALL instead of VMMCALL */ /* Bug words follow the synthetic words. 
*/ #define X86_NR_BUG 1 --- a/xen/arch/x86/include/asm/guest/xen-hcall.h +++ b/xen/arch/x86/include/asm/guest/xen-hcall.h @@ -30,9 +30,11 @@ ({ \ long res, tmp__; \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__) ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)) \ : "memory" ); \ (type)res; \ @@ -42,10 +44,12 @@ ({ \ long res, tmp__; \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__), "=S" (tmp__) \ ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)), "2" ((long)(a2)) \ : "memory" ); \ (type)res; \ @@ -55,10 +59,12 @@ ({ \ long res, tmp__; \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__), "=S" (tmp__), "=d" (tmp__) \ ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)) \ : "memory" ); \ (type)res; \ @@ -69,10 +75,12 @@ long res, tmp__; \ register long _a4 asm ("r10") = ((long)(a4)); \ asm volatile ( \ - "call hypercall_page + %c[offset]" \ + ALTERNATIVE_2("call early_hypercall", \ + "vmmcall", ALT_NOT(X86_FEATURE_USE_VMCALL), \ + "vmcall", X86_FEATURE_USE_VMCALL) \ : "=a" (res), "=D" (tmp__), "=S" (tmp__), "=d" (tmp__), \ "=&r" (tmp__) ASM_CALL_CONSTRAINT \ - : [offset] "i" (hcall * 32), \ + : "0" (hcall), \ "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)), \ "4" (_a4) \ : "memory" ); \ ++++++ 68221f22-x86-misalign-__x86_indirect_thunk.patch ++++++ # Commit d293cc9da9021a51915e058acd1f05e83a462aa9 # Date 2025-05-12 17:17:38 +0100 
# Author Jan Beulich <jbeul...@suse.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/thunk: (Mis)align __x86_indirect_thunk_* to mitigate ITS The Indirect Target Selection speculative vulnerability means that indirect branches (including RETs) are unsafe when in the first half of a cacheline. Arrange for __x86_indirect_thunk_* to always be in the second half. This is part of XSA-469 / CVE-2024-28956 Signed-off-by: Jan Beulich <jbeul...@suse.com> Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- a/xen/arch/x86/indirect-thunk.S +++ b/xen/arch/x86/indirect-thunk.S @@ -11,6 +11,10 @@ #include <asm/asm_defns.h> +/* Alignment is dealt with explicitly here; override the respective macro. */ +#undef SYM_ALIGN +#define SYM_ALIGN(align...) + .macro IND_THUNK_RETPOLINE reg:req call 1f int3 @@ -35,6 +39,16 @@ .macro GEN_INDIRECT_THUNK reg:req .section .text.__x86_indirect_thunk_\reg, "ax", @progbits + /* + * The Indirect Target Selection speculative vulnerability means that + * indirect branches (including RETs) are unsafe when in the first + * half of a cacheline. Arrange for them to be in the second half. + * + * Align to 64, then skip 32. + */ + .balign 64 + .fill 32, 1, 0xcc + FUNC(__x86_indirect_thunk_\reg) ALTERNATIVE_2 __stringify(IND_THUNK_RETPOLINE \reg), \ __stringify(IND_THUNK_LFENCE \reg), X86_FEATURE_IND_THUNK_LFENCE, \ ++++++ 68221f23-x86-misalign-RETs-in-clear_bhb_loops.patch ++++++ # Commit c0db07031a41ff892f57756b7e2b4ef98df72588 # Date 2025-05-12 17:17:38 +0100 # Author Andrew Cooper <andrew.coop...@citrix.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/thunk: (Mis)align the RETs in clear_bhb_loops() to mitigate ITS The Indirect Target Selection speculative vulnerability means that indirect branches (including RETs) are unsafe when in the first half of a cacheline. clear_bhb_loops() has a precise layout of branches. 
The alignment for performance causes the RETs to always be in an unsafe position, and converting those to return thunks changes the branching pattern. While such a conversion is believed to be safe, clear_bhb_loops() is also a performance-relevant fastpath, so (mis)align the RETs to be in a safe position. No functional change. This is part of XSA-469 / CVE-2024-28956 Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Roger Pau Monné <roger....@citrix.com> --- a/xen/arch/x86/bhb-thunk.S +++ b/xen/arch/x86/bhb-thunk.S @@ -50,7 +50,12 @@ END(clear_bhb_tsx) * ret * * The CALL/RETs are necessary to prevent the Loop Stream Detector from - * interfering. The alignment is for performance and not safety. + * interfering. + * + * The .balign's are for performance, but they cause the RETs to be in unsafe + * positions with respect to Indirect Target Selection. The .skips are to + * move the RETs into ITS-safe positions, rather than using the slowpath + * through __x86_return_thunk. * * The "short" sequence (5 and 5) is for CPUs prior to Alder Lake / Sapphire * Rapids (i.e. Cores prior to Golden Cove and/or Gracemont). @@ -66,12 +71,14 @@ FUNC(clear_bhb_loops) jmp 5f int3 - .align 64 + .balign 64 + .skip 32 - (.Lr1 - 1f), 0xcc 1: call 2f - ret +.Lr1: ret int3 - .align 64 + .balign 64 + .skip 32 - 18 /* (.Lr2 - 2f) but Clang IAS doesn't like this */, 0xcc 2: ALTERNATIVE "mov $5, %eax", "mov $7, %eax", X86_SPEC_BHB_LOOPS_LONG 3: jmp 4f @@ -83,7 +90,7 @@ FUNC(clear_bhb_loops) sub $1, %ecx jnz 1b - ret +.Lr2: ret 5: /* * The Intel sequence has an LFENCE here.
The purpose is to ensure ++++++ 68221f24-x86-stubs-introduce-place_ret.patch ++++++ # Commit 2eb1132f796386e4524fb25dd0ed349e14ca35dd # Date 2025-05-12 17:17:38 +0100 # Author Andrew Cooper <andrew.coop...@citrix.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/stubs: Introduce place_ret() to abstract away raw 0xc3's The Indirect Target Selection speculative vulnerability means that indirect branches (including RETs) are unsafe when in the first half of a cacheline. This means it's not safe for logic using the stubs to write raw 0xc3's. Introduce place_ret() which, for now, writes a raw 0xc3 but will contain additional logic when return thunks are in use. stub_selftest() doesn't strictly need to be converted as they only run on boot, but doing so gets us a partial test of place_ret() too. No functional change. This is part of XSA-469 / CVE-2024-28956 Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Roger Pau Monné <roger....@citrix.com> --- a/tools/tests/x86_emulator/x86-emulate.h +++ b/tools/tests/x86_emulator/x86-emulate.h @@ -77,6 +77,12 @@ #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63)) +static inline void *place_ret(void *ptr) +{ + *(uint8_t *)ptr = 0xc3; + return ptr + 1; +} + extern uint32_t mxcsr_mask; extern struct cpu_policy cpu_policy; --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -11,9 +11,7 @@ obj-$(CONFIG_PV) += pv/ obj-y += x86_64/ obj-y += x86_emulate/ -alternative-y := alternative.init.o -alternative-$(CONFIG_LIVEPATCH) := -obj-bin-y += $(alternative-y) +obj-y += alternative.o obj-y += apic.o obj-y += bhb-thunk.o obj-y += bitops.o @@ -41,7 +39,7 @@ obj-y += hypercall.o obj-y += i387.o obj-y += i8259.o obj-y += io_apic.o -obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o +obj-$(CONFIG_LIVEPATCH) += livepatch.o obj-y += msi.o obj-y += msr.o obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o --- a/xen/arch/x86/alternative.c +++ b/xen/arch/x86/alternative.c @@ -138,6 
+138,20 @@ void init_or_livepatch add_nops(void *in } /* + * Place a return at @ptr. @ptr must be in the writable alias of a stub. + * + * Returns the next position to write into the stub. + */ +void *place_ret(void *ptr) +{ + uint8_t *p = ptr; + + *p++ = 0xc3; + + return p; +} + +/* * text_poke - Update instructions on a live kernel or non-executed code. * @addr: address to modify * @opcode: source of the copy --- a/xen/arch/x86/extable.c +++ b/xen/arch/x86/extable.c @@ -151,20 +151,20 @@ search_exception_table(const struct cpu_ int __init cf_check stub_selftest(void) { static const struct { - uint8_t opc[8]; + uint8_t opc[7]; uint64_t rax; union stub_exception_token res; } tests[] __initconst = { #define endbr64 0xf3, 0x0f, 0x1e, 0xfa - { .opc = { endbr64, 0x0f, 0xb9, 0xc3, 0xc3 }, /* ud1 */ + { .opc = { endbr64, 0x0f, 0xb9, 0x90 }, /* ud1 */ .res.fields.trapnr = X86_EXC_UD }, - { .opc = { endbr64, 0x90, 0x02, 0x00, 0xc3 }, /* nop; add (%rax),%al */ + { .opc = { endbr64, 0x90, 0x02, 0x00 }, /* nop; add (%rax),%al */ .rax = 0x0123456789abcdef, .res.fields.trapnr = X86_EXC_GP }, - { .opc = { endbr64, 0x02, 0x04, 0x04, 0xc3 }, /* add (%rsp,%rax),%al */ + { .opc = { endbr64, 0x02, 0x04, 0x04 }, /* add (%rsp,%rax),%al */ .rax = 0xfedcba9876543210UL, .res.fields.trapnr = X86_EXC_SS }, - { .opc = { endbr64, 0xcc, 0xc3, 0xc3, 0xc3 }, /* int3 */ + { .opc = { endbr64, 0xcc, 0x90, 0x90 }, /* int3 */ .res.fields.trapnr = X86_EXC_BP }, #undef endbr64 }; @@ -183,6 +183,7 @@ int __init cf_check stub_selftest(void) memset(ptr, 0xcc, STUB_BUF_SIZE / 2); memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc)); + place_ret(ptr + ARRAY_SIZE(tests[i].opc)); unmap_domain_page(ptr); asm volatile ( "INDIRECT_CALL %[stb]\n" --- a/xen/arch/x86/include/asm/alternative.h +++ b/xen/arch/x86/include/asm/alternative.h @@ -31,6 +31,8 @@ struct __packed alt_instr { #define ALT_REPL_PTR(a) __ALT_PTR(a, repl_offset) extern void add_nops(void *insns, unsigned int len); +void *place_ret(void *ptr); + /* 
Similar to alternative_instructions except it can be run with IRQs enabled. */ extern int apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void alternative_instructions(void); --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -76,7 +76,6 @@ static io_emul_stub_t *io_emul_stub_setu 0x41, 0x5c, /* pop %r12 */ 0x5d, /* pop %rbp */ 0x5b, /* pop %rbx */ - 0xc3, /* ret */ }; const struct stubs *this_stubs = &this_cpu(stubs); @@ -126,11 +125,13 @@ static io_emul_stub_t *io_emul_stub_setu APPEND_CALL(save_guest_gprs); APPEND_BUFF(epilogue); + p = place_ret(p); /* Build-time best effort attempt to catch problems. */ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < (sizeof(prologue) + sizeof(epilogue) + 10 /* 2x call */ + - MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES))); + MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES) + + 1 /* ret */)); /* Runtime confirmation that we haven't clobbered an adjacent stub. */ BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub)); --- a/xen/arch/x86/x86_emulate/fpu.c +++ b/xen/arch/x86/x86_emulate/fpu.c @@ -32,36 +32,42 @@ static inline bool fpu_check_write(void) #define emulate_fpu_insn_memdst(opc, ext, arg) \ do { \ + void *_p = get_stub(stub); \ /* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \ *insn_bytes = 2; \ - memcpy(get_stub(stub), \ - ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \ + memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \ + place_ret(_p); \ invoke_stub("", "", "+m" (arg) : "a" (&(arg))); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_memsrc(opc, ext, arg) \ do { \ + void *_p = get_stub(stub); \ /* ModRM: mod=0, reg=ext, rm=0, i.e. 
a (%rax) operand */ \ - memcpy(get_stub(stub), \ - ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \ + memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \ + place_ret(_p); \ invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg))); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_stub(bytes...) \ do { \ + void *_p = get_stub(stub); \ unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \ - memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \ + memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \ + place_ret(_p); \ invoke_stub("", "", "=m" (dummy) : "i" (0)); \ put_stub(stub); \ } while (0) #define emulate_fpu_insn_stub_eflags(bytes...) \ do { \ + void *_p = get_stub(stub); \ unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \ unsigned long tmp_; \ - memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \ + memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \ + place_ret(_p); \ invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"), \ _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), \ [eflags] "+g" (regs->eflags), [tmp] "=&r" (tmp_) \ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -1398,7 +1398,7 @@ x86_emulate( stb[3] = 0x91; stb[4] = evex.opmsk << 3; insn_bytes = 5; - stb[5] = 0xc3; + place_ret(&stb[5]); invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); @@ -3631,7 +3631,7 @@ x86_emulate( } opc[1] = (modrm & 0x38) | 0xc0; insn_bytes = EVEX_PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_EVEX(opc, evex); invoke_stub("", "", "=g" (dummy) : "a" (src.val)); @@ -3698,7 +3698,7 @@ x86_emulate( insn_bytes = PFX_BYTES + 2; copy_REX_VEX(opc, rex_prefix, vex); } - opc[2] = 0xc3; + place_ret(&opc[2]); ea.reg = decode_gpr(&_regs, modrm_reg); invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp)); @@ -3772,7 +3772,7 @@ x86_emulate( insn_bytes = PFX_BYTES + 2; copy_REX_VEX(opc, rex_prefix, vex); } - opc[2] = 0xc3; + place_ret(&opc[2]); _regs.eflags &= ~EFLAGS_MASK; 
invoke_stub("", @@ -4008,7 +4008,7 @@ x86_emulate( opc[1] = modrm & 0xc7; insn_bytes = PFX_BYTES + 2; simd_0f_to_gpr: - opc[insn_bytes - PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - PFX_BYTES]); generate_exception_if(ea.type != OP_REG, X86_EXC_UD); @@ -4405,7 +4405,7 @@ x86_emulate( vex.w = 0; opc[1] = modrm & 0x38; insn_bytes = PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_REX_VEX(opc, rex_prefix, vex); invoke_stub("", "", "+m" (src.val) : "a" (&src.val)); @@ -4442,7 +4442,7 @@ x86_emulate( evex.w = 0; opc[1] = modrm & 0x38; insn_bytes = EVEX_PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_EVEX(opc, evex); invoke_stub("", "", "+m" (src.val) : "a" (&src.val)); @@ -4637,7 +4637,7 @@ x86_emulate( #endif /* X86EMUL_NO_SIMD */ simd_0f_reg_only: - opc[insn_bytes - PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - PFX_BYTES]); copy_REX_VEX(opc, rex_prefix, vex); invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) ); @@ -4971,7 +4971,7 @@ x86_emulate( if ( !mode_64bit() ) vex.w = 0; opc[1] = modrm & 0xf8; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_VEX(opc, vex); ea.reg = decode_gpr(&_regs, modrm_rm); @@ -5014,7 +5014,7 @@ x86_emulate( if ( !mode_64bit() ) vex.w = 0; opc[1] = modrm & 0xc7; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_VEX(opc, vex); invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0)); @@ -5044,7 +5044,7 @@ x86_emulate( opc = init_prefixes(stub); opc[0] = b; opc[1] = modrm; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_VEX(opc, vex); _regs.eflags &= ~EFLAGS_MASK; @@ -5612,7 +5612,7 @@ x86_emulate( if ( !mode_64bit() ) vex.w = 0; opc[1] = modrm & 0xc7; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_REX_VEX(opc, rex_prefix, vex); invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0)); @@ -5730,7 +5730,7 @@ x86_emulate( opc[1] &= 0x38; } insn_bytes = PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); if ( vex.opcx == vex_none ) { /* Cover for extra prefix byte. 
*/ @@ -6010,7 +6010,7 @@ x86_emulate( pvex->b = !mode_64bit() || (vex.reg >> 3); opc[1] = 0xc0 | (~vex.reg & 7); pvex->reg = 0xf; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0)); put_stub(stub); @@ -6284,7 +6284,7 @@ x86_emulate( evex.w = 0; opc[1] = modrm & 0xf8; insn_bytes = EVEX_PFX_BYTES + 2; - opc[2] = 0xc3; + place_ret(&opc[2]); copy_EVEX(opc, evex); invoke_stub("", "", "=g" (dummy) : "a" (src.val)); @@ -6383,7 +6383,7 @@ x86_emulate( pvex->b = 1; opc[1] = (modrm_reg & 7) << 3; pvex->reg = 0xf; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); @@ -6453,7 +6453,7 @@ x86_emulate( pvex->b = 1; opc[1] = (modrm_reg & 7) << 3; pvex->reg = 0xf; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); @@ -6509,7 +6509,7 @@ x86_emulate( pevex->b = 1; opc[1] = (modrm_reg & 7) << 3; pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); @@ -6574,7 +6574,7 @@ x86_emulate( pevex->b = 1; opc[1] = (modrm_reg & 7) << 3; pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp)); @@ -6588,7 +6588,7 @@ x86_emulate( opc[2] = 0x90; /* Use (%rax) as source. */ opc[3] = evex.opmsk << 3; - opc[4] = 0xc3; + place_ret(&opc[4]); invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); put_stub(stub); @@ -6664,7 +6664,7 @@ x86_emulate( pevex->b = 1; opc[1] = (modrm_reg & 7) << 3; pevex->RX = 1; - opc[2] = 0xc3; + place_ret(&opc[2]); invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp)); @@ -6741,7 +6741,7 @@ x86_emulate( opc[2] = 0x90; /* Use (%rax) as source. 
*/ opc[3] = evex.opmsk << 3; - opc[4] = 0xc3; + place_ret(&opc[4]); invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask)); put_stub(stub); @@ -6940,7 +6940,7 @@ x86_emulate( pvex->reg = 0xf; /* rAX */ buf[3] = b; buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg)); @@ -6976,7 +6976,7 @@ x86_emulate( pvex->reg = 0xf; /* rAX */ buf[3] = b; buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); emulate_stub("=&a" (dst.val), "c" (&src.val)); @@ -7217,7 +7217,7 @@ x86_emulate( evex.w = vex.w = 0; opc[1] = modrm & 0x38; opc[2] = imm1; - opc[3] = 0xc3; + place_ret(&opc[3]); if ( vex.opcx == vex_none ) { /* Cover for extra prefix byte. */ @@ -7384,7 +7384,7 @@ x86_emulate( insn_bytes = PFX_BYTES + 3; copy_VEX(opc, vex); } - opc[3] = 0xc3; + place_ret(&opc[3]); /* Latch MXCSR - we may need to restore it below. */ invoke_stub("stmxcsr %[mxcsr]", "", @@ -7630,7 +7630,7 @@ x86_emulate( } opc[2] = imm1; insn_bytes = PFX_BYTES + 3; - opc[3] = 0xc3; + place_ret(&opc[3]); if ( vex.opcx == vex_none ) { /* Cover for extra prefix byte. 
*/ @@ -7976,7 +7976,7 @@ x86_emulate( pxop->reg = 0xf; /* rAX */ buf[3] = b; buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt); emulate_stub([dst] "=&a" (dst.val), "c" (&src.val)); @@ -8085,7 +8085,7 @@ x86_emulate( buf[3] = b; buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */ *(uint32_t *)(buf + 5) = imm1; - buf[9] = 0xc3; + place_ret(&buf[9]); emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val)); @@ -8181,12 +8181,12 @@ x86_emulate( if ( evex_encoded() ) { - opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - EVEX_PFX_BYTES]); copy_EVEX(opc, evex); } else { - opc[insn_bytes - PFX_BYTES] = 0xc3; + place_ret(&opc[insn_bytes - PFX_BYTES]); copy_REX_VEX(opc, rex_prefix, vex); } @@ -8510,7 +8510,7 @@ int x86_emul_rmw( pvex->reg = 0xf; /* rAX */ buf[3] = ctxt->opcode; buf[4] = 0x11; /* reg=rDX r/m=(%RCX) */ - buf[5] = 0xc3; + place_ret(&buf[5]); *eflags &= ~EFLAGS_MASK; invoke_stub("", ++++++ 68221f25-x86-build-with-Return-Thunks.patch ++++++ # Commit afcb4a06c740f7f71d2e9746c9d147c38a6e6c90 # Date 2025-05-12 17:17:38 +0100 # Author Jan Beulich <jbeul...@suse.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/thunk: Build Xen with Return Thunks The Indirect Target Selection speculative vulnerability means that indirect branches (including RETs) are unsafe when in the first half of a cacheline. In order to mitigate this, build with return thunks and arrange for __x86_return_thunk to be (mis)aligned in the same manner as __x86_indirect_thunk_* so the RET instruction is placed in a safe location. place_ret() needs to conditionally emit JMP __x86_return_thunk instead of RET. 
This is part of XSA-469 / CVE-2024-28956 Signed-off-by: Jan Beulich <jbeul...@suse.com> Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Roger Pau Monné <roger....@citrix.com> --- a/xen/arch/x86/Kconfig +++ b/xen/arch/x86/Kconfig @@ -38,9 +38,14 @@ config ARCH_DEFCONFIG default "arch/x86/configs/x86_64_defconfig" config CC_HAS_INDIRECT_THUNK + # GCC >= 8 or Clang >= 6 def_bool $(cc-option,-mindirect-branch-register) || \ $(cc-option,-mretpoline-external-thunk) +config CC_HAS_RETURN_THUNK + # GCC >= 8 or Clang >= 15 + def_bool $(cc-option,-mfunction-return=thunk-extern) + config HAS_AS_CET_SS # binutils >= 2.29 or LLVM >= 6 def_bool $(as-instr,wrssq %rax$(comma)0;setssbsy) --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o obj-y += msi.o obj-y += msr.o obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o +obj-$(CONFIG_RETURN_THUNK) += indirect-thunk.o obj-$(CONFIG_PV) += ioport_emulate.o obj-y += irq.o obj-$(CONFIG_KEXEC) += machine_kexec.o --- a/xen/arch/x86/acpi/wakeup_prot.S +++ b/xen/arch/x86/acpi/wakeup_prot.S @@ -131,7 +131,7 @@ LABEL(s3_resume) pop %r12 pop %rbx pop %rbp - ret + RET END(do_suspend_lowlevel) .data --- a/xen/arch/x86/alternative.c +++ b/xen/arch/x86/alternative.c @@ -137,16 +137,45 @@ void init_or_livepatch add_nops(void *in } } +void nocall __x86_return_thunk(void); + /* * Place a return at @ptr. @ptr must be in the writable alias of a stub. * + * When CONFIG_RETURN_THUNK is active, this may be a JMP __x86_return_thunk + * instead, depending on the safety of @ptr with respect to Indirect Target + * Selection. + * * Returns the next position to write into the stub. */ void *place_ret(void *ptr) { + unsigned long addr = (unsigned long)ptr; uint8_t *p = ptr; - *p++ = 0xc3; + /* + * When Return Thunks are used, if a RET would be unsafe at this location + * with respect to Indirect Target Selection (i.e. 
if addr is in the first + * half of a cacheline), insert a JMP __x86_return_thunk instead. + * + * The displacement needs to be relative to the executable alias of the + * stub, not to @ptr which is the writeable alias. + */ + if ( IS_ENABLED(CONFIG_RETURN_THUNK) && !(addr & 0x20) ) + { + long stub_va = (this_cpu(stubs.addr) & PAGE_MASK) + (addr & ~PAGE_MASK); + long disp = (long)__x86_return_thunk - (stub_va + 5); + + BUG_ON((int32_t)disp != disp); + + *p++ = 0xe9; + *(int32_t *)p = disp; + p += 4; + } + else + { + *p++ = 0xc3; + } return p; } --- a/xen/arch/x86/arch.mk +++ b/xen/arch/x86/arch.mk @@ -44,6 +44,9 @@ CFLAGS-$(CONFIG_CC_IS_GCC) += -fno-jump- CFLAGS-$(CONFIG_CC_IS_CLANG) += -mretpoline-external-thunk endif +# Compile with return thunk support if selected. +CFLAGS-$(CONFIG_RETURN_THUNK) += -mfunction-return=thunk-extern + # Disable the addition of a .note.gnu.property section to object files when # livepatch support is enabled. The contents of that section can change # depending on the instructions used, and livepatch-build-tools doesn't know --- a/xen/arch/x86/bhb-thunk.S +++ b/xen/arch/x86/bhb-thunk.S @@ -23,7 +23,7 @@ FUNC(clear_bhb_tsx) 0: .byte 0xc6, 0xf8, 0 /* xabort $0 */ int3 1: - ret + RET END(clear_bhb_tsx) /* --- a/xen/arch/x86/clear_page.S +++ b/xen/arch/x86/clear_page.S @@ -1,6 +1,8 @@ .file __FILE__ #include <xen/linkage.h> + +#include <asm/asm_defns.h> #include <asm/page.h> FUNC(clear_page_sse2) @@ -16,5 +18,5 @@ FUNC(clear_page_sse2) jnz 0b sfence - ret + RET END(clear_page_sse2) --- a/xen/arch/x86/copy_page.S +++ b/xen/arch/x86/copy_page.S @@ -1,6 +1,8 @@ .file __FILE__ #include <xen/linkage.h> + +#include <asm/asm_defns.h> #include <asm/page.h> #define src_reg %rsi @@ -41,5 +43,5 @@ FUNC(copy_page_sse2) movnti tmp4_reg, 3*WORD_SIZE(dst_reg) sfence - ret + RET END(copy_page_sse2) --- a/xen/arch/x86/efi/check.c +++ b/xen/arch/x86/efi/check.c @@ -3,6 +3,9 @@ int __attribute__((__ms_abi__)) test(int return i; } +/* In case 
-mfunction-return is in use. */ +void __x86_return_thunk(void) {}; + /* * Populate an array with "addresses" of relocatable and absolute values. * This is to probe ld for (a) emitting base relocations at all and (b) not --- a/xen/arch/x86/include/asm/asm-defns.h +++ b/xen/arch/x86/include/asm/asm-defns.h @@ -58,6 +58,12 @@ .endif .endm +#ifdef CONFIG_RETURN_THUNK +# define RET jmp __x86_return_thunk +#else +# define RET ret +#endif + #ifdef CONFIG_XEN_IBT # define ENDBR64 endbr64 #else --- a/xen/arch/x86/indirect-thunk.S +++ b/xen/arch/x86/indirect-thunk.S @@ -15,6 +15,8 @@ #undef SYM_ALIGN #define SYM_ALIGN(align...) +#ifdef CONFIG_INDIRECT_THUNK + .macro IND_THUNK_RETPOLINE reg:req call 1f int3 @@ -62,3 +64,25 @@ END(__x86_indirect_thunk_\reg) .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 GEN_INDIRECT_THUNK reg=r\reg .endr + +#endif /* CONFIG_INDIRECT_THUNK */ + +#ifdef CONFIG_RETURN_THUNK + .section .text.entry.__x86_return_thunk, "ax", @progbits + + /* + * The Indirect Target Selection speculative vulnerability means that + * indirect branches (including RETs) are unsafe when in the first + * half of a cacheline. Arrange for them to be in the second half. + * + * Align to 64, then skip 32. + */ + .balign 64 + .fill 32, 1, 0xcc + +FUNC(__x86_return_thunk) + ret + int3 /* Halt straight-line speculation */ +END(__x86_return_thunk) + +#endif /* CONFIG_RETURN_THUNK */ --- a/xen/arch/x86/pv/emul-priv-op.c +++ b/xen/arch/x86/pv/emul-priv-op.c @@ -131,7 +131,7 @@ static io_emul_stub_t *io_emul_stub_setu BUILD_BUG_ON(STUB_BUF_SIZE / 2 < (sizeof(prologue) + sizeof(epilogue) + 10 /* 2x call */ + MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES) + - 1 /* ret */)); + (IS_ENABLED(CONFIG_RETURN_THUNK) ? 5 : 1) /* ret */)); /* Runtime confirmation that we haven't clobbered an adjacent stub. 
*/ BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub)); --- a/xen/arch/x86/pv/gpr_switch.S +++ b/xen/arch/x86/pv/gpr_switch.S @@ -26,7 +26,7 @@ FUNC(load_guest_gprs) movq UREGS_r15(%rdi), %r15 movq UREGS_rcx(%rdi), %rcx movq UREGS_rdi(%rdi), %rdi - ret + RET END(load_guest_gprs) /* Save guest GPRs. Parameter on the stack above the return address. */ @@ -48,5 +48,5 @@ FUNC(save_guest_gprs) movq %rbx, UREGS_rbx(%rdi) movq %rdx, UREGS_rdx(%rdi) movq %rcx, UREGS_rcx(%rdi) - ret + RET END(save_guest_gprs) --- a/xen/arch/x86/spec_ctrl.c +++ b/xen/arch/x86/spec_ctrl.c @@ -571,6 +571,9 @@ static void __init print_details(enum in #ifdef CONFIG_INDIRECT_THUNK " INDIRECT_THUNK" #endif +#ifdef CONFIG_RETURN_THUNK + " RETURN_THUNK" +#endif #ifdef CONFIG_SHADOW_PAGING " SHADOW_PAGING" #endif --- a/xen/arch/x86/x86_64/compat/entry.S +++ b/xen/arch/x86/x86_64/compat/entry.S @@ -180,7 +180,7 @@ FUNC(cr4_pv32_restore) or cr4_pv32_mask(%rip), %rax mov %rax, %cr4 mov %rax, (%rcx) - ret + RET 0: #ifndef NDEBUG /* Check that _all_ of the bits intended to be set actually are. */ @@ -198,7 +198,7 @@ FUNC(cr4_pv32_restore) 1: #endif xor %eax, %eax - ret + RET END(cr4_pv32_restore) FUNC(compat_syscall) @@ -329,7 +329,7 @@ __UNLIKELY_END(compat_bounce_null_select xor %eax, %eax mov %ax, TRAPBOUNCE_cs(%rdx) mov %al, TRAPBOUNCE_flags(%rdx) - ret + RET .section .fixup,"ax" .Lfx13: --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -604,7 +604,7 @@ __UNLIKELY_END(create_bounce_frame_bad_b xor %eax, %eax mov %rax, TRAPBOUNCE_eip(%rdx) mov %al, TRAPBOUNCE_flags(%rdx) - ret + RET .pushsection .fixup, "ax", @progbits # Numeric tags below represent the intended overall %rsi adjustment. --- a/xen/arch/x86/xen.lds.S +++ b/xen/arch/x86/xen.lds.S @@ -83,6 +83,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _stextentry = .; *(.text.entry) + *(.text.entry.*) . 
= ALIGN(PAGE_SIZE); _etextentry = .; --- a/xen/common/Kconfig +++ b/xen/common/Kconfig @@ -136,6 +136,17 @@ config INDIRECT_THUNK When enabled, indirect branches are implemented using a new construct called "retpoline" that prevents speculation. +config RETURN_THUNK + bool "Out-of-line Returns" + depends on CC_HAS_RETURN_THUNK + default INDIRECT_THUNK + help + Compile Xen with out-of-line returns. + + This allows Xen to mitigate a variety of speculative vulnerabilities + by choosing a hardware-dependent instruction sequence to implement + function returns safely. + config SPECULATIVE_HARDEN_ARRAY bool "Speculative Array Hardening" default y --- a/xen/lib/x86-generic-hweightl.c +++ b/xen/lib/x86-generic-hweightl.c @@ -51,7 +51,11 @@ asm ( "pop %rdx\n\t" "pop %rdi\n\t" +#ifdef CONFIG_RETURN_THUNK + "jmp __x86_return_thunk\n\t" +#else "ret\n\t" +#endif ".size arch_generic_hweightl, . - arch_generic_hweightl\n\t" ); ++++++ 68221f26-x86-spec-ctrl-synthesise-ITS_NO.patch ++++++ # Commit f6042f38e621525feff86bb101dc751d2d87cff8 # Date 2025-05-12 17:17:38 +0100 # Author Andrew Cooper <andrew.coop...@citrix.com> # Committer Andrew Cooper <andrew.coop...@citrix.com> x86/spec-ctrl: Synthesise ITS_NO to guests on unaffected hardware It is easier to express feature word 17 in terms of word 16 + [32, 64) as that's how the layout is given in documentation. This is part of XSA-469 / CVE-2024-28956 Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Roger Pau Monné <roger....@citrix.com> --- a/xen/arch/x86/include/asm/cpufeature.h +++ b/xen/arch/x86/include/asm/cpufeature.h @@ -218,6 +218,7 @@ static inline bool boot_cpu_has(unsigned #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) #define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) #define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) +#define cpu_has_its_no boot_cpu_has(X86_FEATURE_ITS_NO) /* Synthesized. 
*/ #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) --- a/xen/arch/x86/spec_ctrl.c +++ b/xen/arch/x86/spec_ctrl.c @@ -1760,6 +1760,90 @@ static void __init bhi_calculations(void } } +/* + * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/indirect-target-selection.html + */ +static void __init its_calculations(void) +{ + /* + * Indirect Target Selection is a Branch Prediction bug whereby certain + * indirect branches (including RETs) get predicted using a direct branch + * target, rather than a suitable indirect target, bypassing hardware + * isolation protections. + * + * ITS affects Core (but not Atom) processors starting from the + * introduction of eIBRS, up to but not including Golden Cove cores + * (checked here with BHI_CTRL). + * + * The ITS_NO feature is not expected to be enumerated by hardware, and is + * only for VMMs to synthesise for guests. + * + * ITS comes in 3 flavours: + * + * 1) Across-IBPB. Indirect branches after the IBPB can be controlled + * by direct targets which existed prior to the IBPB. This is + * addressed in the IPU 2025.1 microcode drop, and has no other + * software interaction. + * + * 2) Guest/Host. Indirect branches in the VMM can be controlled by + * direct targets from the guest. This applies equally to PV guests + * (Ring3) and HVM guests (VMX), and applies to all Skylake-uarch + * cores with eIBRS. + * + * 3) Intra-mode. Indirect branches in the VMM can be controlled by + * other execution in the same mode. + */ + + /* + * If we can see ITS_NO, or we're virtualised, do nothing. We are or may + * migrate somewhere unsafe. + */ + if ( cpu_has_its_no || cpu_has_hypervisor ) + return; + + /* ITS is only known to affect Intel processors at this time. 
*/ + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) + return; + + /* + * ITS does not exist on: + * - non-Family 6 CPUs + * - those without eIBRS + * - those with BHI_CTRL + * but we still need to synthesise ITS_NO. + */ + if ( boot_cpu_data.x86 != 6 || !cpu_has_eibrs || + boot_cpu_has(X86_FEATURE_BHI_CTRL) ) + goto synthesise; + + switch ( boot_cpu_data.x86_model ) + { + /* These Skylake-uarch cores suffer cases #2 and #3. */ + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE: + case INTEL_FAM6_COMETLAKE_L: + return; + + /* These Sunny/Willow/Cypress Cove cores suffer case #3. */ + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + return; + + default: + break; + } + + /* Platforms remaining are not believed to be vulnerable to ITS. */ + synthesise: + setup_force_cpu_cap(X86_FEATURE_ITS_NO); +} + void spec_ctrl_init_domain(struct domain *d) { bool pv = is_pv_domain(d); @@ -2316,6 +2400,8 @@ void __init init_speculation_mitigations bhi_calculations(); + its_calculations(); + print_details(thunk); /* --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -379,7 +379,8 @@ XEN_CPUFEATURE(GDS_NO, 16*32 XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A| Register File(s) cleared by VERW */ -/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ +/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 (express in terms of word 16) */ +XEN_CPUFEATURE(ITS_NO, 16*32+62) /*!A No Indirect Target Selection */ #endif /* XEN_CPUFEATURE */ --- a/xen/tools/gen-cpuid.py +++ b/xen/tools/gen-cpuid.py @@ -51,7 +51,7 @@ def parse_definitions(state): r"\s+/\*([\w!|]*) .*$") word_regex = re.compile( - r"^/\* .* word (\d*) \*/$") + r"^/\* .* word (\d*) .*\*/$") 
last_word = -1 this = sys.modules[__name__] ++++++ ignore-ip-command-script-errors.patch ++++++ --- /var/tmp/diff_new_pack.io0R8x/_old 2025-05-20 09:31:30.696749940 +0200 +++ /var/tmp/diff_new_pack.io0R8x/_new 2025-05-20 09:31:30.700750105 +0200 @@ -11,44 +11,42 @@ and executing it manually at the command line. This seems to be an artifact of using 'set -e' everywhere. -Index: xen-4.15.0-testing/tools/hotplug/Linux/xen-network-common.sh -=================================================================== ---- xen-4.15.0-testing.orig/tools/hotplug/Linux/xen-network-common.sh -+++ xen-4.15.0-testing/tools/hotplug/Linux/xen-network-common.sh -@@ -90,7 +90,7 @@ _setup_bridge_port() { +--- + tools/hotplug/Linux/xen-network-common.sh | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/hotplug/Linux/xen-network-common.sh ++++ b/tools/hotplug/Linux/xen-network-common.sh +@@ -84,7 +84,7 @@ local virtual="$2" # take interface down ... - ip link set dev ${dev} down -+ (ip link set dev ${dev} down || true) ++ ip link set dev ${dev} down || true if [ $virtual -ne 0 ] ; then # Initialise a dummy MAC address. We choose the numerically -@@ -101,7 +101,7 @@ _setup_bridge_port() { +@@ -95,7 +95,7 @@ fi # ... and configure it - ip address flush dev ${dev} -+ (ip address flush dev ${dev} || true) ++ ip address flush dev ${dev} || true } setup_physical_bridge_port() { -@@ -136,15 +136,15 @@ add_to_bridge () { +@@ -123,12 +123,12 @@ + # Don't add $dev to $bridge if it's already on the bridge. if [ ! 
-e "/sys/class/net/${bridge}/brif/${dev}" ]; then log debug "adding $dev to bridge $bridge" - if which brctl >&/dev/null; then -- brctl addif ${bridge} ${dev} -+ (brctl addif ${bridge} ${dev} || true) - else - ip link set ${dev} master ${bridge} -+ (ip link set ${dev} master ${bridge} || true) - fi ++ ip link set ${dev} master ${bridge} || true else log debug "$dev already on bridge $bridge" fi - ip link set dev ${dev} up -+ (ip link set dev ${dev} up || true) ++ ip link set dev ${dev} up || true } remove_from_bridge () { ++++++ libxl.LIBXL_HOTPLUG_TIMEOUT.patch ++++++ --- /var/tmp/diff_new_pack.io0R8x/_old 2025-05-20 09:31:30.720750928 +0200 +++ /var/tmp/diff_new_pack.io0R8x/_new 2025-05-20 09:31:30.724751093 +0200 @@ -278,7 +278,7 @@ #include <xenevtchn.h> #include <xenstore.h> -@@ -1626,6 +1627,7 @@ struct libxl__xswait_state { +@@ -1624,6 +1625,7 @@ struct libxl__xswait_state { const char *what; /* for error msgs: noun phrase, what we're waiting for */ const char *path; int timeout_ms; /* as for poll(2) */ @@ -286,7 +286,7 @@ libxl__xswait_callback *callback; /* remaining fields are private to xswait */ libxl__ev_time time_ev; -@@ -2703,6 +2705,7 @@ struct libxl__async_exec_state { +@@ -2701,6 +2703,7 @@ struct libxl__async_exec_state { char **args; /* execution arguments */ char **env; /* execution environment */ @@ -294,7 +294,7 @@ /* private */ libxl__ev_time time; libxl__ev_child child; -@@ -4899,6 +4902,9 @@ _hidden int userlookup_helper_getpwuid(l +@@ -4893,6 +4896,9 @@ _hidden int userlookup_helper_getpwuid(l #endif ++++++ replace-obsolete-network-configuration-commands-in-s.patch ++++++ --- /var/tmp/diff_new_pack.io0R8x/_old 2025-05-20 09:31:30.768752903 +0200 +++ /var/tmp/diff_new_pack.io0R8x/_new 2025-05-20 09:31:30.776753232 +0200 @@ -6,105 +6,97 @@ Some scripts still use obsolete network configuration commands ifconfig and brctl. Replace them by commands from iproute2 package. 
--- - README | 3 +-- - tools/hotplug/Linux/colo-proxy-setup | 14 ++++++-------- - tools/hotplug/Linux/remus-netbuf-setup | 3 ++- - tools/hotplug/Linux/vif-bridge | 7 ++++--- - tools/hotplug/Linux/vif-nat | 2 +- - tools/hotplug/Linux/vif-route | 6 ++++-- - tools/hotplug/Linux/xen-network-common.sh | 6 ++---- - .../i386-dm/qemu-ifup-Linux | 5 +++-- - 9 files changed, 26 insertions(+), 26 deletions(-) + tools/hotplug/Linux/colo-proxy-setup | 14 -------------- + tools/hotplug/Linux/remus-netbuf-setup | 2 +- + tools/hotplug/Linux/vif-bridge | 6 +----- + tools/hotplug/Linux/vif-nat | 2 +- + tools/hotplug/Linux/vif-route | 6 ++++-- + tools/hotplug/Linux/xen-network-common.sh | 15 +-------------- + 6 files changed, 8 insertions(+), 37 deletions(-) -Index: xen-4.19.0-testing/README -=================================================================== ---- xen-4.19.0-testing.orig/README -+++ xen-4.19.0-testing/README -@@ -59,8 +59,7 @@ provided by your OS distributor: - * Development install of GLib v2.0 (e.g. libglib2.0-dev) - * Development install of Pixman (e.g. libpixman-1-dev) - * pkg-config -- * bridge-utils package (/sbin/brctl) -- * iproute package (/sbin/ip) -+ * iproute package (/sbin/ip, /sbin/bridge) - * GNU bison and GNU flex - * ACPI ASL compiler (iasl) - -Index: xen-4.19.0-testing/tools/hotplug/Linux/remus-netbuf-setup -=================================================================== ---- xen-4.19.0-testing.orig/tools/hotplug/Linux/remus-netbuf-setup -+++ xen-4.19.0-testing/tools/hotplug/Linux/remus-netbuf-setup -@@ -76,6 +76,7 @@ - #specific setup code such as renaming. - dir=$(dirname "$0") - . "$dir/xen-hotplug-common.sh" -+. 
"$dir/xen-network-common.sh" - - findCommand "$@" - -@@ -139,8 +140,16 @@ check_ifb() { +--- a/tools/hotplug/Linux/colo-proxy-setup ++++ b/tools/hotplug/Linux/colo-proxy-setup +@@ -76,17 +76,10 @@ + + function setup_secondary() + { +- if which brctl >&/dev/null; then +- do_without_error brctl delif $bridge $vifname +- do_without_error brctl addbr $forwardbr +- do_without_error brctl addif $forwardbr $vifname +- do_without_error brctl addif $forwardbr $forwarddev +- else + do_without_error ip link set $vifname nomaster + do_without_error ip link add name $forwardbr type bridge + do_without_error ip link set $vifname master $forwardbr + do_without_error ip link set $forwarddev master $forwardbr +- fi + do_without_error ip link set dev $forwardbr up + do_without_error modprobe xt_SECCOLO + +@@ -98,17 +91,10 @@ + + function teardown_secondary() + { +- if which brctl >&/dev/null; then +- do_without_error brctl delif $forwardbr $forwarddev +- do_without_error brctl delif $forwardbr $vifname +- do_without_error brctl delbr $forwardbr +- do_without_error brctl addif $bridge $vifname +- else + do_without_error ip link set $forwarddev nomaster + do_without_error ip link set $vifname nomaster + do_without_error ip link delete $forwardbr type bridge + do_without_error ip link set $vifname master $bridge +- fi + + do_without_error iptables -t mangle -D PREROUTING -m physdev --physdev-in \ + $vifname -j SECCOLO --index $index +--- a/tools/hotplug/Linux/remus-netbuf-setup ++++ b/tools/hotplug/Linux/remus-netbuf-setup +@@ -139,7 +139,7 @@ setup_ifb() { - for ifb in `ifconfig -a -s|egrep ^ifb|cut -d ' ' -f1` -+ if [ "$legacy_tools" ]; then -+ ifbs=`ifconfig -a -s|egrep ^ifb|cut -d ' ' -f1` -+ else -+ ifbs=$(ip --oneline link show type ifb | cut -d ' ' -f2) -+ fi -+ for ifb in $ifbs ++ for ifb in $(ip --oneline link show type ifb | awk -F : '(NR == 1) { print $2; }') do -+ if [ ! 
"$legacy_tools" ]; then -+ ifb="${ifb%:}" -+ fi check_ifb "$ifb" || continue REMUS_IFB="$ifb" - break -Index: xen-4.19.0-testing/tools/hotplug/Linux/vif-bridge -=================================================================== ---- xen-4.19.0-testing.orig/tools/hotplug/Linux/vif-bridge -+++ xen-4.19.0-testing/tools/hotplug/Linux/vif-bridge -@@ -42,7 +42,8 @@ if [ -z "$bridge" ]; then - if which brctl >&/dev/null; then - bridge=$(brctl show | awk 'NR==2{print$1}') - else +--- a/tools/hotplug/Linux/vif-bridge ++++ b/tools/hotplug/Linux/vif-bridge +@@ -39,11 +39,7 @@ + bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge") + + if [ -z "$bridge" ]; then +- if which brctl >&/dev/null; then +- bridge=$(brctl show | awk 'NR==2{print$1}') +- else - bridge=$(bridge link | cut -d" " -f7) -+ bridge=$(ip --oneline link show type bridge | awk '(NR == 1) { print $2; }') -+ bridge="${bridge%:}" - fi +- fi ++ read bridge < <(ip --oneline link show type bridge | awk -F : '(NR == 1) { print $2; }') if [ -z "$bridge" ] then -Index: xen-4.19.0-testing/tools/hotplug/Linux/vif-nat -=================================================================== ---- xen-4.19.0-testing.orig/tools/hotplug/Linux/vif-nat -+++ xen-4.19.0-testing/tools/hotplug/Linux/vif-nat -@@ -172,7 +172,11 @@ case "$command" in + fatal "Could not find bridge, and none was specified" +--- a/tools/hotplug/Linux/vif-nat ++++ b/tools/hotplug/Linux/vif-nat +@@ -172,7 +172,7 @@ ;; offline) [ "$dhcp" != 'no' ] && dhcp_down - do_without_error ifconfig "${dev}" down -+ if [ "$legacy_tools" ]; then -+ do_without_error ifconfig "${dev}" down -+ else -+ do_without_error ip link set "${dev}" down -+ fi ++ do_without_error ip link set "${dev}" down ;; esac -Index: xen-4.19.0-testing/tools/hotplug/Linux/vif-route -=================================================================== ---- xen-4.19.0-testing.orig/tools/hotplug/Linux/vif-route -+++ xen-4.19.0-testing/tools/hotplug/Linux/vif-route -@@ -23,13 +23,23 @@ 
main_ip=$(dom0_ip) +--- a/tools/hotplug/Linux/vif-route ++++ b/tools/hotplug/Linux/vif-route +@@ -23,13 +23,15 @@ case "${command}" in add|online) - ifconfig ${dev} ${main_ip} netmask 255.255.255.255 up -+ if [ "$legacy_tools" ]; then -+ ifconfig ${dev} ${main_ip} netmask 255.255.255.255 up -+ else -+ ip addr add "${main_ip}/32" dev "$dev" -+ fi ++ ip addr add "${main_ip}/32" dev "$dev" + ip link set "$dev" up echo 1 >/proc/sys/net/ipv4/conf/${dev}/proxy_arp ipcmd='add' @@ -112,41 +104,50 @@ ;; remove|offline) - do_without_error ifdown ${dev} -+ if [ "$legacy_tools" ]; then -+ do_without_error ifdown ${dev} -+ else -+ do_without_error ip addr flush dev "$dev" -+ do_without_error ip link set "$dev" down -+ fi ++ do_without_error ip addr flush dev "$dev" ++ do_without_error ip link set "$dev" down ipcmd='del' cmdprefix='do_without_error' ;; -Index: xen-4.19.0-testing/tools/hotplug/Linux/xen-network-common.sh -=================================================================== ---- xen-4.19.0-testing.orig/tools/hotplug/Linux/xen-network-common.sh -+++ xen-4.19.0-testing/tools/hotplug/Linux/xen-network-common.sh -@@ -15,6 +15,12 @@ - # - - -+# Use brctl and ifconfig on older systems -+legacy_tools= -+if [ -f /sbin/brctl -a -f /sbin/ifconfig ]; then -+ legacy_tools="true" -+fi -+ - # Gentoo doesn't have ifup/ifdown, so we define appropriate alternatives. +--- a/tools/hotplug/Linux/xen-network-common.sh ++++ b/tools/hotplug/Linux/xen-network-common.sh +@@ -111,13 +111,7 @@ + + # Don't create the bridge if it already exists. + if [ !
-e "/sys/class/net/${bridge}/brif/${dev}" ]; then + log debug "adding $dev to bridge $bridge" +- if which brctl >&/dev/null; then +- brctl addif ${bridge} ${dev} +- else + ip link set ${dev} master ${bridge} +- fi + else + log debug "$dev already on bridge $bridge" + fi +@@ -150,11 +140,8 @@ + # Don't remove $dev from $bridge if it's not on the bridge. + if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then log debug "removing $dev from bridge $bridge" - if which brctl >&/dev/null; then - do_without_error brctl delif ${bridge} ${dev} -+ do_without_error ifconfig "$dev" down - else +- if which brctl >&/dev/null; then +- do_without_error brctl delif ${bridge} ${dev} +- else do_without_error ip link set ${dev} nomaster +- fi + do_without_error ip link set "$dev" down - fi else log debug "$dev not on bridge $bridge" + fi ++++++ xen.libxl.dmmd.patch ++++++ --- /var/tmp/diff_new_pack.io0R8x/_old 2025-05-20 09:31:30.852756359 +0200 +++ /var/tmp/diff_new_pack.io0R8x/_new 2025-05-20 09:31:30.856756524 +0200 @@ -100,7 +100,7 @@ aio:/.* { DPC->had_depr_prefix=1; DEPRECATE(0); } --- a/tools/libs/light/libxl_internal.h +++ b/tools/libs/light/libxl_internal.h -@@ -2070,6 +2070,10 @@ _hidden char *libxl__object_to_json(libx +@@ -2068,6 +2068,10 @@ _hidden char *libxl__object_to_json(libx _hidden int libxl__cpuid_legacy(libxl_ctx *ctx, uint32_t domid, bool retore, libxl_domain_build_info *info); ++++++ xl-save-pc.patch ++++++ --- /var/tmp/diff_new_pack.io0R8x/_old 2025-05-20 09:31:30.968761132 +0200 +++ /var/tmp/diff_new_pack.io0R8x/_new 2025-05-20 09:31:30.972761297 +0200 @@ -67,7 +67,7 @@ int fd; uint8_t *config_data; int config_len; -@@ -144,12 +147,24 @@ static int save_domain(uint32_t domid, i +@@ -143,12 +146,24 @@ static int save_domain(uint32_t domid, i fprintf(stderr, "Failed to open temp file %s for writing\n", filename); exit(EXIT_FAILURE); } @@ -81,7 +81,7 @@ save_domain_core_writeconfig(fd, filename, config_data, config_len); - int rc = libxl_domain_suspend_suse(ctx, 
domid, fd, &props, NULL); + int rc = libxl_domain_suspend(ctx, domid, fd, 0, NULL); close(fd); + if (xsh) {