Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> --- ...t-Fix-VLAN-filter-table-reset-timing.patch | 107 +++++++++++++++ ...width-of-third-operand-of-VINSERTx12.patch | 46 +++++++ ...mem-fix-use-after-free-with-dispatch.patch | 126 ++++++++++++++++++ ...y-one-and-invalid-access-in-virtqueu.patch | 86 ++++++++++++ debian/patches/series | 4 + 5 files changed, 369 insertions(+) create mode 100644 debian/patches/extra/0003-virtio-net-Fix-VLAN-filter-table-reset-timing.patch create mode 100644 debian/patches/extra/0004-target-i386-fix-width-of-third-operand-of-VINSERTx12.patch create mode 100644 debian/patches/extra/0005-system-physmem-fix-use-after-free-with-dispatch.patch create mode 100644 debian/patches/extra/0006-virtio-fix-off-by-one-and-invalid-access-in-virtqueu.patch
diff --git a/debian/patches/extra/0003-virtio-net-Fix-VLAN-filter-table-reset-timing.patch b/debian/patches/extra/0003-virtio-net-Fix-VLAN-filter-table-reset-timing.patch new file mode 100644 index 0000000..9cfadff --- /dev/null +++ b/debian/patches/extra/0003-virtio-net-Fix-VLAN-filter-table-reset-timing.patch @@ -0,0 +1,107 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki <od...@rsg.ci.i.u-tokyo.ac.jp> +Date: Sun, 27 Jul 2025 15:22:36 +0900 +Subject: [PATCH] virtio-net: Fix VLAN filter table reset timing + +Problem +------- + +The expected initial state of the table depends on feature negotiation: + +With VIRTIO_NET_F_CTRL_VLAN: + The table must be empty in accordance with the specification. +Without VIRTIO_NET_F_CTRL_VLAN: + The table must be filled to permit all VLAN traffic. + +Prior to commit 06b636a1e2ad ("virtio-net: do not reset vlan filtering +at set_features"), virtio_net_set_features() always reset the VLAN +table. That commit changed the behavior to skip table reset when +VIRTIO_NET_F_CTRL_VLAN was negotiated, assuming the table would be +properly cleared during device reset and remain stable. + +However, this assumption breaks when a driver renegotiates features: +1. Initial negotiation without VIRTIO_NET_F_CTRL_VLAN (table filled) +2. Renegotiation with VIRTIO_NET_F_CTRL_VLAN (table will not be cleared) + +The problem was exacerbated by commit 0caed25cd171 ("virtio: Call +set_features during reset"), which triggered virtio_net_set_features() +during device reset, exposing the bug whenever VIRTIO_NET_F_CTRL_VLAN +was negotiated after a device reset. + +Solution +-------- + +Fix the issue by initializing the table when virtio_net_set_features() +is called to change the VIRTIO_NET_F_CTRL_VLAN bit of +vdev->guest_features. + +This approach ensures the correct table state regardless of feature +negotiation sequence by performing initialization in +virtio_net_set_features() as QEMU did prior to commit 06b636a1e2ad +("virtio-net: do not reset vlan filtering at set_features"). + +This change still preserves the goal of the commit, which was to avoid +resetting the table during migration, by checking whether the +VIRTIO_NET_F_CTRL_VLAN bit of vdev->guest_features is being changed; +vdev->guest_features is set before virtio_net_set_features() gets called +during migration. + +It also avoids resetting the table when the driver sets a feature +bitmask with no change for the VIRTIO_NET_F_CTRL_VLAN bit, which makes +the operation idempotent and its semantics cleaner. + +Additionally, this change ensures the table is initialized after +feature negotiation and before the DRIVER_OK status bit being set for +compatibility with the Linux driver before commit 50c0ada627f5 +("virtio-net: fix race between ndo_open() and virtio_device_ready()"), +which did not ensure to set the DRIVER_OK status bit before modifying +the table. + +Fixes: 06b636a1e2ad ("virtio-net: do not reset vlan filtering at set_features") +Cc: qemu-sta...@nongnu.org +Reported-by: Konstantin Shkolnyy <k...@linux.ibm.com> +Signed-off-by: Akihiko Odaki <od...@rsg.ci.i.u-tokyo.ac.jp> +Tested-by: Konstantin Shkolnyy <k...@linux.ibm.com> +Tested-by: Lei Yang <leiy...@redhat.com> +Message-Id: <20250727-vlan-v3-1-bbee73861...@rsg.ci.i.u-tokyo.ac.jp> +Tested-by: Konstantin Shkolnyy <k...@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin <m...@redhat.com> +Signed-off-by: Michael S. Tsirkin <m...@redhat.com> +(cherry picked from commit 6071d13c6a37493a6b26e1609b09a98aa058038a) +Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> +--- + hw/net/virtio-net.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 5f908e5bca..8a0ea4cff5 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -997,8 +997,9 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + vhost_net_save_acked_features(nc->peer); + } + +- if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { +- memset(n->vlans, 0xff, MAX_VLAN >> 3); ++ if (virtio_has_feature(vdev->guest_features ^ features, VIRTIO_NET_F_CTRL_VLAN)) { ++ bool vlan = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN); ++ memset(n->vlans, vlan ? 0 : 0xff, MAX_VLAN >> 3); + } + + if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { +@@ -3896,6 +3897,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); + + n->vlans = g_malloc0(MAX_VLAN >> 3); ++ memset(n->vlans, 0xff, MAX_VLAN >> 3); + + nc = qemu_get_queue(n->nic); + nc->rxfilter_notify_enabled = 1; +@@ -3986,7 +3988,6 @@ static void virtio_net_reset(VirtIODevice *vdev) + memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); + memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); + qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); +- memset(n->vlans, 0, MAX_VLAN >> 3); + + /* Flush any async TX */ + for (i = 0; i < n->max_queue_pairs; i++) { diff --git a/debian/patches/extra/0004-target-i386-fix-width-of-third-operand-of-VINSERTx12.patch b/debian/patches/extra/0004-target-i386-fix-width-of-third-operand-of-VINSERTx12.patch new file mode 100644 index 0000000..e15333e --- /dev/null +++ b/debian/patches/extra/0004-target-i386-fix-width-of-third-operand-of-VINSERTx12.patch @@ -0,0 +1,46 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini <pbonz...@redhat.com> +Date: Fri, 25 Jul 2025 01:10:12 +0200 +Subject: [PATCH] target/i386: fix width of third operand of VINSERTx128 + +Table A-5 of the Intel manual incorrectly lists the third operand of +VINSERTx128 as Wqq, but it is actually a 128-bit value. This is +visible when W is a memory operand close to the end of the page. + +Fixes the recently-added poly1305_kunit test in linux-next. + +(No testcase yet, but I plan to modify test-avx2 to use memory +close to the end of the page. This would work because the test +vectors correctly have the memory operand as xmm2/m128). + +Reported-by: Eric Biggers <ebigg...@kernel.org> +Tested-by: Eric Biggers <ebigg...@kernel.org> +Cc: Ard Biesheuvel <a...@kernel.org> +Cc: "Jason A. Donenfeld" <ja...@zx2c4.com> +Cc: Guenter Roeck <li...@roeck-us.net> +Cc: qemu-sta...@nongnu.org +Fixes: 79068477686 ("target/i386: reimplement 0x0f 0x3a, add AVX", 2022-10-18) +Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> +(cherry picked from commit feea87cd6b645d5166bdd304aac88f47f63dc2ef) +Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> +--- + target/i386/tcg/decode-new.c.inc | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc +index cda32ee678..f4cfc196b8 100644 +--- a/target/i386/tcg/decode-new.c.inc ++++ b/target/i386/tcg/decode-new.c.inc +@@ -878,10 +878,10 @@ static const X86OpEntry opcodes_0F3A[256] = { + [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + +- [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), ++ [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX) p_66), + [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66), + +- [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), ++ [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX2) p_66), + [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66), + + /* Listed incorrectly as type 4 */ diff --git a/debian/patches/extra/0005-system-physmem-fix-use-after-free-with-dispatch.patch b/debian/patches/extra/0005-system-physmem-fix-use-after-free-with-dispatch.patch new file mode 100644 index 0000000..a1acfe6 --- /dev/null +++ b/debian/patches/extra/0005-system-physmem-fix-use-after-free-with-dispatch.patch @@ -0,0 +1,126 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Pierrick Bouvier <pierrick.bouv...@linaro.org> +Date: Thu, 24 Jul 2025 09:11:42 -0700 +Subject: [PATCH] system/physmem: fix use-after-free with dispatch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A use-after-free bug was reported when booting a Linux kernel during the +pci setup phase. It's quite hard to reproduce (needs smp, and favored by +having several pci devices with BAR and specific Linux config, which +is Debian default one in this case). + +After investigation (see the associated bug ticket), it appears that, +under specific conditions, we might access a cached AddressSpaceDispatch +that was reclaimed by RCU thread meanwhile. +In the Linux boot scenario, during the pci phase, memory region are +destroyed/recreated, resulting in exposition of the bug. + +The core of the issue is that we cache the dispatch associated to +current cpu in cpu->cpu_ases[asidx].memory_dispatch. It is updated with +tcg_commit, which runs asynchronously on a given cpu. +At some point, we leave the rcu critial section, and the RCU thread +starts reclaiming it, but tcg_commit is not yet invoked, resulting in +the use-after-free. + +It's not the first problem around this area, and commit 0d58c660689 [1] +("softmmu: Use async_run_on_cpu in tcg_commit") already tried to +address it. It did a good job, but it seems that we found a specific +situation where it's not enough. + +This patch takes a simple approach: remove the cached value creating the +issue, and make sure we always get the current mapping for address +space, using address_space_to_dispatch(cpu->cpu_ases[asidx].as). +It's equivalent to qatomic_rcu_read(&as->current_map)->dispatch; +This is not really costly, we just need two dereferences, +including one atomic (rcu) read, which is negligible considering we are +already on mmu slow path anyway. + +Note that tcg_commit is still needed, as it's taking care of flushing +TLB, removing previously mapped entries. + +Another solution would be to cache directly values under the dispatch +(dispatch themselves are not ref counted), keep an active reference on +associated memory section, and release it when appropriate (tricky). +Given the time already spent debugging this area now and previously, I +strongly prefer eliminating the root of the issue, instead of adding +more complexity for a hypothetical performance gain. RCU is precisely +used to ensure good performance when reading data, so caching is not as +beneficial as it might seem IMHO. + +[1] https://gitlab.com/qemu-project/qemu/-/commit/0d58c660689f6da1e3feff8a997014003d928b3b + +Cc: qemu-sta...@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3040 +Signed-off-by: Pierrick Bouvier <pierrick.bouv...@linaro.org> +Reviewed-by: Richard Henderson <richard.hender...@linaro.org> +Reviewed-by: Michael Tokarev <m...@tls.msk.ru> +Tested-by: Michael Tokarev <m...@tls.msk.ru> +Message-ID: <20250724161142.2803091-1-pierrick.bouv...@linaro.org> +Signed-off-by: Philippe Mathieu-Daudé <phi...@linaro.org> +(cherry picked from commit 2865bf1c5795371ef441e9029bc22566ccff8299) +Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> +--- + system/physmem.c | 15 +++------------ + 1 file changed, 3 insertions(+), 12 deletions(-) + +diff --git a/system/physmem.c b/system/physmem.c +index 333a5eb94d..32f5895b80 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -164,13 +164,11 @@ static bool ram_is_cpr_compatible(RAMBlock *rb); + * CPUAddressSpace: all the information a CPU needs about an AddressSpace + * @cpu: the CPU whose AddressSpace this is + * @as: the AddressSpace itself +- * @memory_dispatch: its dispatch pointer (cached, RCU protected) + * @tcg_as_listener: listener for tracking changes to the AddressSpace + */ + typedef struct CPUAddressSpace { + CPUState *cpu; + AddressSpace *as; +- struct AddressSpaceDispatch *memory_dispatch; + MemoryListener tcg_as_listener; + } CPUAddressSpace; + +@@ -689,7 +687,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr, + IOMMUTLBEntry iotlb; + int iommu_idx; + hwaddr addr = orig_addr; +- AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; ++ AddressSpaceDispatch *d = address_space_to_dispatch(cpu->cpu_ases[asidx].as); + + for (;;) { + section = address_space_translate_internal(d, addr, &addr, plen, false); +@@ -2673,7 +2671,7 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu, + { + int asidx = cpu_asidx_from_attrs(cpu, attrs); + CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; +- AddressSpaceDispatch *d = cpuas->memory_dispatch; ++ AddressSpaceDispatch *d = address_space_to_dispatch(cpuas->as); + int section_index = index & ~TARGET_PAGE_MASK; + MemoryRegionSection *ret; + +@@ -2750,9 +2748,6 @@ static void tcg_log_global_after_sync(MemoryListener *listener) + + static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data) + { +- CPUAddressSpace *cpuas = data.host_ptr; +- +- cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as); + tlb_flush(cpu); + } + +@@ -2768,11 +2763,7 @@ static void tcg_commit(MemoryListener *listener) + cpu = cpuas->cpu; + + /* +- * Defer changes to as->memory_dispatch until the cpu is quiescent. +- * Otherwise we race between (1) other cpu threads and (2) ongoing +- * i/o for the current cpu thread, with data cached by mmu_lookup(). +- * +- * In addition, queueing the work function will kick the cpu back to ++ * Queueing the work function will kick the cpu back to + * the main loop, which will end the RCU critical section and reclaim + * the memory data structures. + * diff --git a/debian/patches/extra/0006-virtio-fix-off-by-one-and-invalid-access-in-virtqueu.patch b/debian/patches/extra/0006-virtio-fix-off-by-one-and-invalid-access-in-virtqueu.patch new file mode 100644 index 0000000..68c67da --- /dev/null +++ b/debian/patches/extra/0006-virtio-fix-off-by-one-and-invalid-access-in-virtqueu.patch @@ -0,0 +1,86 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Jonah Palmer <jonah.pal...@oracle.com> +Date: Mon, 21 Jul 2025 15:02:08 +0000 +Subject: [PATCH] virtio: fix off-by-one and invalid access in + virtqueue_ordered_fill + +Commit b44135daa372 introduced virtqueue_ordered_fill for +VIRTIO_F_IN_ORDER support but had a few issues: + +* Conditional while loop used 'steps <= max_steps' but should've been + 'steps < max_steps' since reaching steps == max_steps would indicate + that we didn't find an element, which is an error. Without this + change, the code would attempt to read invalid data at an index + outside of our search range. + +* Incremented 'steps' using the next chain's ndescs instead of the + current one. + +This patch corrects the loop bounds and synchronizes 'steps' and index +increments. + +We also add a defensive sanity check against malicious or invalid +descriptor counts to avoid a potential infinite loop and DoS. + +Fixes: b44135daa372 ("virtio: virtqueue_ordered_fill - VIRTIO_F_IN_ORDER support") +Reported-by: terrynini <terrynini38...@gmail.com> +Signed-off-by: Jonah Palmer <jonah.pal...@oracle.com> +Message-Id: <20250721150208.2409779-1-jonah.pal...@oracle.com> +Reviewed-by: Si-Wei Liu <si-wei....@oracle.com> +Acked-by: Jason Wang <jasow...@redhat.com> +Reviewed-by: Michael S. Tsirkin <m...@redhat.com> +Signed-off-by: Michael S. Tsirkin <m...@redhat.com> +(cherry picked from commit 6fcf5ebafad65adc19a616260ca7dc90005785d1) +Signed-off-by: Fiona Ebner <f.eb...@proxmox.com> +--- + hw/virtio/virtio.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index ec54573feb..b756f49867 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -929,18 +929,18 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, + static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) + { +- unsigned int i, steps, max_steps; ++ unsigned int i, steps, max_steps, ndescs; + + i = vq->used_idx % vq->vring.num; + steps = 0; + /* +- * We shouldn't need to increase 'i' by more than the distance +- * between used_idx and last_avail_idx. ++ * We shouldn't need to increase 'i' by more than or equal to ++ * the distance between used_idx and last_avail_idx (max_steps). + */ + max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; + + /* Search for element in vq->used_elems */ +- while (steps <= max_steps) { ++ while (steps < max_steps) { + /* Found element, set length and mark as filled */ + if (vq->used_elems[i].index == elem->index) { + vq->used_elems[i].len = len; +@@ -948,8 +948,18 @@ static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, + break; + } + +- i += vq->used_elems[i].ndescs; +- steps += vq->used_elems[i].ndescs; ++ ndescs = vq->used_elems[i].ndescs; ++ ++ /* Defensive sanity check */ ++ if (unlikely(ndescs == 0 || ndescs > vq->vring.num)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: %s invalid ndescs %u at position %u\n", ++ __func__, vq->vdev->name, ndescs, i); ++ return; ++ } ++ ++ i += ndescs; ++ steps += ndescs; + + if (i >= vq->vring.num) { + i -= vq->vring.num; diff --git a/debian/patches/series b/debian/patches/series index 6e5fd69..c0e5a0d 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,5 +1,9 @@ extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch extra/0002-ide-avoid-potential-deadlock-when-draining-during-tr.patch +extra/0003-virtio-net-Fix-VLAN-filter-table-reset-timing.patch +extra/0004-target-i386-fix-width-of-third-operand-of-VINSERTx12.patch +extra/0005-system-physmem-fix-use-after-free-with-dispatch.patch +extra/0006-virtio-fix-off-by-one-and-invalid-access-in-virtqueu.patch bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch -- 2.47.2 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel