With the introduction of the RamBlockAttribute object to manage RAMBlocks with guest_memfd, it is more elegant to move the KVM memory-attribute updates into a RamDiscardListener.
The KVM attribute-change RamDiscardListener is registered/unregistered for each memory region section during kvm_region_add/del(). The listener handler performs the attribute change upon receiving notifications from ram_block_attribute_state_change() calls. After this change, the set-attribute operations in kvm_convert_memory() can be removed. Note that errors can now be returned from ram_block_attribute_notify_to_discard() when a KVM attribute change fails, although this is currently unlikely to happen. With in-place conversion of guest_memfd in the future, errors will be more likely and will require proper error handling. For now, simply return the result, and kvm_convert_memory() will cause QEMU to quit if any issue arises. Signed-off-by: Chenyi Qiang <chenyi.qi...@intel.com> --- Changes in v5: - Revert to use RamDiscardListener Changes in v4: - Newly added. --- accel/kvm/kvm-all.c | 72 ++++++++++++++++++--- include/system/confidential-guest-support.h | 9 +++ system/ram-block-attribute.c | 16 +++-- target/i386/kvm/tdx.c | 1 + target/i386/sev.c | 1 + 5 files changed, 85 insertions(+), 14 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 2d7ecaeb6a..ca4ef8062b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -49,6 +49,7 @@ #include "kvm-cpus.h" #include "system/dirtylimit.h" #include "qemu/range.h" +#include "system/confidential-guest-support.h" #include "hw/boards.h" #include "system/stats.h" @@ -1689,28 +1690,90 @@ static int kvm_dirty_ring_init(KVMState *s) return 0; } +static int kvm_private_shared_notify(RamDiscardListener *rdl, + MemoryRegionSection *section, + bool to_private) +{ + hwaddr start = section->offset_within_address_space; + hwaddr size = section->size; + + if (to_private) { + return kvm_set_memory_attributes_private(start, size); + } else { + return kvm_set_memory_attributes_shared(start, size); + } +} + +static int kvm_ram_discard_notify_to_shared(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + return
kvm_private_shared_notify(rdl, section, false); +} + +static int kvm_ram_discard_notify_to_private(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + return kvm_private_shared_notify(rdl, section, true); +} + static void kvm_region_add(MemoryListener *listener, MemoryRegionSection *section) { KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); KVMMemoryUpdate *update; + CGSRamDiscardListener *crdl; + RamDiscardListener *rdl; + update = g_new0(KVMMemoryUpdate, 1); update->section = *section; QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); + + if (!memory_region_has_guest_memfd(section->mr) || !rdm) { + return; + } + + crdl = g_new0(CGSRamDiscardListener, 1); + crdl->mr = section->mr; + crdl->offset_within_address_space = section->offset_within_address_space; + rdl = &crdl->listener; + QLIST_INSERT_HEAD(&cgs->cgs_rdl_list, crdl, next); + ram_discard_listener_init(rdl, kvm_ram_discard_notify_to_shared, + kvm_ram_discard_notify_to_private, true); + ram_discard_manager_register_listener(rdm, rdl, section); } static void kvm_region_del(MemoryListener *listener, MemoryRegionSection *section) { KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); KVMMemoryUpdate *update; + CGSRamDiscardListener *crdl; + RamDiscardListener *rdl; update = g_new0(KVMMemoryUpdate, 1); update->section = *section; QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); + if (!memory_region_has_guest_memfd(section->mr) || !rdm) { + return; + } + + QLIST_FOREACH(crdl, &cgs->cgs_rdl_list, next) { + if (crdl->mr == section->mr && + crdl->offset_within_address_space == section->offset_within_address_space) { + rdl = &crdl->listener; + 
ram_discard_manager_unregister_listener(rdm, rdl); + QLIST_REMOVE(crdl, next); + g_free(crdl); + break; + } + } } static void kvm_region_commit(MemoryListener *listener) @@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) goto out_unref; } - if (to_private) { - ret = kvm_set_memory_attributes_private(start, size); - } else { - ret = kvm_set_memory_attributes_shared(start, size); - } - if (ret) { - goto out_unref; - } - addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; rb = qemu_ram_block_from_host(addr, false, &offset); diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h index ea46b50c56..974abdbf6b 100644 --- a/include/system/confidential-guest-support.h +++ b/include/system/confidential-guest-support.h @@ -19,12 +19,19 @@ #define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H #include "qom/object.h" +#include "system/memory.h" #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, ConfidentialGuestSupportClass, CONFIDENTIAL_GUEST_SUPPORT) +typedef struct CGSRamDiscardListener { + MemoryRegion *mr; + hwaddr offset_within_address_space; + RamDiscardListener listener; + QLIST_ENTRY(CGSRamDiscardListener) next; +} CGSRamDiscardListener; struct ConfidentialGuestSupport { Object parent; @@ -34,6 +41,8 @@ struct ConfidentialGuestSupport { */ bool require_guest_memfd; + QLIST_HEAD(, CGSRamDiscardListener) cgs_rdl_list; + /* * ready: flag set by CGS initialization code once it's ready to * start executing instructions in a potentially-secure diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c index 896c3d7543..387501b569 100644 --- a/system/ram-block-attribute.c +++ b/system/ram-block-attribute.c @@ -274,11 +274,12 @@ static bool ram_block_attribute_is_valid_range(RamBlockAttribute *attr, return true; } -static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr, - uint64_t offset, - 
uint64_t size) +static int ram_block_attribute_notify_to_discard(RamBlockAttribute *attr, + uint64_t offset, + uint64_t size) { RamDiscardListener *rdl; + int ret = 0; QLIST_FOREACH(rdl, &attr->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; @@ -286,8 +287,13 @@ static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr, if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } - rdl->notify_discard(rdl, &tmp); + ret = rdl->notify_discard(rdl, &tmp); + if (ret) { + break; + } } + + return ret; } static int @@ -377,7 +383,7 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset, if (to_private) { bitmap_clear(attr->bitmap, first_bit, nbits); - ram_block_attribute_notify_to_discard(attr, offset, size); + ret = ram_block_attribute_notify_to_discard(attr, offset, size); } else { bitmap_set(attr->bitmap, first_bit, nbits); ret = ram_block_attribute_notify_to_populated(attr, offset, size); diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 7ef49690bd..17b360059c 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -1492,6 +1492,7 @@ static void tdx_guest_init(Object *obj) qemu_mutex_init(&tdx->lock); cgs->require_guest_memfd = true; + QLIST_INIT(&cgs->cgs_rdl_list); tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, diff --git a/target/i386/sev.c b/target/i386/sev.c index adf787797e..f1b9c35fc3 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -2430,6 +2430,7 @@ sev_snp_guest_instance_init(Object *obj) SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); cgs->require_guest_memfd = true; + QLIST_INIT(&cgs->cgs_rdl_list); /* default init/start/finish params for kvm */ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; -- 2.43.5