Tarun Sahu <[email protected]> writes:
> Add a new KVM selftest `guest_memfd_preservation_test` to verify that
> guest memory backed by guest_memfd is preserved properly.
>
Don't think using backticks in commit messages is a common practice but
I might be wrong here.
> The test leverages the Live Update Orchestrator (LUO) infrastructure
> to validate that memory folios and configuration layouts are
> successfully saved and then restored during kernel live updates,
> preventing any memory loss for the guest.
>
> Here, I have used the kvm selftests framework by creating a new
> vm and mapping two memory slots to it. One is the code that is executed
> inside the vm and other is the guest_memfd whose memory is being
> written by the guest code.
>
Don't think commit messages with "I" are common either
> In Phase 1: Once data is written the vm exits and wait for the user
> to trigger the kexec.
>
> In Phase 2: A new vm is created with retrieved kvm and again two
> memory slots are assigned. Once for guest code, and another is for
> retrieved guest_memfd where guest_memfd memory is verified by the
> executed guest code. If verification succeeds, The test passes.
>
>
> [...snip...]
>
> +#define SESSION_NAME "gmem_vm_preservation_session"
> +#define VM_TOKEN 0x1001
> +#define GMEM_TOKEN 0x1002
> +
> +#define GMEM_SIZE (16ULL * 1024 * 1024)
> +#define DATA_SIZE (5ULL * 1024 * 1024)
> +
> +static size_t page_size;
> +
> +/* Deterministic byte pattern generation based on offset */
> +static inline uint8_t get_pattern_byte(size_t offset)
> +{
> + return (uint8_t)(offset ^ 0x5A);
> +}
> +
> +static void guest_code_phase1(uint64_t gpa, uint64_t size, uint64_t
> data_size)
> +{
> + uint8_t *mem = (uint8_t *)gpa;
> + size_t i;
> +
> + for (i = 0; i < data_size; i++)
> + mem[i] = get_pattern_byte(i);
> +
> + GUEST_DONE();
> +}
> +
> +static void guest_code_phase2(uint64_t gpa, uint64_t size, uint64_t
> data_size)
> +{
> + uint8_t *mem = (uint8_t *)gpa;
> + size_t i;
> +
> + for (i = 0; i < data_size; i++) {
> + uint8_t val = get_pattern_byte(i);
> +
> + __GUEST_ASSERT(mem[i] == val,
> + "Data mismatch at offset %lu! Expected 0x%x, got
> 0x%x",
> + i, val, mem[i]);
> + }
> +
> + GUEST_DONE();
> +}
> +
> +static void do_phase1(void)
> +{
> + uint64_t flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED;
Is there a reason to set GUEST_MEMFD_FLAG_MMAP? We're not really
accessing that memory from the host in this test.
> + int gmem_fd, dev_luo_fd, session_fd, ret;
> + const uint64_t gpa = SZ_4G;
> + struct kvm_vcpu *vcpu;
> + const int slot = 1;
> + struct kvm_vm *vm;
> +
> + vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1,
> + guest_code_phase1);
> + gmem_fd = vm_create_guest_memfd(vm, GMEM_SIZE, flags);
> + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa,
> GMEM_SIZE, NULL,
> + gmem_fd, 0);
> +
> + for (size_t i = 0; i < GMEM_SIZE; i += page_size)
> + virt_pg_map(vm, gpa + i, gpa + i);
> +
> + vcpu_args_set(vcpu, 3, gpa, GMEM_SIZE, DATA_SIZE);
If GMEM_SIZE and DATA_SIZE are static I think we don't have to set those
as vcpu_args_set(), they can be used as macros from within the guest.
> +
> + vcpu_run(vcpu);
> + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
> +
> + dev_luo_fd = luo_open_device();
> + TEST_ASSERT(dev_luo_fd >= 0, "Failed to open /dev/liveupdate");
> +
> + session_fd = luo_create_session(dev_luo_fd, SESSION_NAME);
> + TEST_ASSERT(session_fd >= 0, "Failed to create LUO session");
> +
> + ret = luo_session_preserve_fd(session_fd, vm->fd, VM_TOKEN);
> + TEST_ASSERT(ret == 0, "Failed to preserve VM file descriptor");
> +
> + ret = luo_session_preserve_fd(session_fd, gmem_fd, GMEM_TOKEN);
> + TEST_ASSERT(ret == 0, "Failed to preserve guest_memfd file descriptor");
> +
Thanks for showing how this works :)
> +
> printf("\n============================================================\n");
> + printf("Phase 1 Complete Successfully!\n");
> + printf("VM file and guest_memfd file have been preserved via LUO.\n");
> + printf("Tokens: VM_TOKEN=0x%x, GMEM_TOKEN=0x%x\n", VM_TOKEN,
> GMEM_TOKEN);
> + printf("Machine Size: %llu MB, Data Size: %llu MB\n", GMEM_SIZE / SZ_1M,
> + DATA_SIZE / SZ_1M);
> +
> printf("------------------------------------------------------------\n");
> +
> + daemonize_and_wait();
> +}
> +
> +static struct kvm_vm *vm_create_from_fd(int resurrected_vm_fd,
> + struct vm_shape shape)
> +{
> + struct kvm_vm *vm;
> +
> + vm = calloc(1, sizeof(*vm));
> + TEST_ASSERT(vm != NULL, "Insufficient Memory");
> +
> + vm_init_fields(vm, shape);
What would happen if the shape was changed between preserving and
restoring?
> +
> + vm->kvm_fd = open_path_or_exit(KVM_DEV_PATH, O_RDWR);
> + vm->fd = resurrected_vm_fd;
> +
> + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
> + vm->stats.fd = vm_get_stats_fd(vm);
> + else
> + vm->stats.fd = -1;
> +
> + vm_init_memory_properties(vm);
> +
> + return vm;
> +}
> +
I think vm_create_from_fd() could be introduced in an earlier patch to
reduce the amount of new code in this patch. Also, I think it could
perhaps be moved to kvm_util.c assuming that other test will use it too.
> +static void do_phase2(void)
> +{
> + int retrieved_vm_fd, retrieved_gmem_fd, dev_luo_fd, session_fd;
> + struct vm_shape shape = VM_SHAPE_DEFAULT;
> + const uint64_t gpa = SZ_4G;
> + struct kvm_vcpu *vcpu;
> + const int slot = 1;
> + struct kvm_vm *vm;
> +
> + dev_luo_fd = luo_open_device();
> + TEST_ASSERT(dev_luo_fd >= 0, "Failed to open /dev/liveupdate");
> +
> + session_fd = luo_retrieve_session(dev_luo_fd, SESSION_NAME);
> + TEST_ASSERT(session_fd >= 0, "Failed to retrieve LUO session");
> +
> + retrieved_vm_fd = luo_session_retrieve_fd(session_fd, VM_TOKEN);
> + TEST_ASSERT(retrieved_vm_fd >= 0, "Failed to retrieve VM file
> descriptor");
> +
> + retrieved_gmem_fd = luo_session_retrieve_fd(session_fd, GMEM_TOKEN);
> + TEST_ASSERT(retrieved_gmem_fd >= 0, "Failed to retrieve guest_memfd
> file descriptor");
> +
> + vm = vm_create_from_fd(retrieved_vm_fd, shape);
> +
> + u64 nr_pages = 2048; /* 8MB is plenty for slot0 pages */
> +
I don't think declarations are usually mixed with regular code.
> + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages,
> 0);
> + kvm_vm_elf_load(vm, program_invocation_name);
> +
> + for (int i = 0; i < NR_MEM_REGIONS; i++)
> + vm->memslots[i] = 0;
> +
> + struct userspace_mem_region *slot0 = memslot2region(vm, 0);
> +
> + ucall_init(vm, slot0->region.guest_phys_addr +
> slot0->region.memory_size);
> +
> + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa,
> GMEM_SIZE, NULL,
> + retrieved_gmem_fd, 0);
> +
> + for (size_t i = 0; i < GMEM_SIZE; i += page_size)
> + virt_pg_map(vm, gpa + i, gpa + i);
> +
> + vcpu = vm_vcpu_add(vm, 0, guest_code_phase2);
> + kvm_arch_vm_finalize_vcpus(vm);
> +
> + vcpu_args_set(vcpu, 3, gpa, GMEM_SIZE, DATA_SIZE);
> +
> + printf("Resuming / Running VM in Phase 2...\n");
> + vcpu_run(vcpu);
> + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
> +
> + printf("\nSUCCESS: Phase 2 Complete! All 5MB complex data verified
> intact!\n");
> +
> + luo_session_finish(session_fd);
> + close(session_fd);
> + close(dev_luo_fd);
> + /* This will also close the vm_fd */
> + kvm_vm_free(vm);
> + close(retrieved_gmem_fd);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + bool phase2 = false;
> +
> + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
> + page_size = getpagesize();
> +
> + for (int i = 1; i < argc; i++) {
> + if (strcmp(argv[i], "--phase2") == 0)
> + phase2 = true;
> + }
> +
Maybe use getopt() here?
> + if (phase2)
> + do_phase2();
> + else
> + do_phase1();
> +
> + return 0;
> +}
> --
> 2.54.0.1032.g2f8565e1d1-goog
I think we also need tests for trying to allocate while frozen, and
conversion while frozen, and trying to preserve while preservation is
not allowed.