The default value of the unprivileged_userfaultfd sysctl knob was changed to 0 in kernel v5.11 by commit d0d4730a ("userfaultfd: add user-mode only option to unprivileged_userfaultfd sysctl knob").
In this mode, an unprivileged user (without the CAP_SYS_PTRACE capability) must pass UFFD_USER_MODE_ONLY to userfaultfd or the API will fail with EPERM. So add a migration capability that passes UFFD_USER_MODE_ONLY to support it. Signed-off-by: Lin Ma <l...@suse.com> --- migration/migration.c | 9 +++++++++ migration/migration.h | 1 + migration/postcopy-ram.c | 22 +++++++++++++++++++--- qapi/migration.json | 8 +++++++- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 6ac807ef3d..86212dcb70 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2380,6 +2380,15 @@ bool migrate_postcopy_blocktime(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; } +bool migrate_postcopy_uffd_usermode_only(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_UFFD_USERMODE_ONLY]; +} + bool migrate_use_compression(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index 7a5aa8c2fd..a516d7f59f 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -358,6 +358,7 @@ int migrate_decompress_threads(void); bool migrate_use_events(void); bool migrate_postcopy_blocktime(void); bool migrate_background_snapshot(void); +bool migrate_postcopy_uffd_usermode_only(void); /* Sending on the return path - generic and then for each message type */ void migrate_send_rp_shut(MigrationIncomingState *mis, diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 2e9697bdd2..078c558626 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -206,9 +206,14 @@ static bool receive_ufd_features(uint64_t *features) struct uffdio_api api_struct = {0}; int ufd; bool ret = true; + int flags; + + flags = O_CLOEXEC; + if (migrate_postcopy_uffd_usermode_only()) + flags |= UFFD_USER_MODE_ONLY; /* if we are here __NR_userfaultfd should exists */ - ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + 
ufd = syscall(__NR_userfaultfd, flags); if (ufd == -1) { error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, strerror(errno)); @@ -352,13 +357,18 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) struct uffdio_range range_struct; uint64_t feature_mask; Error *local_err = NULL; + int flags; if (qemu_target_page_size() > pagesize) { error_report("Target page size bigger than host page size"); goto out; } - ufd = syscall(__NR_userfaultfd, O_CLOEXEC); + flags = O_CLOEXEC; + if (migrate_postcopy_uffd_usermode_only()) + flags |= UFFD_USER_MODE_ONLY; + + ufd = syscall(__NR_userfaultfd, flags); if (ufd == -1) { error_report("%s: userfaultfd not available: %s", __func__, strerror(errno)); @@ -1064,8 +1074,14 @@ retry: int postcopy_ram_incoming_setup(MigrationIncomingState *mis) { + int flags; + + flags = O_CLOEXEC | O_NONBLOCK; + if (migrate_postcopy_uffd_usermode_only()) + flags |= UFFD_USER_MODE_ONLY; + /* Open the fd for the kernel to give us userfaults */ - mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + mis->userfault_fd = syscall(__NR_userfaultfd, flags); if (mis->userfault_fd == -1) { error_report("%s: Failed to open userfault fd: %s", __func__, strerror(errno)); diff --git a/qapi/migration.json b/qapi/migration.json index 88f07baedd..3af1ec4cec 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -452,6 +452,11 @@ # procedure starts. The VM RAM is saved with running VM. # (since 6.0) # +# @postcopy-uffd-usermode-only: If enabled, It allows unprivileged users to use +# userfaultfd but with the restriction that page +# faults from only user mode can be handled. 
+# (since 6.2.0) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', @@ -459,7 +464,8 @@ 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', 'block', 'return-path', 'pause-before-switchover', 'multifd', 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - 'x-ignore-shared', 'validate-uuid', 'background-snapshot'] } + 'x-ignore-shared', 'validate-uuid', 'background-snapshot', + 'postcopy-uffd-usermode-only'] } ## # @MigrationCapabilityStatus: -- 2.26.2