On Tue, 14 Apr 2026 15:23:44 +0100 "Kiryl Shutsemau (Meta)" <[email protected]> wrote:

> Add UFFDIO_SET_MODE ioctl to toggle UFFD_FEATURE_MINOR_ASYNC at
> runtime. Takes mmap_write_lock for serialization against all in-flight
> faults. On sync-to-async transition, wake threads blocked in
> handle_userfault() so they retry and auto-resolve.
> 
> Since ctx->features can now be modified concurrently, add
> userfaultfd_features() helper that wraps READ_ONCE() and convert
> all ctx->features reads to use it.
> 
> Signed-off-by: Kiryl Shutsemau (Meta) <[email protected]>
> Assisted-by: Claude:claude-opus-4-6
> ---
>  fs/userfaultfd.c                 | 95 ++++++++++++++++++++++++++++----
>  include/uapi/linux/userfaultfd.h | 13 +++++
>  2 files changed, 96 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 43064238fd8d..0edb33599491 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -79,24 +79,33 @@ struct userfaultfd_wake_range {
>  /* internal indication that UFFD_API ioctl was successfully executed */
>  #define UFFD_FEATURE_INITIALIZED             (1u << 31)
>  
> +/*
> + * Read ctx->features with READ_ONCE() since UFFDIO_SET_MODE can
> + * modify it concurrently.
> + */
> +static unsigned int userfaultfd_features(struct userfaultfd_ctx *ctx)
> +{
> +     return READ_ONCE(ctx->features);
> +}
> +
>  static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
>  {
> -     return ctx->features & UFFD_FEATURE_INITIALIZED;
> +     return userfaultfd_features(ctx) & UFFD_FEATURE_INITIALIZED;
>  }
>  
>  static bool userfaultfd_wp_async_ctx(struct userfaultfd_ctx *ctx)
>  {
> -     return ctx && (ctx->features & UFFD_FEATURE_WP_ASYNC);
> +     return ctx && (userfaultfd_features(ctx) & UFFD_FEATURE_WP_ASYNC);
>  }
>  
>  static bool userfaultfd_minor_anon_ctx(struct userfaultfd_ctx *ctx)
>  {
> -     return ctx && (ctx->features & UFFD_FEATURE_MINOR_ANON);
> +     return ctx && (userfaultfd_features(ctx) & UFFD_FEATURE_MINOR_ANON);
>  }
>  
>  static bool userfaultfd_minor_async_ctx(struct userfaultfd_ctx *ctx)
>  {
> -     return ctx && (ctx->features & UFFD_FEATURE_MINOR_ASYNC);
> +     return ctx && (userfaultfd_features(ctx) & UFFD_FEATURE_MINOR_ASYNC);
>  }
>  
>  static unsigned int userfaultfd_ctx_flags(struct userfaultfd_ctx *ctx)
> @@ -122,7 +131,7 @@ bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma)
>       if (!ctx)
>               return false;
>  
> -     return ctx->features & UFFD_FEATURE_WP_UNPOPULATED;
> +     return userfaultfd_features(ctx) & UFFD_FEATURE_WP_UNPOPULATED;
>  }
>  
>  static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
> @@ -435,7 +444,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
>       /* 0 or > 1 flags set is a bug; we expect exactly 1. */
>       VM_WARN_ON_ONCE(!reason || (reason & (reason - 1)));
>  
> -     if (ctx->features & UFFD_FEATURE_SIGBUS)
> +     if (userfaultfd_features(ctx) & UFFD_FEATURE_SIGBUS)
>               goto out;
>       if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY))
>               goto out;
> @@ -506,7 +515,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
>       init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
>       uwq.wq.private = current;
>       uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
> -                             reason, ctx->features);
> +                             reason, userfaultfd_features(ctx));
>       uwq.ctx = ctx;
>       uwq.waken = false;
>  
> @@ -668,7 +677,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
>       if (!octx)
>               return 0;
>  
> -     if (!(octx->features & UFFD_FEATURE_EVENT_FORK)) {
> +     if (!(userfaultfd_features(octx) & UFFD_FEATURE_EVENT_FORK)) {
>               userfaultfd_reset_ctx(vma);
>               return 0;
>       }
> @@ -774,7 +783,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
>       if (!ctx)
>               return;
>  
> -     if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
> +     if (userfaultfd_features(ctx) & UFFD_FEATURE_EVENT_REMAP) {
>               vm_ctx->ctx = ctx;
>               userfaultfd_ctx_get(ctx);
>               down_write(&ctx->map_changing_lock);
> @@ -824,7 +833,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
>       struct userfaultfd_wait_queue ewq;
>  
>       ctx = vma->vm_userfaultfd_ctx.ctx;
> -     if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
> +     if (!ctx || !(userfaultfd_features(ctx) & UFFD_FEATURE_EVENT_REMOVE))
>               return true;
>  
>       userfaultfd_ctx_get(ctx);
> @@ -863,7 +872,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
>       struct userfaultfd_unmap_ctx *unmap_ctx;
>       struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
>  
> -     if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
> +     if (!ctx || !(userfaultfd_features(ctx) & UFFD_FEATURE_EVENT_UNMAP) ||
>           has_unmap_ctx(ctx, unmaps, start, end))
>               return 0;
>  
> @@ -1826,6 +1835,65 @@ static int userfaultfd_deactivate(struct userfaultfd_ctx *ctx,
>       return ret;
>  }
>  
> +/*
> + * Features that can be toggled at runtime via UFFDIO_SET_MODE.
> + * Only async features that were enabled at UFFDIO_API time may be toggled.
> + */
> +#define UFFD_FEATURE_TOGGLEABLE      (UFFD_FEATURE_MINOR_ASYNC)
> +
> +static int userfaultfd_set_mode(struct userfaultfd_ctx *ctx,
> +                               unsigned long arg)
> +{
> +     struct uffdio_set_mode mode;
> +     struct mm_struct *mm = ctx->mm;
> +
> +     if (copy_from_user(&mode, (void __user *)arg, sizeof(mode)))
> +             return -EFAULT;
> +
> +     /* enable and disable must not overlap */
> +     if (mode.enable & mode.disable)
> +             return -EINVAL;
> +
> +     /* only toggleable features are allowed */
> +     if ((mode.enable | mode.disable) & ~UFFD_FEATURE_TOGGLEABLE)
> +             return -EINVAL;

The comment above UFFD_FEATURE_TOGGLEABLE states "Only async features that
were enabled at UFFDIO_API time may be toggled."  However, the code only
checks that the requested bits are within UFFD_FEATURE_TOGGLEABLE; it never
checks them against the features that were enabled at UFFDIO_API time.

Is it intentional that a user who opened a uffd without
UFFD_FEATURE_MINOR_ASYNC can still enable it later via
UFFDIO_SET_MODE?

> +
> +     if (!mmget_not_zero(mm))
> +             return -ESRCH;
> +
> +     /*
> +      * mmap_write_lock serializes against all page faults.
> +      * After we release, no in-flight faults from the old mode exist.
> +      */
> +     {
> +             unsigned int new_features;
> +
> +             mmap_write_lock(mm);
> +             new_features = userfaultfd_features(ctx);
> +             new_features |= mode.enable;
> +             new_features &= ~mode.disable;
> +             WRITE_ONCE(ctx->features, new_features);
> +             mmap_write_unlock(mm);
> +     }
> +
> +     /*
> +      * If switching to async, wake threads blocked in handle_userfault().
> +      * They will retry the fault and auto-resolve under the new mode.
> +      * len=0 means wake all pending faults on this context.
> +      */
> +     if (mode.enable & UFFD_FEATURE_MINOR_ASYNC) {
> +             struct userfaultfd_wake_range range = { .len = 0 };
> +
> +             spin_lock_irq(&ctx->fault_pending_wqh.lock);
> +             __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
> +                                  &range);
> +             __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
> +             spin_unlock_irq(&ctx->fault_pending_wqh.lock);
> +     }
> +
> +     mmput(mm);
> +     return 0;
> +}
>  
>  static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
>  {
> @@ -2150,6 +2218,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
>       case UFFDIO_DEACTIVATE:
>               ret = userfaultfd_deactivate(ctx, arg);
>               break;
> +     case UFFDIO_SET_MODE:
> +             ret = userfaultfd_set_mode(ctx, arg);
> +             break;
>       }
>       return ret;
>  }
> @@ -2177,7 +2248,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
>        *      protocols: aa:... bb:...
>        */
>       seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
> -                pending, total, UFFD_API, ctx->features,
> +                pending, total, UFFD_API, userfaultfd_features(ctx),
>                  UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
>  }
>  #endif
> diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
> index 775825da2596..f0f14f9db06c 100644
> --- a/include/uapi/linux/userfaultfd.h
> +++ b/include/uapi/linux/userfaultfd.h
> @@ -84,6 +84,7 @@
>  #define _UFFDIO_CONTINUE             (0x07)
>  #define _UFFDIO_POISON                       (0x08)
>  #define _UFFDIO_DEACTIVATE           (0x09)
> +#define _UFFDIO_SET_MODE             (0x0A)
>  #define _UFFDIO_API                  (0x3F)
>  
>  /* userfaultfd ioctl ids */
> @@ -110,6 +111,8 @@
>                                     struct uffdio_poison)
>  #define UFFDIO_DEACTIVATE    _IOR(UFFDIO, _UFFDIO_DEACTIVATE,        \
>                                    struct uffdio_range)
> +#define UFFDIO_SET_MODE              _IOW(UFFDIO, _UFFDIO_SET_MODE,  \
> +                                  struct uffdio_set_mode)
>  
>  /* read() structure */
>  struct uffd_msg {
> @@ -395,6 +398,16 @@ struct uffdio_move {
>       __s64 move;
>  };
>  
> +struct uffdio_set_mode {
> +     /*
> +      * Toggle async mode for features at runtime.
> +      * Supported: UFFD_FEATURE_MINOR_ASYNC.
> +      * Setting a bit in both enable and disable is invalid.
> +      */
> +     __u64 enable;
> +     __u64 disable;
> +};
> +
>  /*
>   * Flags for the userfaultfd(2) system call itself.
>   */
> -- 
> 2.51.2
> 
> 

Reply via email to