On 17.11.25 12:46, Mike Rapoport wrote:
From: "Mike Rapoport (Microsoft)" <[email protected]>

* Export handle_userfault() for KVM module so that fault() handler in
   guest_memfd would be able to notify userspace about page faults in its
   address space.
* Implement get_pagecache_folio() for guest_memfd.
* And finally, introduce UFFD_FEATURE_MINOR_GENERIC that will allow
   using userfaultfd minor mode with memory types other than shmem and
   hugetlb provided they are allowed to call handle_userfault() and
   implement get_pagecache_folio().

Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
---
  fs/userfaultfd.c                 |  4 +++-
  include/uapi/linux/userfaultfd.h |  8 +++++++-
  virt/kvm/guest_memfd.c           | 30 ++++++++++++++++++++++++++++++
  3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 54c6cc7fe9c6..964fa2662d5c 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -537,6 +537,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
  out:
        return ret;
  }
+EXPORT_SYMBOL_FOR_MODULES(handle_userfault, "kvm");
static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                              struct userfaultfd_wait_queue *ewq)
@@ -1978,7 +1979,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
        uffdio_api.features = UFFD_API_FEATURES;
  #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
        uffdio_api.features &=
-               ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
+               ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
+                 UFFD_FEATURE_MINOR_GENERIC);
  #endif
  #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
        uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 2841e4ea8f2c..c5cbd4a5a26e 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -42,7 +42,8 @@
                           UFFD_FEATURE_WP_UNPOPULATED |        \
                           UFFD_FEATURE_POISON |                \
                           UFFD_FEATURE_WP_ASYNC |              \
-                          UFFD_FEATURE_MOVE)
+                          UFFD_FEATURE_MOVE |                  \
+                          UFFD_FEATURE_MINOR_GENERIC)
  #define UFFD_API_IOCTLS                               \
        ((__u64)1 << _UFFDIO_REGISTER |           \
         (__u64)1 << _UFFDIO_UNREGISTER | \
@@ -210,6 +211,10 @@ struct uffdio_api {
         * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
         * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
         *
+        * UFFD_FEATURE_MINOR_GENERIC indicates that minor faults can be
+        * intercepted for file-backed memory in case subsystem backing this
+        * memory supports it.
+        *
         * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
         * faults would be provided and the offset within the page would not be
         * masked.
@@ -248,6 +253,7 @@ struct uffdio_api {
  #define UFFD_FEATURE_POISON                   (1<<14)
  #define UFFD_FEATURE_WP_ASYNC                 (1<<15)
  #define UFFD_FEATURE_MOVE                     (1<<16)
+#define UFFD_FEATURE_MINOR_GENERIC             (1<<17)
        __u64 features;
__u64 ioctls;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index fbca8c0972da..5e3c63307fdf 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,6 +4,7 @@
  #include <linux/kvm_host.h>
  #include <linux/pagemap.h>
  #include <linux/anon_inodes.h>
+#include <linux/userfaultfd_k.h>
  #include "kvm_mm.h"
@@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
                return vmf_error(err);
        }
+       if (userfaultfd_minor(vmf->vma)) {
+               folio_unlock(folio);
+               folio_put(folio);
+               return handle_userfault(vmf, VM_UFFD_MINOR);
+       }

Staring at things like VM_FAULT_NEEDDSYNC, I'm wondering whether we could have a
new return value from ->fault that would indicate that
handle_userfault(vmf, VM_UFFD_MINOR) should be called.

Maybe some VM_FAULT_UFFD_MINOR or simply VM_FAULT_USERFAULTFD and we
can just derive that it is VM_UFFD_MINOR.


diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4f66a3206a63c..2cf17da880f0e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1601,6 +1601,8 @@ typedef __bitwise unsigned int vm_fault_t;
  *                             fsync() to complete (for synchronous page faults
  *                             in DAX)
  * @VM_FAULT_COMPLETED:                ->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_USERFAULTFD:      ->fault did not modify page tables and needs
+ *                             handle_userfault() to complete
  * @VM_FAULT_HINDEX_MASK:      mask HINDEX value
  *
  */
@@ -1618,6 +1620,7 @@ enum vm_fault_reason {
        VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
        VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
        VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+       VM_FAULT_USERFAULTFD    = (__force vm_fault_t)0x006000,
        VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
@@ -1642,6 +1645,7 @@ enum vm_fault_reason {
        { VM_FAULT_FALLBACK,            "FALLBACK" },   \
        { VM_FAULT_DONE_COW,            "DONE_COW" },   \
        { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },  \
+       { VM_FAULT_USERFAULTFD,         "USERFAULTFD" },\
        { VM_FAULT_COMPLETED,           "COMPLETED" }
struct vm_special_mapping {


IIUC, we have exactly two invocations of ->fault(vmf) in memory.c where
we would have to handle it. And the return value would never leave
the core.

That way, we wouldn't have to export handle_userfault().

Just a thought ...

--
Cheers

David

Reply via email to