> Date: Fri, 1 Jan 2021 20:10:45 +1100
> From: Jonathan Gray <[email protected]>

New diff at the end of this mail.  Please test this one instead of the
previous one.

> On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
> > The diff below changes the emulated Linux memory allocation functions
> > a bit such that they only use malloc(9) for allocations smaller than a
> > page.  This reduces pressure on the "interrupt safe" map and hopefully
> > will avoid the
> > 
> >     uvm_mapent_alloc: out of static map entries
> > 
> > messages that some people have seen more often in the last few months.
> > It also could help with some memory allocation issues seen by
> > amdgpu(4) users.
> > 
> > The downside of this approach is that memory leaks will be harder to
> > spot as the larger memory allocations are no longer included in the
> > DRM type as reported by vmstat -m.  Another downside is that this no
> > longer caps the amount of kernel memory that can be allocated by
> > drm(4).  If that turns out to be a problem, we can impose a limit on
> > the amount of memory that can be allocated this way.
> > 
> > The implementation needs to keep track of the size of each allocated
> > memory block.  This is done using a red-black tree.  Our kernel uses
> > red-black trees in similar situations but I wouldn't call myself an
> > expert in the area of data structures so there may be a better
> > approach.
> > 
> > Some real-life testing would be appreciated.
> > 
> 
> Thanks for looking into this, I'm interested to hear how people go with
> it.  Some comments inline below.
> 
> > 
> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.74
> > diff -u -p -r1.74 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000      1.74
> > +++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
> > @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
> >     return (num);
> >  }
> >  
> > +struct vmalloc_entry {
> > +   const void      *addr;
> > +   size_t          size;
> > +   RBT_ENTRY(vmalloc_entry) vmalloc_node;
> > +};
> > +
> > +struct pool vmalloc_pool;
> > +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> > +
> > +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +static inline int
> > +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry 
> > *b)
> > +{
> > +   vaddr_t va = (vaddr_t)a->addr;
> > +   vaddr_t vb = (vaddr_t)b->addr;
> > +
> > +   return va < vb ? -1 : va > vb;
> > +}
> > +
> > +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +bool
> > +is_vmalloc_addr(const void *addr)
> > +{
> > +   struct vmalloc_entry key;
> > +   struct vmalloc_entry *entry;
> > +
> > +   key.addr = addr;
> > +   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > +   return (entry != NULL);
> > +}
> > +
> > +void *
> > +vmalloc(unsigned long size)
> > +{
> > +   struct vmalloc_entry *entry;
> > +   void *addr;
> > +
> > +   size = roundup(size, PAGE_SIZE);
> 
> this could just be round_page(size)

Sure.

> > +   addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> > +   if (addr) {
> > +           entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > +           entry->addr = addr;
> > +           entry->size = size;
> > +           RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > +   }
> > +
> > +   return addr;
> > +}
> > +
> > +void *
> > +vzalloc(unsigned long size)
> > +{
> > +   struct vmalloc_entry *entry;
> > +   void *addr;
> > +
> > +   size = roundup(size, PAGE_SIZE);
> > +   addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> > +   if (addr) {
> > +           entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > +           entry->addr = addr;
> > +           entry->size = size;
> > +           RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > +   }
> > +
> > +   return addr;
> > +}
> > +
> > +void
> > +vfree(const void *addr)
> > +{
> > +   struct vmalloc_entry key;
> > +   struct vmalloc_entry *entry;
> > +
> > +   key.addr = addr;
> > +   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > +   if (entry == NULL)
> > +           panic("%s: non vmalloced addr %p", __func__, addr);
> > +
> > +   RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> > +   km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> > +   pool_put(&vmalloc_pool, entry);
> > +}
> > +
> > +void *
> > +kvmalloc(size_t size, gfp_t flags)
> > +{
> > +   if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > +           return malloc(size, M_DRM, flags);
> > +   return vmalloc(size);
> > +}
> > +
> > +void *
> > +kvzalloc(size_t size, gfp_t flags)
> > +{
> > +   if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > +           return malloc(size, M_DRM, flags | M_ZERO);
> > +   return vzalloc(size);
> > +}
> 
> these should be (flags & GFP_KERNEL) != GFP_KERNEL to handle calls like
> 
> ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*),
>     GFP_KERNEL | __GFP_ZERO);

Actually that wouldn't work.  But this points out that kvmalloc()
needs to handle __GFP_ZERO properly.  So a slightly different approach
down below.  If the flags include M_NOWAIT or if the size is less than
a page, go down the malloc path.  Otherwise call vmalloc() or vzalloc()
based on the M_ZERO flag.  kvzalloc() becomes a simple wrapper around
kvmalloc() that sets the M_ZERO flag.


Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.74
diff -u -p -r1.74 drm_linux.c
--- dev/pci/drm/drm_linux.c     31 Dec 2020 06:31:55 -0000      1.74
+++ dev/pci/drm/drm_linux.c     1 Jan 2021 14:27:39 -0000
@@ -430,6 +430,111 @@ dmi_check_system(const struct dmi_system
        return (num);
 }
 
+struct vmalloc_entry {
+       const void      *addr;
+       size_t          size;
+       RBT_ENTRY(vmalloc_entry) vmalloc_node;
+};
+
+struct pool vmalloc_pool;
+RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
+
+RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
+
+static inline int
+vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
+{
+       vaddr_t va = (vaddr_t)a->addr;
+       vaddr_t vb = (vaddr_t)b->addr;
+
+       return va < vb ? -1 : va > vb;
+}
+
+RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
+
+bool
+is_vmalloc_addr(const void *addr)
+{
+       struct vmalloc_entry key;
+       struct vmalloc_entry *entry;
+
+       key.addr = addr;
+       entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
+       return (entry != NULL);
+}
+
+void *
+vmalloc(unsigned long size)
+{
+       struct vmalloc_entry *entry;
+       void *addr;
+
+       size = round_page(size);
+       addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
+       if (addr) {
+               entry = pool_get(&vmalloc_pool, PR_WAITOK);
+               entry->addr = addr;
+               entry->size = size;
+               RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
+       }
+
+       return addr;
+}
+
+void *
+vzalloc(unsigned long size)
+{
+       struct vmalloc_entry *entry;
+       void *addr;
+
+       size = round_page(size);
+       addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
+       if (addr) {
+               entry = pool_get(&vmalloc_pool, PR_WAITOK);
+               entry->addr = addr;
+               entry->size = size;
+               RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
+       }
+
+       return addr;
+}
+
+void
+vfree(const void *addr)
+{
+       struct vmalloc_entry key;
+       struct vmalloc_entry *entry;
+
+       key.addr = addr;
+       entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
+       if (entry == NULL)
+               panic("%s: non vmalloced addr %p", __func__, addr);
+
+       RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
+       km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
+       pool_put(&vmalloc_pool, entry);
+}
+
+void *
+kvmalloc(size_t size, gfp_t flags)
+{
+       if ((flags & M_NOWAIT) || size < PAGE_SIZE)
+               return malloc(size, M_DRM, flags);
+       if (flags & M_ZERO)
+               return vzalloc(size);
+       else
+               return vmalloc(size);
+}
+
+void
+kvfree(const void *addr)
+{
+       if (is_vmalloc_addr(addr))
+               vfree(addr);
+       else
+               free((void *)addr, M_DRM, 0);
+}
+
 struct vm_page *
 alloc_pages(unsigned int gfp_mask, unsigned int order)
 {
@@ -1939,6 +2044,10 @@ drm_linux_init(void)
 
        pool_init(&idr_pool, sizeof(struct idr_entry), 0, IPL_TTY, 0,
            "idrpl", NULL);
+
+       pool_init(&vmalloc_pool, sizeof(struct vmalloc_entry), 0, IPL_NONE, 0,
+           "vmallocpl", NULL);
+       RBT_INIT(vmalloc_tree, &vmalloc_tree);
 }
 
 void
Index: dev/pci/drm/include/linux/mm.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/include/linux/mm.h,v
retrieving revision 1.6
diff -u -p -r1.6 mm.h
--- dev/pci/drm/include/linux/mm.h      7 Dec 2020 09:10:42 -0000       1.6
+++ dev/pci/drm/include/linux/mm.h      1 Jan 2021 14:27:39 -0000
@@ -30,38 +30,31 @@
 #define PFN_DOWN(x)            ((x) >> PAGE_SHIFT)
 #define PFN_PHYS(x)            ((x) << PAGE_SHIFT)
 
-#define is_vmalloc_addr(ptr)   true
+bool is_vmalloc_addr(const void *);
 
-static inline void *
-kvmalloc(size_t size, gfp_t flags)
-{
-       return malloc(size, M_DRM, flags);
-}
+void *kvmalloc(size_t, gfp_t);
+void kvfree(const void *);
 
 static inline void *
-kvmalloc_array(size_t n, size_t size, int flags)
+kvmalloc_array(size_t n, size_t size, gfp_t flags)
 {
        if (n != 0 && SIZE_MAX / n < size)
                return NULL;
-       return malloc(n * size, M_DRM, flags);
+       return kvmalloc(n * size, flags);
 }
 
 static inline void *
-kvcalloc(size_t n, size_t size, int flags)
+kvcalloc(size_t n, size_t size, gfp_t flags)
 {
-       return kvmalloc_array(n, size, flags | M_ZERO);
+       if (n != 0 && SIZE_MAX / n < size)
+               return NULL;
+       return kvmalloc(n * size, flags | M_ZERO);
 }
 
 static inline void *
-kvzalloc(size_t size, int flags)
-{
-       return malloc(size, M_DRM, flags | M_ZERO);
-}
-
-static inline void
-kvfree(const void *objp)
+kvzalloc(size_t size, gfp_t flags)
 {
-       free((void *)objp, M_DRM, 0);
+       return kvmalloc(size, flags | M_ZERO);
 }
 
 static inline long
Index: dev/pci/drm/include/linux/vmalloc.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/include/linux/vmalloc.h,v
retrieving revision 1.1
diff -u -p -r1.1 vmalloc.h
--- dev/pci/drm/include/linux/vmalloc.h 14 Apr 2019 10:14:53 -0000      1.1
+++ dev/pci/drm/include/linux/vmalloc.h 1 Jan 2021 14:27:39 -0000
@@ -25,25 +25,10 @@
 #include <linux/overflow.h>
 
 void   *vmap(struct vm_page **, unsigned int, unsigned long, pgprot_t);
-void    vunmap(void *, size_t);
-
-static inline void *
-vmalloc(unsigned long size)
-{
-       return malloc(size, M_DRM, M_WAITOK | M_CANFAIL);
-}
-
-static inline void *
-vzalloc(unsigned long size)
-{
-       return malloc(size, M_DRM, M_WAITOK | M_CANFAIL | M_ZERO);
-}
-
-static inline void
-vfree(void *objp)
-{
-       free(objp, M_DRM, 0);
-}
+void   vunmap(void *, size_t);
 
+void   *vmalloc(unsigned long);
+void   *vzalloc(unsigned long);
+void   vfree(const void *);
 
 #endif

Reply via email to