On embedded platforms a central process often allocates dma-buf
memory on behalf of client applications. Without a way to
attribute the charge to the requesting client's cgroup, the
cost lands on the allocator, making per-cgroup memory limits
ineffective for the actual consumers.

Add charge_pid_fd to struct dma_heap_allocation_data. When set to
a valid pidfd, DMA_HEAP_IOCTL_ALLOC resolves the target task's
memcg and charges the buffer there via mem_cgroup_charge_dmabuf()
inside dma_heap_buffer_alloc(). Without charge_pid_fd, and with
the mem_accounting module parameter enabled, the buffer is charged
to the allocator's own cgroup.

Additionally, commit 3c227be90659 ("dma-buf: system_heap: account for
system heap allocation in memcg") added __GFP_ACCOUNT to system-heap
page allocations. Keeping __GFP_ACCOUNT would charge the same pages
twice (once to kmem, once to MEMCG_DMABUF), so remove it and route
all accounting through a single MEMCG_DMABUF path.

Usage examples:

  1. Central allocator charging to a client at allocation time.
     The allocator knows the client's PID (e.g., from binder's
     sender_pid) and uses pidfd to attribute the charge:

       pid_t client_pid = txn->sender_pid;
       int pidfd = pidfd_open(client_pid, 0);

       struct dma_heap_allocation_data alloc = {
           .len             = buffer_size,
           .fd_flags        = O_RDWR | O_CLOEXEC,
           .charge_pid_fd   = pidfd,
       };
       ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
       close(pidfd);
       /* alloc.fd is now charged to client's cgroup */

  2. Default allocation (no pidfd, mem_accounting=1).
     When charge_pid_fd is not set and the mem_accounting module
     parameter is enabled, the buffer is charged to the allocator's
     own cgroup:

       struct dma_heap_allocation_data alloc = {
           .len      = buffer_size,
           .fd_flags = O_RDWR | O_CLOEXEC,
       };
       ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
       /* charged to current process's cgroup */

Current limitations:

 - Single-owner model: a dma-buf carries exactly one memcg charge,
   regardless of how many processes share it. This means only the first
   owner — the cgroup charged at allocation time, whether the allocator's
   own or the one designated via charge_pid_fd — bears the cost of the
   shared buffer.
 - Only memcg accounting is supported. While this makes sense for system
   heap buffers, other heaps (e.g., CMA heaps) will also require
   selectively charging the dmem controller.

Signed-off-by: Albert Esteve <[email protected]>
---
 Documentation/admin-guide/cgroup-v2.rst |  5 ++--
 drivers/dma-buf/dma-buf.c               | 16 ++++---------
 drivers/dma-buf/dma-heap.c              | 42 ++++++++++++++++++++++++++++++---
 drivers/dma-buf/heaps/system_heap.c     |  2 --
 include/uapi/linux/dma-heap.h           |  6 +++++
 5 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 8bdbc2e866430..824d269531eb1 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1636,8 +1636,9 @@ The following nested keys are defined.
                structures.
 
          dmabuf (npn)
-               Amount of memory used for exported DMA buffers allocated by the 
cgroup.
-               Stays with the allocating cgroup regardless of how the buffer 
is shared.
+               Amount of memory used for exported DMA buffers allocated by or 
on
+               behalf of the cgroup. Stays with the allocating cgroup 
regardless
+               of how the buffer is shared.
 
          workingset_refault_anon
                Number of refaults of previously evicted anonymous pages.
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ce02377f48908..23fb758b78297 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -181,8 +181,11 @@ static void dma_buf_release(struct dentry *dentry)
         */
        BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
 
-       mem_cgroup_uncharge_dmabuf(dmabuf->memcg, PAGE_ALIGN(dmabuf->size) / 
PAGE_SIZE);
-       mem_cgroup_put(dmabuf->memcg);
+       if (dmabuf->memcg) {
+               mem_cgroup_uncharge_dmabuf(dmabuf->memcg,
+                                         PAGE_ALIGN(dmabuf->size) / PAGE_SIZE);
+               mem_cgroup_put(dmabuf->memcg);
+       }
 
        dmabuf->ops->release(dmabuf);
 
@@ -764,13 +767,6 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
                dmabuf->resv = resv;
        }
 
-       dmabuf->memcg = get_mem_cgroup_from_mm(current->mm);
-       if (!mem_cgroup_charge_dmabuf(dmabuf->memcg, PAGE_ALIGN(dmabuf->size) / 
PAGE_SIZE,
-                                     GFP_KERNEL)) {
-               ret = -ENOMEM;
-               goto err_memcg;
-       }
-
        file->private_data = dmabuf;
        file->f_path.dentry->d_fsdata = dmabuf;
        dmabuf->file = file;
@@ -781,8 +777,6 @@ struct dma_buf *dma_buf_export(const struct 
dma_buf_export_info *exp_info)
 
        return dmabuf;
 
-err_memcg:
-       mem_cgroup_put(dmabuf->memcg);
 err_file:
        fput(file);
 err_module:
diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
index ac5f8685a6494..ff6e259afcdc0 100644
--- a/drivers/dma-buf/dma-heap.c
+++ b/drivers/dma-buf/dma-heap.c
@@ -7,13 +7,17 @@
  */
 
 #include <linux/cdev.h>
+#include <linux/cgroup.h>
 #include <linux/device.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-heap.h>
+#include <linux/memcontrol.h>
+#include <linux/sched/mm.h>
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/list.h>
 #include <linux/nospec.h>
+#include <linux/pidfd.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <linux/xarray.h>
@@ -55,10 +59,12 @@ MODULE_PARM_DESC(mem_accounting,
                 "Enable cgroup-based memory accounting for dma-buf heap 
allocations (default=false).");
 
 static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
-                                u32 fd_flags,
-                                u64 heap_flags)
+                                u32 fd_flags, u64 heap_flags,
+                                struct mem_cgroup *charge_to)
 {
        struct dma_buf *dmabuf;
+       unsigned int nr_pages;
+       struct mem_cgroup *memcg = charge_to;
        int fd;
 
        /*
@@ -73,6 +79,22 @@ static int dma_heap_buffer_alloc(struct dma_heap *heap, 
size_t len,
        if (IS_ERR(dmabuf))
                return PTR_ERR(dmabuf);
 
+       nr_pages = len / PAGE_SIZE;
+
+       if (memcg)
+               css_get(&memcg->css);
+       else if (mem_accounting)
+               memcg = get_mem_cgroup_from_mm(current->mm);
+
+       if (memcg) {
+               if (!mem_cgroup_charge_dmabuf(memcg, nr_pages, GFP_KERNEL)) {
+                       mem_cgroup_put(memcg);
+                       dma_buf_put(dmabuf);
+                       return -ENOMEM;
+               }
+               dmabuf->memcg = memcg;
+       }
+
        fd = dma_buf_fd(dmabuf, fd_flags);
        if (fd < 0) {
                dma_buf_put(dmabuf);
@@ -102,6 +124,9 @@ static long dma_heap_ioctl_allocate(struct file *file, void 
*data)
 {
        struct dma_heap_allocation_data *heap_allocation = data;
        struct dma_heap *heap = file->private_data;
+       struct mem_cgroup *memcg = NULL;
+       struct task_struct *task;
+       unsigned int pidfd_flags;
        int fd;
 
        if (heap_allocation->fd)
@@ -113,9 +138,20 @@ static long dma_heap_ioctl_allocate(struct file *file, 
void *data)
        if (heap_allocation->heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS)
                return -EINVAL;
 
+       if (heap_allocation->charge_pid_fd) {
+               task = pidfd_get_task(heap_allocation->charge_pid_fd, 
&pidfd_flags);
+               if (IS_ERR(task))
+                       return PTR_ERR(task);
+
+               memcg = get_mem_cgroup_from_mm(task->mm);
+               put_task_struct(task);
+       }
+
        fd = dma_heap_buffer_alloc(heap, heap_allocation->len,
                                   heap_allocation->fd_flags,
-                                  heap_allocation->heap_flags);
+                                  heap_allocation->heap_flags,
+                                  memcg);
+       mem_cgroup_put(memcg);
        if (fd < 0)
                return fd;
 
diff --git a/drivers/dma-buf/heaps/system_heap.c 
b/drivers/dma-buf/heaps/system_heap.c
index 03c2b87cb1112..95d7688167b93 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -385,8 +385,6 @@ static struct page *alloc_largest_available(unsigned long 
size,
                if (max_order < orders[i])
                        continue;
                flags = order_flags[i];
-               if (mem_accounting)
-                       flags |= __GFP_ACCOUNT;
                page = alloc_pages(flags, orders[i]);
                if (!page)
                        continue;
diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h
index a4cf716a49fa6..e02b0f8cbc6a1 100644
--- a/include/uapi/linux/dma-heap.h
+++ b/include/uapi/linux/dma-heap.h
@@ -29,6 +29,10 @@
  *                     handle to the allocated dma-buf
  * @fd_flags:          file descriptor flags used when allocating
  * @heap_flags:                flags passed to heap
+ * @charge_pid_fd:     optional pidfd of the process whose cgroup should be
+ *                     charged for this allocation; 0 means charge the calling
+ *                     process's cgroup
+ * @__padding:         reserved, must be zero
  *
  * Provided by userspace as an argument to the ioctl
  */
@@ -37,6 +41,8 @@ struct dma_heap_allocation_data {
        __u32 fd;
        __u32 fd_flags;
        __u64 heap_flags;
+       __u32 charge_pid_fd;
+       __u32 __padding;
 };
 
 #define DMA_HEAP_IOC_MAGIC             'H'

-- 
2.53.0


Reply via email to