On embedded platforms a central process often allocates dma-buf
memory on behalf of client applications. Without a way to
attribute the charge to the requesting client's cgroup, the
cost lands on the allocator, making per-cgroup memory limits
ineffective for the actual consumers.
Add charge_pid_fd to struct dma_heap_allocation_data. When set to
a valid pidfd, DMA_HEAP_IOCTL_ALLOC resolves the target task's
memcg and charges the buffer there via mem_cgroup_charge_dmabuf()
inside dma_heap_buffer_alloc(). Without charge_pid_fd, and with
the mem_accounting module parameter enabled, the buffer is charged
to the allocator's own cgroup.
Additionally, commit 3c227be90659 ("dma-buf: system_heap: account for
system heap allocation in memcg") adds __GFP_ACCOUNT to system-heap
page allocations. Keeping __GFP_ACCOUNT would charge the same pages
twice (once to kmem, once to MEMCG_DMABUF), thus remove it and route
all accounting through a single MEMCG_DMABUF path.
Usage examples:
1. Central allocator charging to a client at allocation time.
The allocator knows the client's PID (e.g., from binder's
sender_pid) and uses pidfd to attribute the charge:
pid_t client_pid = txn->sender_pid;
int pidfd = pidfd_open(client_pid, 0);
struct dma_heap_allocation_data alloc = {
.len = buffer_size,
.fd_flags = O_RDWR | O_CLOEXEC,
.charge_pid_fd = pidfd,
};
ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
close(pidfd);
/* alloc.fd is now charged to client's cgroup */
2. Default allocation (no pidfd, mem_accounting=1).
When charge_pid_fd is not set and the mem_accounting module
parameter is enabled, the buffer is charged to the allocator's
own cgroup:
struct dma_heap_allocation_data alloc = {
.len = buffer_size,
.fd_flags = O_RDWR | O_CLOEXEC,
};
ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
/* charged to current process's cgroup */
Current limitations:
- Single-owner model: a dma-buf carries one memcg charge regardless of
  how many processes share it. This means that only the first owner (and
  exporter) of the shared buffer bears the charge.
- Only memcg accounting is supported. While this makes sense for system
  heap buffers, other heaps (e.g., CMA heaps) will additionally require
  selective charging through the dmem controller.
Signed-off-by: Albert Esteve <[email protected]>
---
Documentation/admin-guide/cgroup-v2.rst | 5 ++--
drivers/dma-buf/dma-buf.c | 16 ++++---------
drivers/dma-buf/dma-heap.c | 42 ++++++++++++++++++++++++++++++---
drivers/dma-buf/heaps/system_heap.c | 2 --
include/uapi/linux/dma-heap.h | 6 +++++
5 files changed, 53 insertions(+), 18 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 8bdbc2e866430..824d269531eb1 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1636,8 +1636,9 @@ The following nested keys are defined.
structures.
 	dmabuf (npn)
-	  Amount of memory used for exported DMA buffers allocated by the cgroup.
-	  Stays with the allocating cgroup regardless of how the buffer is shared.
+	  Amount of memory used for exported DMA buffers allocated by or on
+	  behalf of the cgroup. Stays with the allocating cgroup regardless
+	  of how the buffer is shared.
workingset_refault_anon
Number of refaults of previously evicted anonymous pages.
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ce02377f48908..23fb758b78297 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -181,8 +181,11 @@ static void dma_buf_release(struct dentry *dentry)
*/
BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
-	mem_cgroup_uncharge_dmabuf(dmabuf->memcg, PAGE_ALIGN(dmabuf->size) / PAGE_SIZE);
-	mem_cgroup_put(dmabuf->memcg);
+	if (dmabuf->memcg) {
+		mem_cgroup_uncharge_dmabuf(dmabuf->memcg,
+					   PAGE_ALIGN(dmabuf->size) / PAGE_SIZE);
+		mem_cgroup_put(dmabuf->memcg);
+	}
dmabuf->ops->release(dmabuf);
@@ -764,13 +767,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
dmabuf->resv = resv;
}
-	dmabuf->memcg = get_mem_cgroup_from_mm(current->mm);
-	if (!mem_cgroup_charge_dmabuf(dmabuf->memcg, PAGE_ALIGN(dmabuf->size) / PAGE_SIZE,
-				      GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto err_memcg;
-	}
-
file->private_data = dmabuf;
file->f_path.dentry->d_fsdata = dmabuf;
dmabuf->file = file;
@@ -781,8 +777,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
return dmabuf;
-err_memcg:
- mem_cgroup_put(dmabuf->memcg);
err_file:
fput(file);
err_module:
diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
index ac5f8685a6494..ff6e259afcdc0 100644
--- a/drivers/dma-buf/dma-heap.c
+++ b/drivers/dma-buf/dma-heap.c
@@ -7,13 +7,17 @@
*/
#include <linux/cdev.h>
+#include <linux/cgroup.h>
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
+#include <linux/memcontrol.h>
+#include <linux/sched/mm.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/nospec.h>
+#include <linux/pidfd.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
@@ -55,10 +59,12 @@ MODULE_PARM_DESC(mem_accounting,
 		 "Enable cgroup-based memory accounting for dma-buf heap allocations (default=false).");
static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
- u32 fd_flags,
- u64 heap_flags)
+ u32 fd_flags, u64 heap_flags,
+ struct mem_cgroup *charge_to)
{
struct dma_buf *dmabuf;
+ unsigned int nr_pages;
+ struct mem_cgroup *memcg = charge_to;
int fd;
/*
@@ -73,6 +79,22 @@ static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
+ nr_pages = len / PAGE_SIZE;
+
+ if (memcg)
+ css_get(&memcg->css);
+ else if (mem_accounting)
+ memcg = get_mem_cgroup_from_mm(current->mm);
+
+ if (memcg) {
+ if (!mem_cgroup_charge_dmabuf(memcg, nr_pages, GFP_KERNEL)) {
+ mem_cgroup_put(memcg);
+ dma_buf_put(dmabuf);
+ return -ENOMEM;
+ }
+ dmabuf->memcg = memcg;
+ }
+
fd = dma_buf_fd(dmabuf, fd_flags);
if (fd < 0) {
dma_buf_put(dmabuf);
@@ -102,6 +124,9 @@ static long dma_heap_ioctl_allocate(struct file *file, void *data)
{
struct dma_heap_allocation_data *heap_allocation = data;
struct dma_heap *heap = file->private_data;
+ struct mem_cgroup *memcg = NULL;
+ struct task_struct *task;
+ unsigned int pidfd_flags;
int fd;
if (heap_allocation->fd)
@@ -113,9 +138,20 @@ static long dma_heap_ioctl_allocate(struct file *file, void *data)
if (heap_allocation->heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS)
return -EINVAL;
+	if (heap_allocation->charge_pid_fd) {
+		task = pidfd_get_task(heap_allocation->charge_pid_fd, &pidfd_flags);
+		if (IS_ERR(task))
+			return PTR_ERR(task);
+
+		memcg = get_mem_cgroup_from_mm(task->mm);
+		put_task_struct(task);
+	}
+
fd = dma_heap_buffer_alloc(heap, heap_allocation->len,
heap_allocation->fd_flags,
- heap_allocation->heap_flags);
+ heap_allocation->heap_flags,
+ memcg);
+ mem_cgroup_put(memcg);
if (fd < 0)
return fd;
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 03c2b87cb1112..95d7688167b93 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -385,8 +385,6 @@ static struct page *alloc_largest_available(unsigned long size,
if (max_order < orders[i])
continue;
flags = order_flags[i];
- if (mem_accounting)
- flags |= __GFP_ACCOUNT;
page = alloc_pages(flags, orders[i]);
if (!page)
continue;
diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h
index a4cf716a49fa6..e02b0f8cbc6a1 100644
--- a/include/uapi/linux/dma-heap.h
+++ b/include/uapi/linux/dma-heap.h
@@ -29,6 +29,10 @@
* handle to the allocated dma-buf
* @fd_flags: file descriptor flags used when allocating
* @heap_flags: flags passed to heap
+ * @charge_pid_fd: optional pidfd of the process whose cgroup is charged for
+ *		this allocation; 0 selects the default: charge the calling
+ *		process's cgroup only if the mem_accounting parameter is set
+ * @__padding:	reserved, must be zero
*
* Provided by userspace as an argument to the ioctl
*/
@@ -37,6 +41,8 @@ struct dma_heap_allocation_data {
__u32 fd;
__u32 fd_flags;
__u64 heap_flags;
+ __u32 charge_pid_fd;
+ __u32 __padding;
};
#define DMA_HEAP_IOC_MAGIC 'H'
--
2.53.0