On Fri, Feb 27, 2026 at 1:49 PM Albert Esteve <[email protected]> wrote: > > Hi Maxime! > > On Thu, Feb 26, 2026 at 11:12 AM Maxime Ripard <[email protected]> wrote: > > > > Hi Albert, > > > > Thanks for your patch! > > > > On Tue, Feb 24, 2026 at 08:57:33AM +0100, Albert Esteve wrote: > > > Add a dma-buf heap for DT coherent reserved-memory > > > (i.e., 'shared-dma-pool' without 'reusable' property), > > > exposing one heap per region for userspace buffers. > > > > > > The heap binds a synthetic platform device to each region > > > so coherent allocations use the correct dev->dma_mem, > > > and it defers registration until late_initcall when > > > normal allocator are available. > > > > > > This patch includes charging of the coherent heap > > > allocator to the dmem cgroup. > > > > > > Signed-off-by: Albert Esteve <[email protected]> > > > --- > > > This patch introduces a new driver to expose DT coherent reserved-memory > > > regions as dma-buf heaps, allowing userspace buffers to be created. > > > > > > Since these regions are device-dependent, we bind a synthetic platform > > > device to each region so coherent allocations use the correct > > > dev->dma_mem. > > > > > > Following Eric’s [1] and Maxime’s [2] work on charging DMA buffers > > > allocated from userspace to cgroups (dmem), this patch adds the same > > > charging pattern used by CMA heaps patch. Charging is done only through > > > the dma-buf heap interface so it can be attributed to a userspace > > > allocator. > > > > > > This allows each device-specific reserved-memory region to enforce its > > > own limits. > > > > > > [1] > > > https://lore.kernel.org/all/[email protected]/ > > > [2] > > > https://lore.kernel.org/all/[email protected]/ > > > --- > > > drivers/dma-buf/heaps/Kconfig | 17 ++ > > > drivers/dma-buf/heaps/Makefile | 1 + > > > drivers/dma-buf/heaps/coherent_heap.c | 485 > > > ++++++++++++++++++++++++++++++++++ > > > include/linux/dma-heap.h | 11 + > > > kernel/dma/coherent.c | 9 + > > > 5 files changed, 523 insertions(+) > > > > > > diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig > > > index a5eef06c42264..93765dca164e3 100644 > > > --- a/drivers/dma-buf/heaps/Kconfig > > > +++ b/drivers/dma-buf/heaps/Kconfig > > > @@ -12,3 +12,20 @@ config DMABUF_HEAPS_CMA > > > Choose this option to enable dma-buf CMA heap. This heap is backed > > > by the Contiguous Memory Allocator (CMA). If your system has these > > > regions, you should say Y here. > > > + > > > +config DMABUF_HEAPS_COHERENT > > > + bool "DMA-BUF Coherent Reserved-Memory Heap" > > > + depends on DMABUF_HEAPS && OF_RESERVED_MEM && DMA_DECLARE_COHERENT > > > + help > > > + Choose this option to enable coherent reserved-memory dma-buf > > > heaps. > > > + This heap is backed by non-reusable DT "shared-dma-pool" regions. > > > + If your system defines coherent reserved-memory regions, you > > > should > > > + say Y here. > > > + > > > +config COHERENT_AREAS_DEFERRED > > > + int "Max deferred coherent reserved-memory regions" > > > + depends on DMABUF_HEAPS_COHERENT > > > + default 16 > > > + help > > > + Maximum number of coherent reserved-memory regions that can be > > > + deferred for later registration during early boot. > > > diff --git a/drivers/dma-buf/heaps/Makefile > > > b/drivers/dma-buf/heaps/Makefile > > > index 974467791032f..96bda7a65f041 100644 > > > --- a/drivers/dma-buf/heaps/Makefile > > > +++ b/drivers/dma-buf/heaps/Makefile > > > @@ -1,3 +1,4 @@ > > > # SPDX-License-Identifier: GPL-2.0 > > > obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o > > > obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o > > > +obj-$(CONFIG_DMABUF_HEAPS_COHERENT) += coherent_heap.o > > > diff --git a/drivers/dma-buf/heaps/coherent_heap.c > > > b/drivers/dma-buf/heaps/coherent_heap.c > > > new file mode 100644 > > > index 0000000000000..870b2b89aefcb > > > --- /dev/null > > > +++ b/drivers/dma-buf/heaps/coherent_heap.c > > > @@ -0,0 +1,485 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +/* > > > + * DMABUF heap for coherent reserved-memory regions > > > + * > > > + * Copyright (C) 2026 Red Hat, Inc. > > > + * Author: Albert Esteve <[email protected]> > > > + * > > > + */ > > > + > > > +#include <linux/cgroup_dmem.h> > > > +#include <linux/dma-heap.h> > > > +#include <linux/dma-buf.h> > > > +#include <linux/dma-mapping.h> > > > +#include <linux/err.h> > > > +#include <linux/highmem.h> > > > +#include <linux/iosys-map.h> > > > +#include <linux/of_reserved_mem.h> > > > +#include <linux/platform_device.h> > > > +#include <linux/scatterlist.h> > > > +#include <linux/slab.h> > > > +#include <linux/vmalloc.h> > > > + > > > +#define DEFERRED_AREAS_MAX CONFIG_COHERENT_AREAS_DEFERRED > > > > I'm not sure we need to make it configurable. Distros are going to set > > it to the user with the highest number of regions anyway. How about > > using MAX_RESERVED_REGIONS for now? > > Makes sense, will do. > > > > > > > > > [...] > > > > > > +struct coherent_heap { > > > + struct dma_heap *heap; > > > + struct reserved_mem *rmem; > > > + char *name; > > > + struct device *dev; > > > + struct platform_device *pdev; > > > +#if IS_ENABLED(CONFIG_CGROUP_DMEM) > > > + struct dmem_cgroup_region *cg; > > > +#endif > > > > We might want to leave the dmem accounting out for now so we can focus > > on the heap itself. That being said, it ended up being pretty trivial > > for CMA, so maybe it's not too much of a concern. > > Sure. That allows us to follow the same patterns once the CMA series lands. > I will strip all dmem accounting parts for v2. > > > > > > > > > [...] > > > > > > +static int __coherent_heap_register(struct reserved_mem *rmem) > > > +{ > > > + struct dma_heap_export_info exp_info; > > > + struct coherent_heap *coh_heap; > > > +#if IS_ENABLED(CONFIG_CGROUP_DMEM) > > > + struct dmem_cgroup_region *region; > > > +#endif > > > + const char *rmem_name; > > > + int ret; > > > + > > > + if (!rmem) > > > + return -EINVAL; > > > + > > > + rmem_name = rmem->name ? rmem->name : "unknown"; > > > > If the reserved region has no name, we probably shouldn't expose it to > > userspace at all. Using unknown will probably create some bugs if you > > have several, but also it's pretty like to have a name at some point and > > thus we wouldn't have a stable name for the uAPI. > > Agreed. I will return an error code if no name is present. > > > > > > + coh_heap = kzalloc_obj(*coh_heap); > > > + if (!coh_heap) > > > + return -ENOMEM; > > > + > > > + coh_heap->name = kasprintf(GFP_KERNEL, "coherent_%s", rmem_name); > > > + if (!coh_heap->name) { > > > + ret = -ENOMEM; > > > + goto free_coherent_heap; > > > + } > > > > Similarly, we shouldn't use the coherent_ prefix for the heap name. If > > the backing allocator ever changes (and between contiguous and coherent, > > the difference is just a single property value in the DT), then the name > > would change and userspace would break. We should directly use the name > > of the region here. > > > > > + coh_heap->rmem = rmem; > > > + > > > + /* create a platform device per rmem and bind it */ > > > + coh_heap->pdev = platform_device_register_simple("coherent-heap", > > > + > > > PLATFORM_DEVID_AUTO, > > > + NULL, 0); > > > + if (IS_ERR(coh_heap->pdev)) { > > > + ret = PTR_ERR(coh_heap->pdev); > > > + goto free_name; > > > + } > > > > We probably should use a faux_device here instead of a platform_device, > > but more importantly, the heap itself already has a device allocated for > > it (dev_ret in dma_heap_add). > > > > It's not in struct dma_heap yet, but there's a patch that moves it there > > that we should probably carry: > > https://lore.kernel.org/r/[email protected]/ > > Thanks for sharing the link! I will pick the patch. > > > > > > + if (rmem->ops && rmem->ops->device_init) { > > > + ret = rmem->ops->device_init(rmem, &coh_heap->pdev->dev); > > > + if (ret) > > > + goto pdev_unregister; > > > + } > > > > I'm not really a fan of calling ops directly. Maybe we should create an > > of_reserved_mem_device_init_with_mem function that would do it for us > > (and would be called by of_reserved_mem_device_init_by_idx and the > > likes). > > Agreed. > > > > > > + coh_heap->dev = &coh_heap->pdev->dev; > > > +#if IS_ENABLED(CONFIG_CGROUP_DMEM) > > > + region = dmem_cgroup_register_region(rmem->size, "coh/%s", > > > rmem_name); > > > + if (IS_ERR(region)) { > > > + ret = PTR_ERR(region); > > > + goto pdev_unregister; > > > + } > > > + coh_heap->cg = region; > > > +#endif > > > > Same comment than for CMA here: it should really be created by the > > coherent memory region itself. > > > > > + exp_info.name = coh_heap->name; > > > + exp_info.ops = &coherent_heap_ops; > > > + exp_info.priv = coh_heap; > > > + > > > + coh_heap->heap = dma_heap_add(&exp_info); > > > + if (IS_ERR(coh_heap->heap)) { > > > + ret = PTR_ERR(coh_heap->heap); > > > + goto cg_unregister; > > > + } > > > + > > > + return 0; > > > + > > > +cg_unregister: > > > +#if IS_ENABLED(CONFIG_CGROUP_DMEM) > > > + dmem_cgroup_unregister_region(coh_heap->cg); > > > +#endif > > > +pdev_unregister: > > > + platform_device_unregister(coh_heap->pdev); > > > + coh_heap->pdev = NULL; > > > +free_name: > > > + kfree(coh_heap->name); > > > +free_coherent_heap: > > > + kfree(coh_heap); > > > + > > > + return ret; > > > +} > > > + > > > +int dma_heap_coherent_register(struct reserved_mem *rmem) > > > +{ > > > + int ret; > > > + > > > + ret = __coherent_heap_register(rmem); > > > + if (ret == -ENOMEM) > > > + return coherent_heap_add_deferred(rmem); > > > + return ret; > > > +} > > > > I appreciate you did it like we did for CMA, but if we ever want to make > > that heap a module you'll end up with a dependency from the core kernel > > on a module which doesn't work. The best here might be to wait a bit > > until we have somewhat of an agreement on > > > > https://lore.kernel.org/r/[email protected] > > > > > +static int __init coherent_heap_register_deferred(void) > > > +{ > > > + unsigned int i; > > > + int ret; > > > + > > > + for (i = 0; i < rmem_areas_deferred_num; i++) { > > > + struct reserved_mem *rmem = rmem_areas_deferred[i]; > > > + > > > + ret = __coherent_heap_register(rmem); > > > + if (ret) { > > > + pr_warn("Failed to add coherent heap %s", > > > + rmem->name ? rmem->name : "unknown"); > > > + continue; > > > + } > > > + } > > > + > > > + return 0; > > > +} > > > +late_initcall(coherent_heap_register_deferred); > > > > Why do you need a late_initcall here? Isn't module_init enough? > > When I tested this initially, I relied on direct invocations from > cma/coherent.c to register new coherent heap areas. However it failed > in `kzalloc_obj` calls within the register function. Then I read the > article about boot time memory management[1] and saw other drivers > collected info for deferred initialization at late_initcall(), similar > to what I tried to do here. I honestly did not try with module_init(). > Since I will refactor this part to follow your previous comments, I > will try to update if possible. > > [1] https://docs.kernel.org/core-api/boot-time-mm.html > > > > > > +MODULE_DESCRIPTION("DMA-BUF heap for coherent reserved-memory regions"); > > > diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h > > > index 648328a64b27e..e894cfa1ecf1a 100644 > > > --- a/include/linux/dma-heap.h > > > +++ b/include/linux/dma-heap.h > > > @@ -9,9 +9,11 @@ > > > #ifndef _DMA_HEAPS_H > > > #define _DMA_HEAPS_H > > > > > > +#include <linux/errno.h> > > > #include <linux/types.h> > > > > > > struct dma_heap; > > > +struct reserved_mem; > > > > > > /** > > > * struct dma_heap_ops - ops to operate on a given heap > > > @@ -48,4 +50,13 @@ struct dma_heap *dma_heap_add(const struct > > > dma_heap_export_info *exp_info); > > > > > > extern bool mem_accounting; > > > > > > +#if IS_ENABLED(CONFIG_DMABUF_HEAPS_COHERENT) > > > +int dma_heap_coherent_register(struct reserved_mem *rmem); > > > +#else > > > +static inline int dma_heap_coherent_register(struct reserved_mem *rmem) > > > +{ > > > + return -EOPNOTSUPP; > > > +} > > > +#endif > > > + > > > #endif /* _DMA_HEAPS_H */ > > > diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c > > > index 1147497bc512c..f49d13e460e4b 100644 > > > --- a/kernel/dma/coherent.c > > > +++ b/kernel/dma/coherent.c > > > @@ -9,6 +9,7 @@ > > > #include <linux/module.h> > > > #include <linux/dma-direct.h> > > > #include <linux/dma-map-ops.h> > > > +#include <linux/dma-heap.h> > > > > > > struct dma_coherent_mem { > > > void *virt_base; > > > @@ -393,6 +394,14 @@ static int __init rmem_dma_setup(struct reserved_mem > > > *rmem) > > > rmem->ops = &rmem_dma_ops; > > > pr_info("Reserved memory: created DMA memory pool at %pa, size %ld > > > MiB\n", > > > &rmem->base, (unsigned long)rmem->size / SZ_1M); > > > + > > > + if (IS_ENABLED(CONFIG_DMABUF_HEAPS_COHERENT)) { > > > + int ret = dma_heap_coherent_register(rmem); > > > + > > > + if (ret) > > > + pr_warn("Reserved memory: failed to register > > > coherent heap for %s (%d)\n", > > > + rmem->name ? rmem->name : "unknown", ret); > > > + } > > > > I think this should be split into a patch of its own. It's going to be > > reviewed (and possibly merged) by another maintainer, through another > > tree. > > That's fine. I will split it into another series then. I guess I can > send it with a Based-On: tag or similar to link two series together?
Oof, never mind. I see now that the suggestion is to split into a patch of its own, not a different series. I misread it last time. That makes more sense now. > > BR, > Albert > > > > > Maxime
