On Tue, Feb 24, 2026 at 12:39:00AM +0530, Ekansh Gupta wrote: > Introduce a per-device memory manager for the QDA driver that tracks > IOMMU-capable compute context-bank (CB) devices. Each CB device is > represented by a qda_iommu_device and registered with a central > qda_memory_manager instance owned by qda_dev. >
The name makes me expect that this manages memory, but it seems to manage devices and context banks... > The memory manager maintains an xarray of devices and assigns a > unique ID to each CB. It also provides basic lifetime management > and a workqueue for deferred device removal. qda_cb_setup_device() > now allocates a qda_iommu_device for each CB and registers it with > the memory manager after DMA configuration succeeds. > > qda_init_device() is extended to allocate and initialize the memory > manager, while qda_deinit_device() will tear it down in later > patches. "in later patches" makes this extremely hard to review. I had to apply the series to try to navigate the code... > This prepares the QDA driver for fine-grained memory and > IOMMU domain management tied to individual CB devices. > > Signed-off-by: Ekansh Gupta <[email protected]> [..] > obj-$(CONFIG_DRM_ACCEL_QDA_COMPUTE_BUS) += qda_compute_bus.o > diff --git a/drivers/accel/qda/qda_cb.c b/drivers/accel/qda/qda_cb.c [..] > @@ -46,6 +52,18 @@ static int qda_cb_setup_device(struct qda_dev *qdev, > struct device *cb_dev) > rc = dma_set_mask(cb_dev, DMA_BIT_MASK(pa_bits)); > if (rc) { > qda_err(qdev, "%d bit DMA enable failed: %d\n", pa_bits, rc); > + kfree(iommu_dev); > + return rc; > + } > + > + iommu_dev->dev = cb_dev; > + iommu_dev->sid = sid; > + snprintf(iommu_dev->name, sizeof(iommu_dev->name), "qda_iommu_dev_%u", > sid); It's not easy to follow, when you have scattered the code across so many patches and so many files. But I don't think iommu_dev->name is ever used. > + > + rc = qda_memory_manager_register_device(qdev->iommu_mgr, iommu_dev); > + if (rc) { > + qda_err(qdev, "Failed to register IOMMU device: %d\n", rc); > + kfree(iommu_dev); > return rc; > } > > @@ -127,6 +145,8 @@ int qda_create_cb_device(struct qda_dev *qdev, struct > device_node *cb_node) > void qda_destroy_cb_device(struct device *cb_dev) > { > struct iommu_group *group; > + struct qda_iommu_device *iommu_dev; > + struct qda_dev *qdev; > > if (!cb_dev) { > qda_dbg(NULL, "NULL CB device passed to destroy\n"); > @@ -135,6 +155,18 @@ void qda_destroy_cb_device(struct device *cb_dev) > > qda_dbg(NULL, "Destroying CB device %s\n", dev_name(cb_dev)); > > + iommu_dev = dev_get_drvdata(cb_dev); I'm not sure, but I think cb_dev is the struct device allocated in qda_create_cb_device(), but I can not find a place where you set drvdata for this device. > + if (iommu_dev) { > + if (cb_dev->parent) { > + qdev = dev_get_drvdata(cb_dev->parent); > + if (qdev && qdev->iommu_mgr) { > + qda_dbg(NULL, "Unregistering IOMMU device for > %s\n", > + dev_name(cb_dev)); > + > qda_memory_manager_unregister_device(qdev->iommu_mgr, iommu_dev); > + } > + } > + } > + > group = iommu_group_get(cb_dev); > if (group) { > qda_dbg(NULL, "Removing %s from IOMMU group\n", > dev_name(cb_dev)); > diff --git a/drivers/accel/qda/qda_drv.c b/drivers/accel/qda/qda_drv.c [..] > @@ -25,12 +37,46 @@ static void init_device_resources(struct qda_dev *qdev) > atomic_set(&qdev->removing, 0); > } > > +static int init_memory_manager(struct qda_dev *qdev) > +{ > + int ret; > + > + qda_dbg(qdev, "Initializing IOMMU manager\n"); > + > + qdev->iommu_mgr = kzalloc_obj(*qdev->iommu_mgr, GFP_KERNEL); > + if (!qdev->iommu_mgr) > + return -ENOMEM; > + > + ret = qda_memory_manager_init(qdev->iommu_mgr); > + if (ret) { > + qda_err(qdev, "Failed to initialize memory manager: %d\n", ret); qda_memory_manager_init() already logged 1 error and 1 debug prints if you get here. > + kfree(qdev->iommu_mgr); > + qdev->iommu_mgr = NULL; We're going to fail probe, you shouldn't have to clear this. > + return ret; > + } > + > + qda_dbg(qdev, "IOMMU manager initialized successfully\n"); > + return 0; > +} > + > int qda_init_device(struct qda_dev *qdev) > { > + int ret; > + > init_device_resources(qdev); > > + ret = init_memory_manager(qdev); > + if (ret) { > + qda_err(qdev, "IOMMU manager initialization failed: %d\n", ret); And now we have 2 debug prints and two error prints in the log. > + goto err_cleanup_resources; > + } > + > qda_dbg(qdev, "QDA device initialized successfully\n"); Or, if we get here, you have 8 debug prints. Please learn how to use kprobe/kretprobe instead of reimplementing it using printk(). > return 0; > + > +err_cleanup_resources: > + cleanup_device_resources(qdev); > + return ret; > } > > static int __init qda_core_init(void) > diff --git a/drivers/accel/qda/qda_drv.h b/drivers/accel/qda/qda_drv.h > index eb732b7d8091..2cb97e4eafbf 100644 > --- a/drivers/accel/qda/qda_drv.h > +++ b/drivers/accel/qda/qda_drv.h > @@ -11,6 +11,7 @@ > #include <linux/mutex.h> > #include <linux/rpmsg.h> > #include <linux/xarray.h> > +#include "qda_memory_manager.h" > > /* Driver identification */ > #define DRIVER_NAME "qda" > @@ -23,6 +24,8 @@ struct qda_dev { > struct device *dev; > /* Mutex protecting device state */ > struct mutex lock; > + /* IOMMU/memory manager */ > + struct qda_memory_manager *iommu_mgr; > /* Flag indicating device removal in progress */ > atomic_t removing; > /* Name of the DSP (e.g., "cdsp", "adsp") */ > diff --git a/drivers/accel/qda/qda_memory_manager.c > b/drivers/accel/qda/qda_memory_manager.c [..] > +int qda_memory_manager_register_device(struct qda_memory_manager *mem_mgr, > + struct qda_iommu_device *iommu_dev) > +{ > + int ret; > + u32 id; > + > + if (!mem_mgr || !iommu_dev || !iommu_dev->dev) { How could this happen? You call this function from one place, that looks like this: iommu_dev->dev = cb_dev; iommu_dev->sid = sid; rc = qda_memory_manager_register_device(qdev->iommu_mgr, iommu_dev); You just allocated in filled out iommu_dev. Looking up the callstack, we're coming from qda_rpmsg_probe() which just did qda_init_device() which created the qsdev->iommu_mgr. In other words, these can't possibly be NULL. > + qda_err(NULL, "Invalid parameters for device registration\n"); > + return -EINVAL; > + } > + > + init_iommu_device_fields(iommu_dev, mem_mgr); > + > + ret = allocate_device_id(mem_mgr, iommu_dev, &id); > + if (ret) { > + qda_err(NULL, "Failed to allocate device ID: %d (sid=%u)\n", > ret, iommu_dev->sid); > + return ret; > + } > + > + iommu_dev->id = id; > + > + qda_dbg(NULL, "Registered device id=%u (sid=%u)\n", id, iommu_dev->sid); > + > + return 0; > +} > + > +void qda_memory_manager_unregister_device(struct qda_memory_manager *mem_mgr, > + struct qda_iommu_device *iommu_dev) > +{ > + if (!mem_mgr || !iommu_dev) { The one call to this function is wrapped in: if (iommu_dev) { if (qdev->iommu_mgr) { qda_dbg(NULL, ...); qda_memory_manager_unregister_device(qdev->iommu_mgr, iommu_dev); } } > + qda_err(NULL, "Attempted to unregister invalid > device/manager\n"); > + return; > + } > + > + qda_dbg(NULL, "Unregistering device id=%u (refcount=%u)\n", > iommu_dev->id, > + refcount_read(&iommu_dev->refcount)); And just before the call to qda_memory_manager_unregister_device() you print a debug log, saying you will call this function. > + > + if (refcount_read(&iommu_dev->refcount) == 0) { > + xa_erase(&mem_mgr->device_xa, iommu_dev->id); > + kfree(iommu_dev); > + return; > + } > + > + if (refcount_dec_and_test(&iommu_dev->refcount)) { > + qda_info(NULL, "Device id=%u refcount reached zero, queuing > removal\n", > + iommu_dev->id); > + queue_work(mem_mgr->wq, &iommu_dev->remove_work); > + } > +} > + [..] > diff --git a/drivers/accel/qda/qda_memory_manager.h > b/drivers/accel/qda/qda_memory_manager.h [..] > + > +/** This says "kernel-doc" > + * struct qda_iommu_device - IOMMU device instance for memory management > + * > + * This structure represents a single IOMMU-enabled device managed by the > + * memory manager. Each device can be assigned to a specific process. > + */ > +struct qda_iommu_device { > + /* Unique identifier for this IOMMU device */ But this doesn't follow kernel-doc style. At the end of the series, ./scripts/kernel-doc -none -vv -Wall drivers/accel/qda/ reports 270 warnings. > + u32 id; > + /* Pointer to the underlying device */ > + struct device *dev; > + /* Name for the device */ > + char name[32]; > + /* Spinlock protecting concurrent access to device */ > + spinlock_t lock; > + /* Reference counter for device */ > + refcount_t refcount; > + /* Work structure for deferred device removal */ > + struct work_struct remove_work; > + /* Stream ID for IOMMU transactions */ > + u32 sid; > + /* Pointer to parent memory manager */ > + struct qda_memory_manager *manager; > +}; Regards, Bjorn
