On Tue, Feb 24, 2026 at 12:39:00AM +0530, Ekansh Gupta wrote:
> Introduce a per-device memory manager for the QDA driver that tracks
> IOMMU-capable compute context-bank (CB) devices. Each CB device is
> represented by a qda_iommu_device and registered with a central
> qda_memory_manager instance owned by qda_dev.
> 

The name makes me expect that this manages memory, but it seems to
manage devices and context banks...

> The memory manager maintains an xarray of devices and assigns a
> unique ID to each CB. It also provides basic lifetime management
> and a workqueue for deferred device removal. qda_cb_setup_device()
> now allocates a qda_iommu_device for each CB and registers it with
> the memory manager after DMA configuration succeeds.
> 
> qda_init_device() is extended to allocate and initialize the memory
> manager, while qda_deinit_device() will tear it down in later
> patches.

"in later patches" makes this extremely hard to review. I had to apply
the series to try to navigate the code...

> This prepares the QDA driver for fine-grained memory and
> IOMMU domain management tied to individual CB devices.
> 
> Signed-off-by: Ekansh Gupta <[email protected]>
[..]
>  obj-$(CONFIG_DRM_ACCEL_QDA_COMPUTE_BUS) += qda_compute_bus.o
> diff --git a/drivers/accel/qda/qda_cb.c b/drivers/accel/qda/qda_cb.c
[..]
> @@ -46,6 +52,18 @@ static int qda_cb_setup_device(struct qda_dev *qdev, 
> struct device *cb_dev)
>       rc = dma_set_mask(cb_dev, DMA_BIT_MASK(pa_bits));
>       if (rc) {
>               qda_err(qdev, "%d bit DMA enable failed: %d\n", pa_bits, rc);
> +             kfree(iommu_dev);
> +             return rc;
> +     }
> +
> +     iommu_dev->dev = cb_dev;
> +     iommu_dev->sid = sid;
> +     snprintf(iommu_dev->name, sizeof(iommu_dev->name), "qda_iommu_dev_%u", 
> sid);

It's not easy to follow, when you have scattered the code across so many
patches and so many files. But I don't think iommu_dev->name is ever
used.

> +
> +     rc = qda_memory_manager_register_device(qdev->iommu_mgr, iommu_dev);
> +     if (rc) {
> +             qda_err(qdev, "Failed to register IOMMU device: %d\n", rc);
> +             kfree(iommu_dev);
>               return rc;
>       }
>  
> @@ -127,6 +145,8 @@ int qda_create_cb_device(struct qda_dev *qdev, struct 
> device_node *cb_node)
>  void qda_destroy_cb_device(struct device *cb_dev)
>  {
>       struct iommu_group *group;
> +     struct qda_iommu_device *iommu_dev;
> +     struct qda_dev *qdev;
>  
>       if (!cb_dev) {
>               qda_dbg(NULL, "NULL CB device passed to destroy\n");
> @@ -135,6 +155,18 @@ void qda_destroy_cb_device(struct device *cb_dev)
>  
>       qda_dbg(NULL, "Destroying CB device %s\n", dev_name(cb_dev));
>  
> +     iommu_dev = dev_get_drvdata(cb_dev);

I'm not sure, but I think cb_dev is the struct device allocated in
qda_create_cb_device(), but I can not find a place where you set drvdata
for this device.

> +     if (iommu_dev) {
> +             if (cb_dev->parent) {
> +                     qdev = dev_get_drvdata(cb_dev->parent);
> +                     if (qdev && qdev->iommu_mgr) {
> +                             qda_dbg(NULL, "Unregistering IOMMU device for 
> %s\n",
> +                                     dev_name(cb_dev));
> +                             
> qda_memory_manager_unregister_device(qdev->iommu_mgr, iommu_dev);
> +                     }
> +             }
> +     }
> +
>       group = iommu_group_get(cb_dev);
>       if (group) {
>               qda_dbg(NULL, "Removing %s from IOMMU group\n", 
> dev_name(cb_dev));
> diff --git a/drivers/accel/qda/qda_drv.c b/drivers/accel/qda/qda_drv.c
[..]
> @@ -25,12 +37,46 @@ static void init_device_resources(struct qda_dev *qdev)
>       atomic_set(&qdev->removing, 0);
>  }
>  
> +static int init_memory_manager(struct qda_dev *qdev)
> +{
> +     int ret;
> +
> +     qda_dbg(qdev, "Initializing IOMMU manager\n");
> +
> +     qdev->iommu_mgr = kzalloc_obj(*qdev->iommu_mgr, GFP_KERNEL);
> +     if (!qdev->iommu_mgr)
> +             return -ENOMEM;
> +
> +     ret = qda_memory_manager_init(qdev->iommu_mgr);
> +     if (ret) {
> +             qda_err(qdev, "Failed to initialize memory manager: %d\n", ret);

qda_memory_manager_init() already logged 1 error and 1 debug prints if
you get here.

> +             kfree(qdev->iommu_mgr);
> +             qdev->iommu_mgr = NULL;

We're going to fail probe, you shouldn't have to clear this.

> +             return ret;
> +     }
> +
> +     qda_dbg(qdev, "IOMMU manager initialized successfully\n");
> +     return 0;
> +}
> +
>  int qda_init_device(struct qda_dev *qdev)
>  {
> +     int ret;
> +
>       init_device_resources(qdev);
>  
> +     ret = init_memory_manager(qdev);
> +     if (ret) {
> +             qda_err(qdev, "IOMMU manager initialization failed: %d\n", ret);

And now we have 2 debug prints and two error prints in the log.

> +             goto err_cleanup_resources;
> +     }
> +
>       qda_dbg(qdev, "QDA device initialized successfully\n");

Or, if we get here, you have 8 debug prints.

Please learn how to use kprobe/kretprobe instead of reimplementing it
using printk().

>       return 0;
> +
> +err_cleanup_resources:
> +     cleanup_device_resources(qdev);
> +     return ret;
>  }
>  
>  static int __init qda_core_init(void)
> diff --git a/drivers/accel/qda/qda_drv.h b/drivers/accel/qda/qda_drv.h
> index eb732b7d8091..2cb97e4eafbf 100644
> --- a/drivers/accel/qda/qda_drv.h
> +++ b/drivers/accel/qda/qda_drv.h
> @@ -11,6 +11,7 @@
>  #include <linux/mutex.h>
>  #include <linux/rpmsg.h>
>  #include <linux/xarray.h>
> +#include "qda_memory_manager.h"
>  
>  /* Driver identification */
>  #define DRIVER_NAME "qda"
> @@ -23,6 +24,8 @@ struct qda_dev {
>       struct device *dev;
>       /* Mutex protecting device state */
>       struct mutex lock;
> +     /* IOMMU/memory manager */
> +     struct qda_memory_manager *iommu_mgr;
>       /* Flag indicating device removal in progress */
>       atomic_t removing;
>       /* Name of the DSP (e.g., "cdsp", "adsp") */
> diff --git a/drivers/accel/qda/qda_memory_manager.c 
> b/drivers/accel/qda/qda_memory_manager.c
[..]
> +int qda_memory_manager_register_device(struct qda_memory_manager *mem_mgr,
> +                                    struct qda_iommu_device *iommu_dev)
> +{
> +     int ret;
> +     u32 id;
> +
> +     if (!mem_mgr || !iommu_dev || !iommu_dev->dev) {

How could this happen? You call this function from one place, that looks
like this:

iommu_dev->dev = cb_dev;
iommu_dev->sid = sid;
rc = qda_memory_manager_register_device(qdev->iommu_mgr, iommu_dev);

You just allocated in filled out iommu_dev.

Looking up the callstack, we're coming from qda_rpmsg_probe() which just
did qda_init_device() which created the qsdev->iommu_mgr.

In other words, these can't possibly be NULL.

> +             qda_err(NULL, "Invalid parameters for device registration\n");
> +             return -EINVAL;
> +     }
> +
> +     init_iommu_device_fields(iommu_dev, mem_mgr);
> +
> +     ret = allocate_device_id(mem_mgr, iommu_dev, &id);
> +     if (ret) {
> +             qda_err(NULL, "Failed to allocate device ID: %d (sid=%u)\n", 
> ret, iommu_dev->sid);
> +             return ret;
> +     }
> +
> +     iommu_dev->id = id;
> +
> +     qda_dbg(NULL, "Registered device id=%u (sid=%u)\n", id, iommu_dev->sid);
> +
> +     return 0;
> +}
> +
> +void qda_memory_manager_unregister_device(struct qda_memory_manager *mem_mgr,
> +                                       struct qda_iommu_device *iommu_dev)
> +{
> +     if (!mem_mgr || !iommu_dev) {

The one call to this function is wrapped in:

if (iommu_dev) {
        if (qdev->iommu_mgr) {
                qda_dbg(NULL, ...);
                qda_memory_manager_unregister_device(qdev->iommu_mgr, 
iommu_dev);
        }
}

> +             qda_err(NULL, "Attempted to unregister invalid 
> device/manager\n");
> +             return;
> +     }
> +
> +     qda_dbg(NULL, "Unregistering device id=%u (refcount=%u)\n", 
> iommu_dev->id,
> +             refcount_read(&iommu_dev->refcount));

And just before the call to qda_memory_manager_unregister_device() you
print a debug log, saying you will call this function.

> +
> +     if (refcount_read(&iommu_dev->refcount) == 0) {
> +             xa_erase(&mem_mgr->device_xa, iommu_dev->id);
> +             kfree(iommu_dev);
> +             return;
> +     }
> +
> +     if (refcount_dec_and_test(&iommu_dev->refcount)) {
> +             qda_info(NULL, "Device id=%u refcount reached zero, queuing 
> removal\n",
> +                      iommu_dev->id);
> +             queue_work(mem_mgr->wq, &iommu_dev->remove_work);
> +     }
> +}
> +
[..]
> diff --git a/drivers/accel/qda/qda_memory_manager.h 
> b/drivers/accel/qda/qda_memory_manager.h
[..]
> +
> +/**

This says "kernel-doc"

> + * struct qda_iommu_device - IOMMU device instance for memory management
> + *
> + * This structure represents a single IOMMU-enabled device managed by the
> + * memory manager. Each device can be assigned to a specific process.
> + */
> +struct qda_iommu_device {
> +     /* Unique identifier for this IOMMU device */

But this doesn't follow kernel-doc style.

At the end of the series, 

./scripts/kernel-doc -none -vv -Wall drivers/accel/qda/

reports 270 warnings.

> +     u32 id;
> +     /* Pointer to the underlying device */
> +     struct device *dev;
> +     /* Name for the device */
> +     char name[32];
> +     /* Spinlock protecting concurrent access to device */
> +     spinlock_t lock;
> +     /* Reference counter for device */
> +     refcount_t refcount;
> +     /* Work structure for deferred device removal */
> +     struct work_struct remove_work;
> +     /* Stream ID for IOMMU transactions */
> +     u32 sid;
> +     /* Pointer to parent memory manager */
> +     struct qda_memory_manager *manager;
> +};

Regards,
Bjorn

Reply via email to