On 12/3/2025 9:50 PM, Stefan Roese wrote:
Testing on our ZynqMP platform has shown that some R5 messages might
get dropped under high CPU load. This patch creates a new high-prio
workqueue which is now used instead of the default system workqueue.
With this change we don't experience these message drops any more.
Signed-off-by: Stefan Roese <[email protected]>
Cc: Tanmay Shah <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Zhongqiu Han <[email protected]>
---
v2:
- Also call destroy_workqueue() in zynqmp_r5_cluster_exit() (suggested by
Zhongqiu Han)
- Correct call seq to avoid UAF (suggested by Zhongqiu Han)
drivers/remoteproc/xlnx_r5_remoteproc.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c
b/drivers/remoteproc/xlnx_r5_remoteproc.c
index feca6de68da28..42c8884bc760f 100644
--- a/drivers/remoteproc/xlnx_r5_remoteproc.c
+++ b/drivers/remoteproc/xlnx_r5_remoteproc.c
@@ -16,6 +16,7 @@
#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>
#include <linux/remoteproc.h>
+#include <linux/workqueue.h>
#include "remoteproc_internal.h"
@@ -116,6 +117,7 @@ struct zynqmp_r5_cluster {
enum zynqmp_r5_cluster_mode mode;
int core_count;
struct zynqmp_r5_core **r5_cores;
+ struct workqueue_struct *workqueue;
};
/**
@@ -174,10 +176,18 @@ static void handle_event_notified(struct work_struct
*work)
static void zynqmp_r5_mb_rx_cb(struct mbox_client *cl, void *msg)
{
struct zynqmp_ipi_message *ipi_msg, *buf_msg;
+ struct zynqmp_r5_cluster *cluster;
struct mbox_info *ipi;
+ struct device *dev;
size_t len;
ipi = container_of(cl, struct mbox_info, mbox_cl);
+ dev = ipi->r5_core->dev;
+ cluster = dev_get_drvdata(dev->parent);
+ if (!cluster) {
+ dev_err(dev->parent, "Invalid driver data\n");
+ return;
+ }
/* copy data from ipi buffer to r5_core */
ipi_msg = (struct zynqmp_ipi_message *)msg;
@@ -195,7 +205,7 @@ static void zynqmp_r5_mb_rx_cb(struct mbox_client *cl, void
*msg)
if (mbox_send_message(ipi->rx_chan, NULL) < 0)
dev_err(cl->dev, "ack failed to mbox rx_chan\n");
- schedule_work(&ipi->mbox_work);
+ queue_work(cluster->workqueue, &ipi->mbox_work);
}
/**
@@ -1162,6 +1172,7 @@ static void zynqmp_r5_cluster_exit(void *data)
}
kfree(cluster->r5_cores);
+ destroy_workqueue(cluster->workqueue);
Hi Stefan,
Thanks for your patch v2.
https://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git/tree/drivers/remoteproc/xlnx_r5_remoteproc.c?h=rproc-next
/*
 * Cluster teardown path (quoted from rproc-next for review context):
 * for each core it releases the mailbox, unmaps the resource table,
 * releases reserved memory and the rproc instance, then frees the
 * per-cluster bookkeeping and clears the drvdata pointer.
 */
static void zynqmp_r5_cluster_exit(void *data)
{
struct platform_device *pdev = data;
struct zynqmp_r5_cluster *cluster;
struct zynqmp_r5_core *r5_core;
int i;
cluster = platform_get_drvdata(pdev);
if (!cluster)
return;
for (i = 0; i < cluster->core_count; i++) {
r5_core = cluster->r5_cores[i];
/*
 * NOTE(review): ipi is freed here, but a mbox_work item queued by
 * zynqmp_r5_mb_rx_cb() may still be pending and dereference ipi
 * afterwards -- cancel_work_sync() (or draining the workqueue)
 * should happen before this free to avoid a potential
 * use-after-free; see the review comment following this quote.
 */
zynqmp_r5_free_mbox(r5_core->ipi); <--------freeing ipi
iounmap(r5_core->rsc_tbl_va);
of_reserved_mem_device_release(r5_core->dev);
put_device(r5_core->dev);
rproc_del(r5_core->rproc);
rproc_free(r5_core->rproc);
}
kfree(cluster->r5_cores);
kfree(cluster);
platform_set_drvdata(pdev, NULL);
}
Please consider calling cancel_work_sync(&ipi->mbox_work) before freeing
ipi (or destroying the workqueue, which drains pending work, before the
zynqmp_r5_free_mbox() loop). Otherwise a still-queued mbox_work item can
dereference ipi after it has been freed — a potential use-after-free.
kfree(cluster);
platform_set_drvdata(pdev, NULL);
}
@@ -1194,11 +1205,20 @@ static int zynqmp_r5_remoteproc_probe(struct
platform_device *pdev)
return ret;
}
+ cluster->workqueue = alloc_workqueue(dev_name(dev),
+ WQ_UNBOUND | WQ_HIGHPRI, 0);
+ if (!cluster->workqueue) {
+ dev_err_probe(dev, -ENOMEM, "cannot create workqueue\n");
+ kfree(cluster);
+ return -ENOMEM;
+ }
+
/* wire in so each core can be cleaned up at driver remove */
platform_set_drvdata(pdev, cluster);
ret = zynqmp_r5_cluster_init(cluster);
if (ret) {
+ destroy_workqueue(cluster->workqueue);
kfree(cluster);
platform_set_drvdata(pdev, NULL);
dev_err_probe(dev, ret, "Invalid r5f subsystem device tree\n");
--
Thx and BRs,
Zhongqiu Han