Introduce a Remote Management (RM) queue service, which provides a
communication channel between the management PCIe function (PF0) and
the embedded firmware running on AMD Versal SoCs.

The RM service implements a submission/completion queue pair in device
shared memory for bidirectional command and response exchange between
the host driver and the firmware.

Add the core infrastructure for:
  - Initializing and managing the RM queue
  - Submitting commands to the embedded firmware
  - Polling for command completion

Subsequent patches will integrate the infrastructure with the firmware
management logic to enable firmware download, status query, and other
control operations.
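
For illustration, a caller in a later patch is expected to use the
interface roughly as sketched below. This sketch is not part of the
patch: the IDENTIFY opcode is only an example, and command ID
allocation and payload buffer handling are left to the follow-up
firmware management patches.

  struct rm_cmd cmd = { .rdev = rdev };
  int ret;

  /* Attach to the RM queue once the shared-memory BAR is mapped */
  ret = rm_queue_init(rdev);
  if (ret)
          return ret;

  /* Build a submission-queue message and wait for its completion */
  cmd.sq_msg.hdr.opcode = RM_QUEUE_OP_IDENTIFY;
  cmd.sq_msg.hdr.msg_size = sizeof(cmd.sq_msg.data);
  cmd.sq_msg.hdr.id = RM_CMD_ID_MIN;      /* real ID allocation comes later */
  init_completion(&cmd.executed);

  ret = rm_queue_send_cmd(&cmd, RM_CMD_WAIT_CONFIG_TIMEOUT);
  if (ret)
          vdev_err(vdev, "IDENTIFY failed: %d", ret);

  /* On teardown, stop the completion-polling timer and work item */
  rm_queue_fini(rdev);

Completions are not interrupt driven: rm_queue_init() arms a periodic
timer (RM_COMPLETION_TIMER) that schedules the message monitor work to
reap CQ entries and complete the matching commands.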

Co-developed-by: Nishad Saraf <[email protected]>
Signed-off-by: Nishad Saraf <[email protected]>
Signed-off-by: David Zhang <[email protected]>
---
 drivers/accel/amd_vpci/Makefile               |   3 +-
 drivers/accel/amd_vpci/versal-pci-rm-queue.c  | 316 ++++++++++++++++++
 drivers/accel/amd_vpci/versal-pci-rm-queue.h  |  21 ++
 .../accel/amd_vpci/versal-pci-rm-service.h    | 209 ++++++++++++
 drivers/accel/amd_vpci/versal-pci.h           |   1 +
 5 files changed, 549 insertions(+), 1 deletion(-)
 create mode 100644 drivers/accel/amd_vpci/versal-pci-rm-queue.c
 create mode 100644 drivers/accel/amd_vpci/versal-pci-rm-queue.h
 create mode 100644 drivers/accel/amd_vpci/versal-pci-rm-service.h

diff --git a/drivers/accel/amd_vpci/Makefile b/drivers/accel/amd_vpci/Makefile
index 03849875ad0b..9e4e56ac2dee 100644
--- a/drivers/accel/amd_vpci/Makefile
+++ b/drivers/accel/amd_vpci/Makefile
@@ -3,4 +3,5 @@
 obj-$(CONFIG_DRM_ACCEL_AMD_VPCI) := versal-pci.o
 
 versal-pci-y := \
-       versal-pci-main.o
+       versal-pci-main.o \
+       versal-pci-rm-queue.o
diff --git a/drivers/accel/amd_vpci/versal-pci-rm-queue.c b/drivers/accel/amd_vpci/versal-pci-rm-queue.c
new file mode 100644
index 000000000000..e67c506af752
--- /dev/null
+++ b/drivers/accel/amd_vpci/versal-pci-rm-queue.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/pci.h>
+
+#include "versal-pci.h"
+#include "versal-pci-rm-queue.h"
+#include "versal-pci-rm-service.h"
+
+static inline struct rm_device *to_rdev_msg_monitor(struct work_struct *w)
+{
+       return container_of(w, struct rm_device, msg_monitor);
+}
+
+static inline struct rm_device *to_rdev_msg_timer(struct timer_list *t)
+{
+       return container_of(t, struct rm_device, msg_timer);
+}
+
+static inline u32 rm_io_read(struct rm_device *rdev, u32 offset)
+{
+       /* TODO */
+       return 0;
+}
+
+static inline int rm_io_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+       /* TODO */
+       return 0;
+}
+
+static inline u32 rm_queue_read(struct rm_device *rdev, u32 offset)
+{
+       /* TODO */
+       return 0;
+}
+
+static inline void rm_queue_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+       /* TODO */
+}
+
+static inline void rm_queue_bulk_read(struct rm_device *rdev, u32 offset,
+                                     u32 *value, u32 size)
+{
+       /* TODO */
+}
+
+static inline void rm_queue_bulk_write(struct rm_device *rdev, u32 offset,
+                                      u32 *value, u32 size)
+{
+       /* TODO */
+}
+
+static inline u32 rm_queue_get_cidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+       u32 off;
+
+       if (type == RM_QUEUE_SQ)
+               off = offsetof(struct rm_queue_header, sq_cidx);
+       else
+               off = offsetof(struct rm_queue_header, cq_cidx);
+
+       return rm_queue_read(rdev, off);
+}
+
+static inline void rm_queue_set_cidx(struct rm_device *rdev, enum rm_queue_type type,
+                                    u32 value)
+{
+       u32 off;
+
+       if (type == RM_QUEUE_SQ)
+               off = offsetof(struct rm_queue_header, sq_cidx);
+       else
+               off = offsetof(struct rm_queue_header, cq_cidx);
+
+       rm_queue_write(rdev, off, value);
+}
+
+static inline u32 rm_queue_get_pidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+       if (type == RM_QUEUE_SQ)
+               return rm_io_read(rdev, RM_IO_SQ_PIDX_OFF);
+       else
+               return rm_io_read(rdev, RM_IO_CQ_PIDX_OFF);
+}
+
+static inline int rm_queue_set_pidx(struct rm_device *rdev,
+                                   enum rm_queue_type type, u32 value)
+{
+       if (type == RM_QUEUE_SQ)
+               return rm_io_write(rdev, RM_IO_SQ_PIDX_OFF, value);
+       else
+               return rm_io_write(rdev, RM_IO_CQ_PIDX_OFF, value);
+}
+
+static inline u32 rm_queue_get_sq_slot_offset(struct rm_device *rdev)
+{
+       u32 index;
+
+       if ((rdev->sq.pidx - rdev->sq.cidx) >= rdev->queue_size)
+               return RM_INVALID_SLOT;
+
+       index = rdev->sq.pidx & (rdev->queue_size - 1);
+       return rdev->sq.offset + RM_CMD_SQ_SLOT_SIZE * index;
+}
+
+static inline u32 rm_queue_get_cq_slot_offset(struct rm_device *rdev)
+{
+       u32 index;
+
+       index = rdev->cq.cidx & (rdev->queue_size - 1);
+       return rdev->cq.offset + RM_CMD_CQ_SLOT_SIZE * index;
+}
+
+static int rm_queue_submit_cmd(struct rm_cmd *cmd)
+{
+       struct versal_pci_device *vdev = cmd->rdev->vdev;
+       struct rm_device *rdev = cmd->rdev;
+       u32 offset;
+       int ret;
+
+       guard(mutex)(&rdev->queue);
+
+       offset = rm_queue_get_sq_slot_offset(rdev);
+       if (!offset) {
+               vdev_err(vdev, "No SQ slot available");
+               return -ENOSPC;
+       }
+
+       rm_queue_bulk_write(rdev, offset, (u32 *)&cmd->sq_msg,
+                           sizeof(cmd->sq_msg));
+
+       ret = rm_queue_set_pidx(rdev, RM_QUEUE_SQ, ++rdev->sq.pidx);
+       if (ret) {
+               vdev_err(vdev, "Failed to update PIDX, ret %d", ret);
+               return ret;
+       }
+
+       list_add_tail(&cmd->list, &rdev->submitted_cmds);
+       return ret;
+}
+
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd)
+{
+       guard(mutex)(&cmd->rdev->queue);
+       list_del(&cmd->list);
+}
+
+static int rm_queue_wait_cmd_timeout(struct rm_cmd *cmd, unsigned long timeout)
+{
+       struct versal_pci_device *vdev = cmd->rdev->vdev;
+       int ret;
+
+       if (wait_for_completion_timeout(&cmd->executed, timeout)) {
+               ret = cmd->cq_msg.data.rcode;
+               if (!ret)
+                       return 0;
+
+               vdev_err(vdev, "CMD returned with a failure: %d", ret);
+               return ret;
+       }
+
+       /*
+        * Each command is normally completed before it times out. If we
+        * reach here, the command should be withdrawn and a hot reset
+        * should be issued to the card.
+        */
+       vdev_err(vdev, "cmd timed out, please reset the card");
+       rm_queue_withdraw_cmd(cmd);
+       return -ETIME;
+}
+
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout)
+{
+       int ret;
+
+       ret = rm_queue_submit_cmd(cmd);
+       if (ret)
+               return ret;
+
+       return rm_queue_wait_cmd_timeout(cmd, timeout);
+}
+
+static int rm_process_msg(struct rm_device *rdev)
+{
+       struct versal_pci_device *vdev = rdev->vdev;
+       struct rm_cmd *cmd, *next;
+       struct rm_cmd_cq_hdr header;
+       u32 offset;
+
+       offset = rm_queue_get_cq_slot_offset(rdev);
+       if (!offset) {
+               vdev_err(vdev, "Invalid CQ offset");
+               return -EINVAL;
+       }
+
+       rm_queue_bulk_read(rdev, offset, (u32 *)&header, sizeof(header));
+
+       list_for_each_entry_safe(cmd, next, &rdev->submitted_cmds, list) {
+               u32 value = 0;
+
+               if (cmd->sq_msg.hdr.id != header.id)
+                       continue;
+
+               rm_queue_bulk_read(rdev, offset + sizeof(cmd->cq_msg.hdr),
+                                  (u32 *)&cmd->cq_msg.data,
+                                  sizeof(cmd->cq_msg.data));
+
+               rm_queue_write(rdev, offset, value);
+
+               list_del(&cmd->list);
+               complete(&cmd->executed);
+               return 0;
+       }
+
+       vdev_err(vdev, "Unknown cmd ID %d found in CQ", header.id);
+       return -EFAULT;
+}
+
+static void rm_check_msg(struct work_struct *w)
+{
+       struct rm_device *rdev = to_rdev_msg_monitor(w);
+       int ret;
+
+       guard(mutex)(&rdev->queue);
+
+       rdev->sq.cidx = rm_queue_get_cidx(rdev, RM_QUEUE_SQ);
+       rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+       while (rdev->cq.cidx < rdev->cq.pidx) {
+               ret = rm_process_msg(rdev);
+               if (ret)
+                       break;
+
+               rdev->cq.cidx++;
+
+               rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+       }
+}
+
+static void rm_sched_work(struct timer_list *t)
+{
+       struct rm_device *rdev = to_rdev_msg_timer(t);
+
+       /* Schedule the message monitor on the system workqueue */
+       schedule_work(&rdev->msg_monitor);
+       /* Periodic timer */
+       mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+}
+
+void rm_queue_fini(struct rm_device *rdev)
+{
+       timer_delete_sync(&rdev->msg_timer);
+       cancel_work_sync(&rdev->msg_monitor);
+}
+
+int rm_queue_init(struct rm_device *rdev)
+{
+       struct versal_pci_device *vdev = rdev->vdev;
+       struct rm_queue_header header = {0};
+       int ret;
+
+       INIT_LIST_HEAD(&rdev->submitted_cmds);
+       ret = devm_mutex_init(&vdev->pdev->dev, &rdev->queue);
+       if (ret)
+               return ret;
+
+       rm_queue_bulk_read(rdev, RM_HDR_OFF, (u32 *)&header, sizeof(header));
+
+       if (header.magic != RM_QUEUE_HDR_MAGIC_NUM) {
+               vdev_err(vdev, "Invalid RM queue header");
+               return -ENODEV;
+       }
+
+       if (!header.version) {
+               vdev_err(vdev, "Invalid RM queue header version");
+               return -ENODEV;
+       }
+
+       sema_init(&rdev->sq.data_lock, 1);
+       sema_init(&rdev->cq.data_lock, 1);
+       rdev->queue_size = header.size;
+       rdev->sq.offset = header.sq_off;
+       rdev->cq.offset = header.cq_off;
+       rdev->sq.type = RM_QUEUE_SQ;
+       rdev->cq.type = RM_QUEUE_CQ;
+       rdev->sq.data_size = rdev->queue_buffer_size - RM_CMD_CQ_BUFFER_SIZE;
+       rdev->cq.data_size = RM_CMD_CQ_BUFFER_SIZE;
+       rdev->sq.data_offset = rdev->queue_buffer_start +
+                              RM_CMD_CQ_BUFFER_OFFSET + RM_CMD_CQ_BUFFER_SIZE;
+       rdev->cq.data_offset = rdev->queue_buffer_start +
+                              RM_CMD_CQ_BUFFER_OFFSET;
+       rdev->sq.cidx = header.sq_cidx;
+       rdev->cq.cidx = header.cq_cidx;
+
+       rdev->sq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_SQ);
+       rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+       if (rdev->cq.cidx != rdev->cq.pidx) {
+               vdev_warn(vdev, "Clearing stale completions");
+               rdev->cq.cidx = rdev->cq.pidx;
+               rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+       }
+
+       /* Create and schedule timer to do recurring work */
+       INIT_WORK(&rdev->msg_monitor, &rm_check_msg);
+       timer_setup(&rdev->msg_timer, &rm_sched_work, 0);
+       mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+
+       return 0;
+}
diff --git a/drivers/accel/amd_vpci/versal-pci-rm-queue.h b/drivers/accel/amd_vpci/versal-pci-rm-queue.h
new file mode 100644
index 000000000000..d5d991704d5c
--- /dev/null
+++ b/drivers/accel/amd_vpci/versal-pci-rm-queue.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_QUEUE_H
+#define __RM_QUEUE_H
+
+struct rm_device;
+
+/* rm queue hardware setup */
+int rm_queue_init(struct rm_device *rdev);
+void rm_queue_fini(struct rm_device *rdev);
+
+/* rm queue common API */
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout);
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd);
+
+#endif /* __RM_QUEUE_H */
diff --git a/drivers/accel/amd_vpci/versal-pci-rm-service.h b/drivers/accel/amd_vpci/versal-pci-rm-service.h
new file mode 100644
index 000000000000..d2397a1a672c
--- /dev/null
+++ b/drivers/accel/amd_vpci/versal-pci-rm-service.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_SERVICE_H
+#define __RM_SERVICE_H
+
+#define RM_HDR_OFF                     0x0
+#define RM_HDR_MAGIC_NUM               0x564D5230
+#define RM_QUEUE_HDR_MAGIC_NUM         0x5847513F
+#define RM_PCI_IO_BAR_OFF              0x2010000
+#define RM_PCI_IO_SIZE                 SZ_4K
+#define RM_PCI_SHMEM_BAR_OFF           0x8000000
+#define RM_PCI_SHMEM_SIZE              SZ_128M
+#define RM_PCI_SHMEM_HDR_SIZE          0x28
+
+#define RM_QUEUE_HDR_MAGIC_NUM_OFF     0x0
+#define RM_IO_SQ_PIDX_OFF              0x0
+#define RM_IO_CQ_PIDX_OFF              0x100
+
+#define RM_CMD_ID_MIN                  1
+#define RM_CMD_ID_MAX                  (BIT(17) - 1)
+#define RM_CMD_SQ_HDR_OPS_MSK          GENMASK(15, 0)
+#define RM_CMD_SQ_HDR_SIZE_MSK         GENMASK(14, 0)
+#define RM_CMD_SQ_SLOT_SIZE            SZ_512
+#define RM_CMD_CQ_SLOT_SIZE            SZ_16
+#define RM_CMD_CQ_BUFFER_SIZE          (1024 * 1024)
+#define RM_CMD_CQ_BUFFER_OFFSET                0x0
+#define RM_CMD_LOG_PAGE_TYPE_MASK      GENMASK(15, 0)
+#define RM_CMD_VMR_CONTROL_MSK         GENMASK(10, 8)
+#define RM_CMD_VMR_CONTROL_PS_MASK     BIT(9)
+
+#define RM_CMD_WAIT_CONFIG_TIMEOUT     msecs_to_jiffies(10 * 1000)
+#define RM_CMD_WAIT_DOWNLOAD_TIMEOUT   msecs_to_jiffies(300 * 1000)
+
+#define RM_COMPLETION_TIMER            (HZ / 10)
+#define RM_HEALTH_CHECK_TIMER          (HZ)
+
+#define RM_INVALID_SLOT                        0
+
+enum rm_queue_opcode {
+       RM_QUEUE_OP_LOAD_XCLBIN         = 0x0,
+       RM_QUEUE_OP_GET_LOG_PAGE        = 0x8,
+       RM_QUEUE_OP_LOAD_FW             = 0xA,
+       RM_QUEUE_OP_LOAD_APU_FW         = 0xD,
+       RM_QUEUE_OP_VMR_CONTROL         = 0xE,
+       RM_QUEUE_OP_IDENTIFY            = 0x202,
+};
+
+struct rm_cmd_sq_hdr {
+       __u16 opcode;
+       __u16 msg_size;
+       __u16 id;
+       __u16 reserved;
+} __packed;
+
+struct rm_cmd_cq_hdr {
+       __u16 id;
+       __u16 reserved;
+} __packed;
+
+struct rm_cmd_sq_bin {
+       __u64                   address;
+       __u32                   size;
+       __u32                   reserved1;
+       __u32                   reserved2;
+       __u32                   reserved3;
+       __u64                   reserved4;
+} __packed;
+
+struct rm_cmd_sq_log_page {
+       __u64                   address;
+       __u32                   size;
+       __u32                   reserved1;
+       __u32                   type;
+       __u32                   reserved2;
+} __packed;
+
+struct rm_cmd_sq_ctrl {
+       __u32                   status;
+} __packed;
+
+struct rm_cmd_sq_data {
+       union {
+               struct rm_cmd_sq_log_page       page;
+               struct rm_cmd_sq_bin            bin;
+               struct rm_cmd_sq_ctrl           ctrl;
+       };
+} __packed;
+
+struct rm_cmd_cq_identify {
+       __u16                   major;
+       __u16                   minor;
+       __u32                   reserved;
+} __packed;
+
+struct rm_cmd_cq_log_page {
+       __u32                   len;
+       __u32                   reserved;
+} __packed;
+
+struct rm_cmd_cq_control {
+       __u16                   status;
+       __u16                   reserved1;
+       __u32                   reserved2;
+} __packed;
+
+struct rm_cmd_cq_data {
+       union {
+               struct rm_cmd_cq_identify       identify;
+               struct rm_cmd_cq_log_page       page;
+               struct rm_cmd_cq_control        ctrl;
+               __u32                           reserved[2];
+       };
+       __u32                   rcode;
+} __packed;
+
+struct rm_cmd_sq_msg {
+       struct rm_cmd_sq_hdr    hdr;
+       struct rm_cmd_sq_data   data;
+} __packed;
+
+struct rm_cmd_cq_msg {
+       struct rm_cmd_cq_hdr    hdr;
+       struct rm_cmd_cq_data   data;
+} __packed;
+
+struct rm_cmd {
+       struct rm_device        *rdev;
+       struct list_head        list;
+       struct completion       executed;
+       struct rm_cmd_sq_msg    sq_msg;
+       struct rm_cmd_cq_msg    cq_msg;
+       enum rm_queue_opcode    opcode;
+       __u8                    *buffer;
+       ssize_t                 size;
+};
+
+enum rm_queue_type {
+       RM_QUEUE_SQ,
+       RM_QUEUE_CQ
+};
+
+enum rm_cmd_log_page_type {
+       RM_CMD_LOG_PAGE_AXI_TRIP_STATUS = 0x0,
+       RM_CMD_LOG_PAGE_FW_ID           = 0xA,
+};
+
+struct rm_queue {
+       enum rm_queue_type      type;
+       __u32                   pidx;
+       __u32                   cidx;
+       __u32                   offset;
+       __u32                   data_offset;
+       __u32                   data_size;
+       struct semaphore        data_lock;
+};
+
+struct rm_queue_header {
+       __u32                   magic;
+       __u32                   version;
+       __u32                   size;
+       __u32                   sq_off;
+       __u32                   sq_slot_size;
+       __u32                   cq_off;
+       __u32                   sq_cidx;
+       __u32                   cq_cidx;
+};
+
+struct rm_header {
+       __u32                   magic;
+       __u32                   queue_base;
+       __u32                   queue_size;
+       __u32                   status_off;
+       __u32                   status_len;
+       __u32                   log_index;
+       __u32                   log_off;
+       __u32                   log_size;
+       __u32                   data_start;
+       __u32                   data_end;
+};
+
+struct rm_device {
+       struct versal_pci_device        *vdev;
+
+       struct rm_header        rm_metadata;
+       __u32                   queue_buffer_start;
+       __u32                   queue_buffer_size;
+       __u32                   queue_base;
+
+       /* Lock to queue access */
+       struct mutex            queue;
+       struct rm_queue         sq;
+       struct rm_queue         cq;
+       __u32                   queue_size;
+
+       struct timer_list       msg_timer;
+       struct work_struct      msg_monitor;
+       struct timer_list       health_timer;
+       struct work_struct      health_monitor;
+       struct list_head        submitted_cmds;
+
+       __u32                   firewall_tripped;
+};
+
+#endif /* __RM_SERVICE_H */
diff --git a/drivers/accel/amd_vpci/versal-pci.h b/drivers/accel/amd_vpci/versal-pci.h
index ca309aee87ad..33f0ef881a33 100644
--- a/drivers/accel/amd_vpci/versal-pci.h
+++ b/drivers/accel/amd_vpci/versal-pci.h
@@ -26,6 +26,7 @@
        dev_dbg(&(vdev)->pdev->dev, fmt, ##args)
 
 struct versal_pci_device;
+struct rm_cmd;
 
 struct axlf_header {
        __u64                           length;
-- 
2.34.1
