Add the core infrastructure for debugger event handling on render nodes. A debugger can attach to a render-node client and receive events through a debug fd. This introduces a debug fd, event FIFO management, read and forward operations, and support for attaching and detaching debugger sessions.
Cc: Alex Deucher <[email protected]> Cc: Christian König <[email protected]> Signed-off-by: Srinivasan Shanmugam <[email protected]> --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + .../gpu/drm/amd/amdgpu/amdgpu_debug_event.c | 408 ++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_debug_event.h | 52 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + 5 files changed, 465 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 0d3b08e4d9c9..71b700173a99 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -72,8 +72,7 @@ amdgpu-y += amdgpu_device.o amdgpu_reg_access.o amdgpu_doorbell_mgr.o amdgpu_kms amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o \ - amdgpu_eventfd.o \ - amdgpu_wait_event.o + amdgpu_eventfd.o amdgpu_wait_event.o amdgpu_debug_event.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6edd3ea5cbd2..b42cff392f34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -106,6 +106,7 @@ #include "amdgpu_aca.h" #include "amdgpu_wait_event.h" #include "amdgpu_eventfd.h" +#include "amdgpu_debug_event.h" #include "amdgpu_ras.h" #include "amdgpu_cper.h" #include "amdgpu_xcp.h" @@ -460,6 +461,7 @@ struct amdgpu_fpriv { struct amdgpu_eventfd_mgr eventfd_mgr; struct amdgpu_wait_event_mgr wait_event_mgr; + struct amdgpu_debug_event_mgr debug_event_mgr; }; struct drm_device; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.c new file mode 100644 
index 000000000000..72a6f65ddcfd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kfifo.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_debug_event.h"
+#include "amdgpu_eventfd.h"
+#include "amdgpu_wait_event.h"
+
+/* Capacity of the per-session FIFO, in records (kfifo wants a power of two). */
+#define AMDGPU_DEBUG_EVENT_FIFO_SIZE	64
+
+/*
+ * Serializes every access to session->mgr. The manager is embedded in the
+ * client's fpriv while the debug fd is a separate file, so the back-pointer
+ * is cleared on detach under a lock that does not live inside fpriv.
+ *
+ * Lock order: amdgpu_debug_event_link_lock -> mgr->lock -> session->lock.
+ */
+static DEFINE_MUTEX(amdgpu_debug_event_link_lock);
+
+/**
+ * struct amdgpu_debug_event_session - state behind one debug fd
+ * @ref: reference count shared by the debug file, the manager and in-flight
+ *       amdgpu_debug_event_route() calls
+ * @mgr: manager this session is attached to, NULL once detached; protected by
+ *       amdgpu_debug_event_link_lock
+ * @lock: protects @fifo and orders @detached against producers
+ * @detached: true once the session has been unhooked from its manager
+ * @fifo: queued records; every entry is a kmalloc'ed record owned by the FIFO
+ * @read_wq: readers sleep here until a record arrives or the session detaches
+ * @space_wq: woken after a record has been consumed
+ * @seqno: generator for per-record sequence numbers
+ */
+struct amdgpu_debug_event_session {
+	struct kref ref;
+	struct amdgpu_debug_event_mgr *mgr;
+
+	spinlock_t lock;
+	bool detached;
+
+	DECLARE_KFIFO_PTR(fifo, struct drm_amdgpu_debug_event_record *);
+	wait_queue_head_t read_wq;
+	wait_queue_head_t space_wq;
+	atomic64_t seqno;
+};
+
+/* kref release callback: free any records still queued, then the session. */
+static void amdgpu_debug_event_session_free(struct kref *ref)
+{
+	struct amdgpu_debug_event_session *session =
+		container_of(ref, struct amdgpu_debug_event_session, ref);
+	struct drm_amdgpu_debug_event_record *rec;
+
+	while (kfifo_get(&session->fifo, &rec))
+		kfree(rec);
+
+	kfifo_free(&session->fifo);
+	kfree(session);
+}
+
+static void amdgpu_debug_event_session_get(struct amdgpu_debug_event_session *session)
+{
+	kref_get(&session->ref);
+}
+
+static void amdgpu_debug_event_session_put(struct amdgpu_debug_event_session *session)
+{
+	kref_put(&session->ref, amdgpu_debug_event_session_free);
+}
+
+/**
+ * amdgpu_debug_event_mgr_init - prepare a client's debug event manager
+ * @mgr: manager to initialize
+ * @fpriv: client that owns @mgr
+ *
+ * The manager starts out with no debugger session attached.
+ */
+void amdgpu_debug_event_mgr_init(struct amdgpu_debug_event_mgr *mgr,
+				 struct amdgpu_fpriv *fpriv)
+{
+	mutex_init(&mgr->lock);
+	mgr->fpriv = fpriv;
+	mgr->session = NULL;
+}
+
+/* Hand one event to the client: signal its eventfds, then queue the data. */
+static void amdgpu_debug_event_deliver_to_app(struct amdgpu_fpriv *fpriv,
+					      const struct drm_amdgpu_wait_event_data *data)
+{
+	amdgpu_eventfd_signal(&fpriv->eventfd_mgr, data->event_type,
+			      data->queue_id);
+	amdgpu_wait_event_push_data(&fpriv->wait_event_mgr, data);
+}
+
+/*
+ * Drain @session's FIFO, delivering every record to @fpriv (or just freeing
+ * it when @fpriv is NULL). Each record is popped under session->lock because
+ * a reader on the debug fd may be consuming from the same FIFO; delivery
+ * happens with the spinlock dropped.
+ */
+static void amdgpu_debug_event_flush_to_app(struct amdgpu_debug_event_session *session,
+					    struct amdgpu_fpriv *fpriv)
+{
+	struct drm_amdgpu_debug_event_record *rec;
+	unsigned long flags;
+	bool got;
+
+	for (;;) {
+		spin_lock_irqsave(&session->lock, flags);
+		got = kfifo_get(&session->fifo, &rec);
+		spin_unlock_irqrestore(&session->lock, flags);
+		if (!got)
+			break;
+
+		if (fpriv)
+			amdgpu_debug_event_deliver_to_app(fpriv, &rec->data);
+		kfree(rec);
+	}
+}
+
+/*
+ * Unhook the current session, if any, from @mgr and drop the manager's
+ * reference. Caller holds amdgpu_debug_event_link_lock and mgr->lock.
+ */
+static void amdgpu_debug_event_detach_locked(struct amdgpu_debug_event_mgr *mgr)
+{
+	struct amdgpu_debug_event_session *session = mgr->session;
+	unsigned long flags;
+
+	if (!session)
+		return;
+
+	mgr->session = NULL;
+	/* The debug fd may outlive @mgr; never leave a pointer to it behind. */
+	session->mgr = NULL;
+
+	/*
+	 * Flip the flag under session->lock so that a producer that already
+	 * passed its detached check has finished queueing before the drain.
+	 */
+	spin_lock_irqsave(&session->lock, flags);
+	WRITE_ONCE(session->detached, true);
+	spin_unlock_irqrestore(&session->lock, flags);
+
+	wake_up_all(&session->read_wq);
+	wake_up_all(&session->space_wq);
+
+	/* Forward unread events to the app so detach does not block progress. */
+	amdgpu_debug_event_flush_to_app(session, mgr->fpriv);
+	amdgpu_debug_event_session_put(session);
+}
+
+/**
+ * amdgpu_debug_event_mgr_fini - tear down a client's debug event manager
+ * @mgr: manager to tear down
+ *
+ * Detaches any session, which forwards its unread events to the client, and
+ * clears the manager's fpriv pointer.
+ */
+void amdgpu_debug_event_mgr_fini(struct amdgpu_debug_event_mgr *mgr)
+{
+	mutex_lock(&amdgpu_debug_event_link_lock);
+	mutex_lock(&mgr->lock);
+	amdgpu_debug_event_detach_locked(mgr);
+	mgr->fpriv = NULL;
+	mutex_unlock(&mgr->lock);
+	mutex_unlock(&amdgpu_debug_event_link_lock);
+}
+
+/*
+ * Allocate a session bound to @mgr with an empty FIFO. The returned session
+ * carries one reference. Returns an ERR_PTR() on allocation failure.
+ */
+static struct amdgpu_debug_event_session *
+amdgpu_debug_event_session_create(struct amdgpu_debug_event_mgr *mgr)
+{
+	struct amdgpu_debug_event_session *session;
+	int r;
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (!session)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&session->ref);
+	session->mgr = mgr;
+	spin_lock_init(&session->lock);
+	init_waitqueue_head(&session->read_wq);
+	init_waitqueue_head(&session->space_wq);
+	atomic64_set(&session->seqno, 0);
+
+	/* kfifo_alloc() takes a number of elements, not a size in bytes. */
+	r = kfifo_alloc(&session->fifo, AMDGPU_DEBUG_EVENT_FIFO_SIZE,
+			GFP_KERNEL);
+	if (r) {
+		kfree(session);
+		return ERR_PTR(r);
+	}
+
+	return session;
+}
+
+/**
+ * amdgpu_debug_event_attached - report whether a debugger is attached
+ * @fpriv: client to query; may be NULL
+ *
+ * Return: true if @fpriv currently has a debugger session.
+ */
+bool amdgpu_debug_event_attached(struct amdgpu_fpriv *fpriv)
+{
+	bool attached;
+
+	if (!fpriv)
+		return false;
+
+	mutex_lock(&fpriv->debug_event_mgr.lock);
+	attached = !!fpriv->debug_event_mgr.session;
+	mutex_unlock(&fpriv->debug_event_mgr.lock);
+
+	return attached;
+}
+
+/**
+ * amdgpu_debug_event_route - send an event to the debugger or to the client
+ * @fpriv: client the event belongs to
+ * @data: event payload
+ *
+ * Without a session the event goes straight to the client. With a session it
+ * is queued for the debugger; if the session has detached, no record can be
+ * allocated, or the FIFO is full, the event is delivered to the client
+ * instead so the application keeps making progress.
+ *
+ * NOTE(review): this takes mgr->lock, a mutex, so it has to run in process
+ * context even though the record is allocated with GFP_ATOMIC.
+ *
+ * Return: 0 on success, -EINVAL on a NULL argument, -ENOMEM or -ENOSPC when
+ * the event bypassed the debugger because of an allocation failure or a full
+ * FIFO respectively.
+ */
+int amdgpu_debug_event_route(struct amdgpu_fpriv *fpriv,
+			     const struct drm_amdgpu_wait_event_data *data)
+{
+	struct amdgpu_debug_event_session *session;
+	struct drm_amdgpu_debug_event_record *rec;
+	unsigned long flags;
+	int r = 0;
+
+	if (!fpriv || !data)
+		return -EINVAL;
+
+	mutex_lock(&fpriv->debug_event_mgr.lock);
+	session = fpriv->debug_event_mgr.session;
+	if (!session) {
+		mutex_unlock(&fpriv->debug_event_mgr.lock);
+		amdgpu_debug_event_deliver_to_app(fpriv, data);
+		return 0;
+	}
+	amdgpu_debug_event_session_get(session);
+	mutex_unlock(&fpriv->debug_event_mgr.lock);
+
+	rec = kzalloc(sizeof(*rec), GFP_ATOMIC);
+	if (!rec) {
+		/* Do not lose the event just because the debugger copy failed. */
+		amdgpu_debug_event_deliver_to_app(fpriv, data);
+		r = -ENOMEM;
+		goto out_put;
+	}
+
+	rec->len = sizeof(*rec);
+	rec->event_type = data->event_type;
+	rec->queue_id = data->queue_id;
+	rec->flags = DRM_AMDGPU_DEBUG_EVENT_NEED_FORWARD;
+	rec->seqno = atomic64_inc_return(&session->seqno);
+	rec->data = *data;
+
+	spin_lock_irqsave(&session->lock, flags);
+	if (session->detached) {
+		spin_unlock_irqrestore(&session->lock, flags);
+		amdgpu_debug_event_deliver_to_app(fpriv, data);
+		kfree(rec);
+		goto out_put;
+	}
+
+	if (!kfifo_in(&session->fifo, &rec, 1)) {
+		spin_unlock_irqrestore(&session->lock, flags);
+		/* Never block the producer; keep the app going if the debug FIFO fills. */
+		amdgpu_debug_event_deliver_to_app(fpriv, data);
+		kfree(rec);
+		r = -ENOSPC;
+		goto out_put;
+	}
+	spin_unlock_irqrestore(&session->lock, flags);
+
+	wake_up_interruptible_all(&session->read_wq);
+
+out_put:
+	amdgpu_debug_event_session_put(session);
+	return r;
+}
+
+/* poll() handler: readable while records are queued, hung up once detached. */
+static __poll_t amdgpu_debug_event_poll(struct file *file,
+					struct poll_table_struct *wait)
+{
+	struct amdgpu_debug_event_session *session = file->private_data;
+	__poll_t mask = 0;
+	unsigned long flags;
+	bool empty;
+
+	poll_wait(file, &session->read_wq, wait);
+
+	spin_lock_irqsave(&session->lock, flags);
+	empty = kfifo_is_empty(&session->fifo);
+	spin_unlock_irqrestore(&session->lock, flags);
+
+	if (READ_ONCE(session->detached))
+		mask |= EPOLLHUP;
+
+	if (!empty)
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	return mask;
+}
+
+/*
+ * DRM_AMDGPU_DEBUG_FD_READ_EVENT: block until a record is queued or the
+ * session detaches, then copy the oldest record to @argp. If the caller's
+ * len field is too small it is updated to the required size and -EMSGSIZE
+ * is returned. A record that cannot be copied out is discarded.
+ */
+static long amdgpu_debug_event_read_ioctl(struct amdgpu_debug_event_session *session,
+					  void __user *argp)
+{
+	struct drm_amdgpu_debug_event_record user;
+	struct drm_amdgpu_debug_event_record *rec = NULL;
+	unsigned long flags;
+	int r;
+
+	if (copy_from_user(&user, argp, sizeof(user)))
+		return -EFAULT;
+
+	if (user.len < sizeof(user)) {
+		user.len = sizeof(user);
+		if (copy_to_user(argp, &user, sizeof(user)))
+			return -EFAULT;
+		return -EMSGSIZE;
+	}
+
+	r = wait_event_interruptible(session->read_wq,
+				     READ_ONCE(session->detached) ||
+				     !kfifo_is_empty(&session->fifo));
+	if (r)
+		return r;
+
+	spin_lock_irqsave(&session->lock, flags);
+	if (!kfifo_get(&session->fifo, &rec))
+		r = READ_ONCE(session->detached) ? -ENOTCONN : -EAGAIN;
+	spin_unlock_irqrestore(&session->lock, flags);
+
+	if (r)
+		return r;
+
+	if (copy_to_user(argp, rec, sizeof(*rec)))
+		r = -EFAULT;
+
+	kfree(rec);
+	wake_up_interruptible_all(&session->space_wq);
+	return r;
+}
+
+/*
+ * DRM_AMDGPU_DEBUG_FD_FORWARD_EVENT: deliver the payload of the record at
+ * @argp to the debugged client. Fails with -ENOTCONN once the session has
+ * been detached.
+ */
+static long amdgpu_debug_event_forward_ioctl(struct amdgpu_debug_event_session *session,
+					     void __user *argp)
+{
+	struct drm_amdgpu_debug_event_record rec;
+	struct amdgpu_debug_event_mgr *mgr;
+	struct amdgpu_fpriv *fpriv;
+	long r = 0;
+
+	if (copy_from_user(&rec, argp, sizeof(rec)))
+		return -EFAULT;
+
+	if (rec.len < sizeof(rec) || !rec.event_type)
+		return -EINVAL;
+
+	/* session->mgr is only stable while the link lock is held. */
+	mutex_lock(&amdgpu_debug_event_link_lock);
+	mgr = session->mgr;
+	if (!mgr) {
+		r = -ENOTCONN;
+		goto out_unlock;
+	}
+
+	mutex_lock(&mgr->lock);
+	fpriv = mgr->fpriv;
+	if (fpriv && mgr->session == session)
+		amdgpu_debug_event_deliver_to_app(fpriv, &rec.data);
+	else
+		r = -ENOTCONN;
+	mutex_unlock(&mgr->lock);
+
+out_unlock:
+	mutex_unlock(&amdgpu_debug_event_link_lock);
+	return r;
+}
+
+/* ioctl entry point of the debug fd; dispatches on @cmd. */
+static long amdgpu_debug_event_fd_ioctl(struct file *file,
+					unsigned int cmd, unsigned long arg)
+{
+	struct amdgpu_debug_event_session *session = file->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case DRM_AMDGPU_DEBUG_FD_READ_EVENT:
+		return amdgpu_debug_event_read_ioctl(session, argp);
+	case DRM_AMDGPU_DEBUG_FD_FORWARD_EVENT:
+		return amdgpu_debug_event_forward_ioctl(session, argp);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Last close of the debug fd: detach from the client if still attached, then
+ * drop the file's reference on the session.
+ */
+static int amdgpu_debug_event_release(struct inode *inode, struct file *file)
+{
+	struct amdgpu_debug_event_session *session = file->private_data;
+	struct amdgpu_debug_event_mgr *mgr;
+
+	mutex_lock(&amdgpu_debug_event_link_lock);
+	mgr = session->mgr;
+	if (mgr) {
+		mutex_lock(&mgr->lock);
+		if (mgr->session == session)
+			amdgpu_debug_event_detach_locked(mgr);
+		mutex_unlock(&mgr->lock);
+	}
+	mutex_unlock(&amdgpu_debug_event_link_lock);
+
+	amdgpu_debug_event_session_put(session);
+	return 0;
+}
+
+static const struct file_operations amdgpu_debug_event_fops = {
+	.owner = THIS_MODULE,
+	.poll = amdgpu_debug_event_poll,
+	.unlocked_ioctl = amdgpu_debug_event_fd_ioctl,
+	/* Both commands take a pointer argument, so translate it for compat tasks. */
+	.compat_ioctl = compat_ptr_ioctl,
+	.release = amdgpu_debug_event_release,
+};
+
+/**
+ * amdgpu_debug_event_ioctl - attach a debugger session to a render-node client
+ * @dev: DRM device (unused)
+ * @data: union drm_amdgpu_debug_event arguments
+ * @file_priv: DRM file of the client to debug
+ *
+ * Creates a session, installs it in the client's manager and returns a new
+ * debug fd in args->attach.debug_fd.
+ *
+ * Return: 0 on success, -EINVAL on bad arguments, -EBUSY if a session is
+ * already attached, or another negative errno on allocation/fd failure.
+ */
+int amdgpu_debug_event_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+	union drm_amdgpu_debug_event *args = data;
+	struct amdgpu_debug_event_session *session;
+	struct amdgpu_debug_event_mgr *mgr;
+	int fd;
+
+	if (!fpriv)
+		return -EINVAL;
+
+	if (args->attach.op != DRM_AMDGPU_DEBUG_EVENT_OP_ATTACH ||
+	    args->attach.flags || args->attach.reserved)
+		return -EINVAL;
+
+	mgr = &fpriv->debug_event_mgr;
+	session = amdgpu_debug_event_session_create(mgr);
+	if (IS_ERR(session))
+		return PTR_ERR(session);
+
+	mutex_lock(&mgr->lock);
+	if (mgr->session) {
+		mutex_unlock(&mgr->lock);
+		amdgpu_debug_event_session_put(session);
+		return -EBUSY;
+	}
+
+	amdgpu_debug_event_session_get(session); /* manager reference */
+	mgr->session = session;
+	mutex_unlock(&mgr->lock);
+
+	fd = anon_inode_getfd("amdgpu_debug_event", &amdgpu_debug_event_fops,
+			      session, O_CLOEXEC | O_RDWR);
+	if (fd < 0) {
+		mutex_lock(&amdgpu_debug_event_link_lock);
+		mutex_lock(&mgr->lock);
+		if (mgr->session == session)
+			amdgpu_debug_event_detach_locked(mgr);
+		mutex_unlock(&mgr->lock);
+		mutex_unlock(&amdgpu_debug_event_link_lock);
+		amdgpu_debug_event_session_put(session);
+		return fd;
+	}
+
+	args->attach.debug_fd = fd;
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.h
new file mode 100644
index 000000000000..e80111ca8a3f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debug_event.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_DEBUG_EVENT_H__ +#define __AMDGPU_DEBUG_EVENT_H__ + +#include <linux/mutex.h> +#include <linux/types.h> + +struct amdgpu_fpriv; +struct drm_device; +struct drm_file; +struct drm_amdgpu_wait_event_data; +struct amdgpu_debug_event_session; + +struct amdgpu_debug_event_mgr { + struct mutex lock; + struct amdgpu_fpriv *fpriv; + struct amdgpu_debug_event_session *session; +}; + +void amdgpu_debug_event_mgr_init(struct amdgpu_debug_event_mgr *mgr, + struct amdgpu_fpriv *fpriv); +void amdgpu_debug_event_mgr_fini(struct amdgpu_debug_event_mgr *mgr); + +bool amdgpu_debug_event_attached(struct amdgpu_fpriv *fpriv); +int amdgpu_debug_event_route(struct amdgpu_fpriv *fpriv, + const struct drm_amdgpu_wait_event_data *data); +int amdgpu_debug_event_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +#endif /* __AMDGPU_DEBUG_EVENT_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b768d4be7cc8..6ebbb692598d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1501,6 +1501,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_eventfd_mgr_init(&fpriv->eventfd_mgr); amdgpu_wait_event_mgr_init(&fpriv->wait_event_mgr); + amdgpu_debug_event_mgr_init(&fpriv->debug_event_mgr, fpriv); pasid = amdgpu_pasid_alloc(16); if (pasid < 0) { @@ -1599,6 +1600,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, return; /* Drop all subscriptions before fpriv goes away. */ + amdgpu_debug_event_mgr_fini(&fpriv->debug_event_mgr); amdgpu_eventfd_mgr_fini(&fpriv->eventfd_mgr); amdgpu_wait_event_mgr_fini(&fpriv->wait_event_mgr); -- 2.34.1
