On 3/5/26 04:07, Srinivasan Shanmugam wrote:
> Introduce a per-drm_file eventfd manager to support render-node event
> subscriptions.
>
> The manager is implemented in amdgpu_eventfd.[ch] and is owned by the
> drm_file (amdgpu_fpriv). It maps event_id -> eventfd_id object, where
> each eventfd_id can have multiple eventfds bound (fan-out).
>
> The design is IRQ-safe for signaling: IRQ path takes the xarray lock
> (irqsave) and signals eventfds while still holding the lock.
>
> This patch only adds the core manager
>
> v4:
> - Use eventfd_ctx pointer as binding identity instead of fd number
> - Make duplicate (event_id, ctx) binds idempotent
> - Replace mgr lock with atomic bind limit
> - Add helper for xa get-or-create event_id
>
> Cc: Alex Deucher <[email protected]>
> Suggested-by: Christian König <[email protected]>
> Signed-off-by: Srinivasan Shanmugam <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c | 279 ++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h | 59 +++++
> 3 files changed, 340 insertions(+), 1 deletion(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 006d49d6b4af..30b1cf3c6cdf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -67,7 +67,8 @@ amdgpu-y += amdgpu_device.o amdgpu_reg_access.o
> amdgpu_doorbell_mgr.o amdgpu_kms
> amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> amdgpu_dev_coredump.o \
> - amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
> + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o \
> + amdgpu_eventfd.o
>
> amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
> new file mode 100644
> index 000000000000..9806ec515cfc
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c
> @@ -0,0 +1,279 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +/*
> + * Render-node eventfd subscription infrastructure.
> + */
> +
> +#include <linux/slab.h>
> +#include <linux/err.h>
> +
> +#include "amdgpu_eventfd.h"
> +
> +#define AMDGPU_EVENTFD_MAX_BINDS 4096
> +
> +/*
> + * Helper: caller holds xa_lock_irqsave(&mgr->ids, flags).
It would be better if that function were called without holding a lock and also
allocated the new entry itself.
> + *
> + * If id exists -> returns it, keeps new_id untouched (caller frees it).
> + * If id missing -> stores new_id and consumes it (sets *new_id = NULL).
> + */
> +static struct amdgpu_eventfd_id *
> +amdgpu_eventfd_get_or_create_id_locked(struct amdgpu_eventfd_mgr *mgr,
> + u32 event_id,
> + struct amdgpu_eventfd_id **new_id)
> +{
> + struct amdgpu_eventfd_id *id;
> +
> + id = xa_load(&mgr->ids, event_id);
> + if (id)
> + return id;
> +
> + if (!*new_id)
> + return NULL;
> +
> + if (xa_err(__xa_store(&mgr->ids, event_id, *new_id,
You can simplify this by using xa_insert(); that function returns -EBUSY when
there is already an entry at the specified location.
GFP_NOWAIT)))
That is incorrect. If xa_store() can't drop and retake the lock you would need
to use GFP_ATOMIC here.
> + return NULL;
> +
> + id = *new_id;
> + *new_id = NULL;
> + return id;
> +}
> +
> +static struct amdgpu_eventfd_id *amdgpu_eventfd_id_alloc(u32 event_id)
> +{
> + struct amdgpu_eventfd_id *id;
> +
> + id = kzalloc(sizeof(*id), GFP_KERNEL);
> + if (!id)
> + return NULL;
> +
> + id->event_id = event_id;
> + INIT_HLIST_HEAD(&id->entries);
> + id->n_entries = 0;
> + return id;
> +}
> +
> +/*
> + * mgr lifetime is tied to fpriv:
> + * - init at open
> + * - fini at postclose (after unregister / wait for ongoing IRQs if needed)
> + */
> +void amdgpu_eventfd_mgr_init(struct amdgpu_eventfd_mgr *mgr)
> +{
> + xa_init_flags(&mgr->ids, XA_FLAGS_LOCK_IRQ);
> + atomic_set(&mgr->bind_count, 0);
> +}
> +
> +void amdgpu_eventfd_mgr_fini(struct amdgpu_eventfd_mgr *mgr)
> +{
> + unsigned long index;
> + struct amdgpu_eventfd_id *id;
> +
> + /*
> + * Final teardown: keep xa locked while we remove ids and
> + * drop all eventfd references (IRQ-safe).
> + */
> + xa_lock_irq(&mgr->ids);
That is only necessary if we didn't wait on IRQs before, but I think we
should do exactly that in the next patch set.
> + xa_for_each(&mgr->ids, index, id) {
> + struct amdgpu_eventfd_entry *e;
> + struct hlist_node *tmp;
> +
> + __xa_erase(&mgr->ids, index);
That is superfluous; xa_destroy() takes care of removing all entries and is much
faster.
> +
> + hlist_for_each_entry_safe(e, tmp, &id->entries, hnode) {
> + hlist_del(&e->hnode);
> + id->n_entries--;
> + eventfd_ctx_put(e->ctx);
> + kfree(e);
> + }
> +
> + kfree(id);
> + }
> + xa_unlock_irq(&mgr->ids);
> +
> + xa_destroy(&mgr->ids);
> +}
> +
> +int amdgpu_eventfd_bind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int
> eventfd)
> +{
> + struct amdgpu_eventfd_id *id, *new_id = NULL;
> + struct amdgpu_eventfd_entry *e = NULL;
> + struct eventfd_ctx *ctx;
> + unsigned long flags;
> + bool found = false;
> + int r;
> +
> + if (!mgr || !event_id || eventfd < 0)
> + return -EINVAL;
> +
> + /* Enforce total bind limit (atomic, no mgr lock). */
> + if (atomic_inc_return(&mgr->bind_count) > AMDGPU_EVENTFD_MAX_BINDS) {
> + atomic_dec(&mgr->bind_count);
> + return -ENOSPC;
> + }
> +
> + /*
> + * Allocate objects first (can sleep). Take references later.
> + * new_id is cheap even if unused.
> + */
> + new_id = amdgpu_eventfd_id_alloc(event_id);
> + e = kzalloc(sizeof(*e), GFP_KERNEL);
Only allocate that after all the prerequisites have been checked, e.g. after
ctx, id etc. have been acquired.
> + if (!e) {
> + r = -ENOMEM;
> + goto err_dec_limit;
> + }
> +
> + ctx = eventfd_ctx_fdget(eventfd);
> + if (IS_ERR(ctx)) {
> + r = PTR_ERR(ctx);
> + goto err_free_entry;
> + }
> + e->ctx = ctx;
> +
> + xa_lock_irqsave(&mgr->ids, flags);
> +
> + id = amdgpu_eventfd_get_or_create_id_locked(mgr, event_id, &new_id);
Move all the functionality of this into amdgpu_eventfd_id_alloc(). It is not
problematic that we drop and take the lock multiple times as far as I see.
Regards,
Christian.
> + if (!id) {
> + xa_unlock_irqrestore(&mgr->ids, flags);
> + r = -ENOMEM;
> + goto err_put_ctx_free_newid;
> + }
> +
> + /* Duplicate bind policy: idempotent no-op. Compare ctx pointers. */
> + {
> + struct amdgpu_eventfd_entry *it;
> +
> + hlist_for_each_entry(it, &id->entries, hnode) {
> + if (it->ctx == ctx) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found) {
> + hlist_add_head(&e->hnode, &id->entries);
> + id->n_entries++;
> + }
> + }
> +
> + xa_unlock_irqrestore(&mgr->ids, flags);
> +
> + /* If event_id already existed, drop unused allocation. */
> + kfree(new_id);
> +
> + if (found) {
> + /* Drop the new reference + entry; keep existing binding. */
> + eventfd_ctx_put(ctx);
> + kfree(e);
> + atomic_dec(&mgr->bind_count);
> + }
> +
> + return 0;
> +
> +err_put_ctx_free_newid:
> + kfree(new_id);
> + eventfd_ctx_put(ctx);
> +err_free_entry:
> + kfree(e);
> +err_dec_limit:
> + atomic_dec(&mgr->bind_count);
> + return r;
> +}
> +
> +int amdgpu_eventfd_unbind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int
> eventfd)
> +{
> + struct amdgpu_eventfd_id *id;
> + struct amdgpu_eventfd_entry *e;
> + struct hlist_node *tmp;
> + struct eventfd_ctx *ctx;
> + unsigned long flags;
> + bool removed = false;
> +
> + if (!mgr || !event_id || eventfd < 0)
> + return -EINVAL;
> +
> + ctx = eventfd_ctx_fdget(eventfd);
> + if (IS_ERR(ctx))
> + return PTR_ERR(ctx);
> +
> + xa_lock_irqsave(&mgr->ids, flags);
> +
> + id = xa_load(&mgr->ids, event_id);
> + if (!id)
> + goto out_unlock;
> +
> + hlist_for_each_entry_safe(e, tmp, &id->entries, hnode) {
> + if (e->ctx == ctx) {
> + hlist_del(&e->hnode);
> + id->n_entries--;
> + removed = true;
> +
> + eventfd_ctx_put(e->ctx);
> + kfree(e);
> +
> + atomic_dec(&mgr->bind_count);
> +
> + if (id->n_entries == 0) {
> + __xa_erase(&mgr->ids, event_id);
> + kfree(id);
> + }
> + break;
> + }
> + }
> +
> +out_unlock:
> + xa_unlock_irqrestore(&mgr->ids, flags);
> +
> + /* Drop the temporary ref from fdget */
> + eventfd_ctx_put(ctx);
> +
> + return removed ? 0 : -ENOENT;
> +}
> +
> +void amdgpu_eventfd_signal(struct amdgpu_eventfd_mgr *mgr, u32 event_id)
> +{
> + struct amdgpu_eventfd_id *id;
> + struct amdgpu_eventfd_entry *e;
> + unsigned long flags;
> +
> + if (!mgr || !event_id)
> + return;
> +
> + /*
> + * IRQ-safe: keep xa locked while signaling.
> + * eventfd_signal(ctx) is IRQ-safe.
> + */
> + xa_lock_irqsave(&mgr->ids, flags);
> +
> + id = xa_load(&mgr->ids, event_id);
> + if (!id) {
> + xa_unlock_irqrestore(&mgr->ids, flags);
> + return;
> + }
> +
> + hlist_for_each_entry(e, &id->entries, hnode)
> + eventfd_signal(e->ctx);
> +
> + xa_unlock_irqrestore(&mgr->ids, flags);
> +}
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
> new file mode 100644
> index 000000000000..248afb1f2f14
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
> @@ -0,0 +1,59 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +/*
> + * Render-node eventfd subscription infrastructure.
> + */
> +
> +#ifndef __AMDGPU_EVENTFD_H__
> +#define __AMDGPU_EVENTFD_H__
> +
> +#include <linux/eventfd.h>
> +#include <linux/xarray.h>
> +#include <linux/atomic.h>
> +
> +struct amdgpu_eventfd_entry {
> + struct eventfd_ctx *ctx;
> + struct hlist_node hnode;
> +};
> +
> +struct amdgpu_eventfd_id {
> + u32 event_id;
> + struct hlist_head entries;
> + u32 n_entries;
> +};
> +
> +struct amdgpu_eventfd_mgr {
> + struct xarray ids; /* event_id -> struct amdgpu_eventfd_id* */
> + atomic_t bind_count; /* total binds across all event_ids */
> +};
> +
> +void amdgpu_eventfd_mgr_init(struct amdgpu_eventfd_mgr *mgr);
> +void amdgpu_eventfd_mgr_fini(struct amdgpu_eventfd_mgr *mgr);
> +
> +int amdgpu_eventfd_bind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int
> eventfd);
> +int amdgpu_eventfd_unbind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int
> eventfd);
> +
> +void amdgpu_eventfd_signal(struct amdgpu_eventfd_mgr *mgr, u32 event_id);
> +
> +#endif /* __AMDGPU_EVENTFD_H__ */