Extend KFD event signaling to also notify render-node eventfd subscribers
via amdgpu_eventfd_notify(pasid, event_id, count).

Add a weak no-op amdgpu_eventfd_notify() so KFD keeps building even when
amdgpu does not provide the bridge implementation.

Use the PASID already tracked by KFD (pdd->pasid, or derived from the
process's pdds) to avoid drm_file-based PASID derivation.

Hook notification into:
  - CPU-driven SIGNAL events (kfd_set_event)
  - IRQ-driven SIGNAL events (kfd_signal_event_interrupt)
  - HW exception, VM fault, reset, poison consumed, and process terminate
    event paths.

This preserves existing KFD semantics while enabling render-node clients
to observe the same KFD events via standard eventfd mechanisms.

Cc: Harish Kasiviswanathan <[email protected]>
Cc: Felix Kuehling <[email protected]>
Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 76 +++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 13416bff7763..00416f6ec261 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -57,6 +57,35 @@ struct kfd_signal_page {
        bool need_to_free_pages;
 };
 
+/*
+ * Optional KGD hook for render-node eventfd signaling.
+ * Weak no-op so KFD builds even when amdgpu doesn't provide it.
+ */
+__weak void amdgpu_eventfd_notify(u32 pasid, u32 event_id, u64 count)
+{
+}
+
+static u32 kfd_pasid_from_pdd(struct kfd_process_device *pdd)
+{
+       return pdd ? pdd->pasid : 0;
+}
+
+static u32 kfd_pasid_from_process(struct kfd_process *p)
+{
+       u32 i;
+
+       if (!p || !p->n_pdds)
+               return 0;
+
+       for (i = 0; i < p->n_pdds; i++) {
+               u32 pasid = kfd_pasid_from_pdd(p->pdds[i]);
+
+               if (pasid)
+                       return pasid;
+       }
+       return 0;
+}
+
 static uint64_t *page_slots(struct kfd_signal_page *page)
 {
        return page->kernel_address;
@@ -654,6 +683,7 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
 {
        int ret = 0;
        struct kfd_event *ev;
+       u32 pasid = kfd_pasid_from_process(p);
 
        rcu_read_lock();
 
@@ -670,6 +700,14 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
                ret = -EINVAL;
 
        spin_unlock(&ev->lock);
+
+       /*
+        * CPU-driven SIGNAL event (KFD_IOC_SET_EVENT) should also wake
+        * render-node eventfd subscribers for the same (PASID,event_id).
+        */
+       if (!ret && pasid)
+               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
+
 unlock_rcu:
        rcu_read_unlock();
        return ret;
@@ -713,13 +751,15 @@ static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
-                                       struct kfd_event *ev)
+                                       struct kfd_event *ev, u32 pasid)
 {
        if (ev && event_can_be_gpu_signaled(ev)) {
                acknowledge_signal(p, ev);
                spin_lock(&ev->lock);
                set_event(ev);
                spin_unlock(&ev->lock);
+               /* Bridge: wake render-node subscribers for same (PASID,event_id). */
+               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
        }
 }
 
@@ -744,7 +784,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
                ev = lookup_signaled_event_by_partial_id(p, partial_id,
                                                         valid_id_bits);
        if (ev) {
-               set_event_from_interrupt(p, ev);
+               set_event_from_interrupt(p, ev, pasid);
        } else if (p->signal_page) {
                /*
                 * Partial ID lookup failed. Assume that the event ID
@@ -767,7 +807,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
                                        break;
 
                                if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
-                                       set_event_from_interrupt(p, ev);
+                                       set_event_from_interrupt(p, ev, pasid);
                        }
                } else {
                        /* With relatively many events, it's faster to
@@ -777,7 +817,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
                        for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
                                if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
                                        ev = lookup_event_by_id(p, id);
-                                       set_event_from_interrupt(p, ev);
+                                       set_event_from_interrupt(p, ev, pasid);
                                }
                }
        }
@@ -1107,7 +1147,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
  * Assumes that p is not going away.
  */
 static void lookup_events_by_type_and_signal(struct kfd_process *p,
-               int type, void *event_data)
+               int type, void *event_data, u32 pasid)
 {
        struct kfd_hsa_memory_exception_data *ev_data;
        struct kfd_event *ev;
@@ -1130,6 +1170,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
                        if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
                                ev->memory_exception_data = *ev_data;
                        spin_unlock(&ev->lock);
+                       if (pasid)
+                               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                }
 
        if (type == KFD_EVENT_TYPE_MEMORY) {
@@ -1168,7 +1210,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
        if (!p)
                return; /* Presumably process exited. */
 
-       lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
+       lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL, pasid);
        kfd_unref_process(p);
 }
 
@@ -1233,11 +1275,14 @@ void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
        id = KFD_FIRST_NONSIGNAL_EVENT_ID;
        idr_for_each_entry_continue(&p->event_idr, ev, id)
                if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+                       u32 pasid = kfd_pasid_from_pdd(pdd);
                        spin_lock(&ev->lock);
                        ev->memory_exception_data = data ? *data :
                                                        memory_exception_data;
                        set_event(ev);
                        spin_unlock(&ev->lock);
+                       if (pasid)
+                               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                }
 
        rcu_read_unlock();
@@ -1251,6 +1296,7 @@ void kfd_signal_reset_event(struct kfd_node *dev)
        struct kfd_event *ev;
        unsigned int temp;
        uint32_t id, idx;
+       u32 pasid;
        int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
                        KFD_HW_EXCEPTION_ECC :
                        KFD_HW_EXCEPTION_GPU_HANG;
@@ -1274,6 +1320,8 @@ void kfd_signal_reset_event(struct kfd_node *dev)
                        continue;
                }
 
+               pasid = kfd_pasid_from_pdd(pdd);
+
                if (unlikely(!pdd)) {
                        WARN_ONCE(1, "Could not get device data from process pid:%d\n",
                                  p->lead_thread->pid);
@@ -1312,6 +1360,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
                                ev->hw_exception_data.gpu_id = user_gpu_id;
                                set_event(ev);
                                spin_unlock(&ev->lock);
+
+                               if (pasid)
+                                       amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                        }
                        if (ev->type == KFD_EVENT_TYPE_MEMORY &&
                            reset_cause == KFD_HW_EXCEPTION_ECC) {
@@ -1320,6 +1371,9 @@ void kfd_signal_reset_event(struct kfd_node *dev)
                                ev->memory_exception_data.gpu_id = user_gpu_id;
                                set_event(ev);
                                spin_unlock(&ev->lock);
+
+                               if (pasid)
+                                       amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                        }
                }
 
@@ -1367,6 +1421,9 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
                        ev->hw_exception_data = hw_exception_data;
                        set_event(ev);
                        spin_unlock(&ev->lock);
+
+                       if (pasid)
+                               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                }
 
                if (ev->type == KFD_EVENT_TYPE_MEMORY) {
@@ -1374,6 +1431,9 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
                        ev->memory_exception_data = memory_exception_data;
                        set_event(ev);
                        spin_unlock(&ev->lock);
+
+                       if (pasid)
+                               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                }
        }
 
@@ -1394,6 +1454,7 @@ void kfd_signal_process_terminate_event(struct kfd_process *p)
 {
        struct kfd_event *ev;
        u32 id;
+       u32 pasid = kfd_pasid_from_process(p);
 
        rcu_read_lock();
 
@@ -1404,6 +1465,9 @@ void kfd_signal_process_terminate_event(struct kfd_process *p)
                        spin_lock(&ev->lock);
                        set_event(ev);
                        spin_unlock(&ev->lock);
+
+                       if (pasid)
+                               amdgpu_eventfd_notify(pasid, ev->event_id, 1);
                }
 
        /* Send SIGBUS to p->lead_thread */
-- 
2.34.1

Reply via email to