Added debug logging to provide insights into KGD/KFD scheduling, cleaner
shader emission, and isolation processes.

Cc: Christian König <[email protected]>
Cc: Alex Deucher <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  5 ++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 89f7c89d1392..ef985a7bda1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1912,16 +1912,19 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct 
amdgpu_device *adev, u32 idx,
 
                adev->gfx.kfd_sch_req_count[idx]--;
 
+               dev_dbg(adev->dev, "%s: Enabling KFD scheduler for idx: %u\n", 
__func__, idx);
                if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
                    adev->gfx.kfd_sch_inactive[idx]) {
                        
schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
                                              
msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
                }
        } else {
+               dev_dbg(adev->dev, "%s: Disabling KFD scheduler for idx: %u\n", 
__func__, idx);
                if (adev->gfx.kfd_sch_req_count[idx] == 0) {
                        
cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
                        if (!adev->gfx.kfd_sch_inactive[idx]) {
                                amdgpu_amdkfd_stop_sched(adev, idx);
+                               dev_dbg(adev->dev, "%s: KFD stopped\n", 
__func__);
                                adev->gfx.kfd_sch_inactive[idx] = true;
                        }
                }
@@ -1960,7 +1963,12 @@ void amdgpu_gfx_enforce_isolation_handler(struct 
work_struct *work)
        if (idx >= MAX_XCP)
                return;
 
+       dev_dbg(adev->dev, "%s: Starting enforce isolation handler for idx: 
%u\n",
+               __func__, idx);
+
        mutex_lock(&adev->enforce_isolation_mutex);
+       dev_dbg(adev->dev, "%s: Processing fences for idx: %u. Checking GFX and 
Compute rings.\n",
+               __func__, idx);
        for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
                if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
                        fences += 
amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
@@ -1969,16 +1977,23 @@ void amdgpu_gfx_enforce_isolation_handler(struct 
work_struct *work)
                if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
                        fences += 
amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
        }
+       dev_dbg(adev->dev, "%s: Total fences processed for idx: %u: %u\n",
+               __func__, idx, fences);
        if (fences) {
+               dev_dbg(adev->dev, "%s: Fences detected for idx: %u, scheduling 
delayed work\n",
+                       __func__, idx);
                /* we've already had our timeslice, so let's wrap this up */
                schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
                                      msecs_to_jiffies(1));
        } else {
+               dev_dbg(adev->dev, "%s: No fences detected for idx: %u, no 
delayed work scheduled\n",
+                       __func__, idx);
                /* Tell KFD to resume the runqueue */
                if (adev->kfd.init_complete) {
                        WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
                        WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
                                amdgpu_amdkfd_start_sched(adev, idx);
+                               dev_dbg(adev->dev, "%s: KFD started\n", 
__func__);
                                adev->gfx.kfd_sch_inactive[idx] = false;
                }
        }
@@ -2009,6 +2024,8 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct 
amdgpu_device *adev,
                if (!adev->gfx.enforce_isolation_jiffies[idx]) {
                        adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
                        adev->gfx.enforce_isolation_time[idx] = 
GFX_SLICE_PERIOD_MS;
+                       dev_dbg(adev->dev, "%s: Initializing time slice for 
idx: %u. Setting start time and duration for fair GPU access between KGD and 
KFD. Initial jiffies: %lu, Time slice: %u ms\n",
+                               __func__, idx, jiffies, GFX_SLICE_PERIOD_MS);
                }
                /* Make sure KFD gets a chance to run */
                if (amdgpu_amdkfd_compute_active(adev, idx)) {
@@ -2020,19 +2037,28 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct 
amdgpu_device *adev,
                                        wait = true;
                                        /* reset the timer period */
                                        adev->gfx.enforce_isolation_time[idx] = 
GFX_SLICE_PERIOD_MS;
+                                       dev_dbg(adev->dev, "%s: Time slice 
expired for idx: %u. Allowing KGD to finish its work before KFD resumes. 
Resetting time slice to %u ms\n",
+                                               __func__, idx, 
GFX_SLICE_PERIOD_MS);
                                } else {
                                        /* set the timer period to what's left 
in our time slice */
                                        adev->gfx.enforce_isolation_time[idx] =
                                                GFX_SLICE_PERIOD_MS - 
jiffies_to_msecs(cjiffies);
+                                       dev_dbg(adev->dev, "%s: Adjusting 
remaining time slice for idx: %u. KFD can use the GPU for %lu more ms\n",
+                                               __func__, idx,
+                                               
adev->gfx.enforce_isolation_time[idx]);
                                }
                        } else {
                                /* if jiffies wrap around we will just wait a 
little longer */
                                adev->gfx.enforce_isolation_jiffies[idx] = 
jiffies;
+                               dev_dbg(adev->dev, "%s: Jiffies reset for idx: 
%u. Starting new time slice to ensure fair GPU access between KGD and KFD. New 
jiffies: %lu\n",
+                                       __func__, idx, jiffies);
                        }
                } else {
                        /* if there is no KFD work, then set the full slice 
period */
                        adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
                        adev->gfx.enforce_isolation_time[idx] = 
GFX_SLICE_PERIOD_MS;
+                       dev_dbg(adev->dev, "%s: No KFD work for idx: %u. 
Setting full slice period to %u ms\n",
+                               __func__, idx, GFX_SLICE_PERIOD_MS);
                }
        }
        mutex_unlock(&adev->enforce_isolation_mutex);
@@ -2055,6 +2081,8 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct 
amdgpu_ring *ring)
        struct amdgpu_device *adev = ring->adev;
        u32 idx;
 
+       dev_dbg(adev->dev, "%s: begin_use\n", __func__);
+
        if (!adev->gfx.enable_cleaner_shader)
                return;
 
@@ -2089,6 +2117,8 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct 
amdgpu_ring *ring)
        struct amdgpu_device *adev = ring->adev;
        u32 idx;
 
+       dev_dbg(adev->dev, "%s: end_use\n", __func__);
+
        if (!adev->gfx.enable_cleaner_shader)
                return;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c9c48b782ec1..06b527388402 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -688,8 +688,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
 
        if (adev->gfx.enable_cleaner_shader &&
            ring->funcs->emit_cleaner_shader &&
-           job->enforce_isolation)
+           job->enforce_isolation) {
+               dev_dbg(ring->adev->dev, "%s: Emitting cleaner shader for ring: 
%s\n",
+                       __func__, ring->name);
                ring->funcs->emit_cleaner_shader(ring);
+       }
 
        if (vm_flush_needed) {
                trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
-- 
2.34.1

Reply via email to