On 2018年07月04日 23:04, Andrey Grodzovsky wrote:
Extract and present the reposnsible process and thread when
VM_FAULT happens.
Signed-off-by: Andrey Grodzovsky <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 ++++++++--
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 +++++++--
3 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 7a625f3..1c483ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -187,6 +187,18 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser
*p, void *data)
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ if (!vm->task_info.pid) {
+ vm->task_info.pid = current->pid;
+ get_task_comm(vm->task_info.task_name, current);
+
+ if (current->group_leader->mm == current->mm) {
+ vm->task_info.tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info.process_name,
current->group_leader);
+ }
+ }
+
you can wrap this segment to a function like amdgpu_vm_set_task_info.
return 0;
free_all_kdata:
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 08753e7..7ad19f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -46,6 +46,7 @@
#include "ivsrcid/ivsrcid_vislands30.h"
+#include "amdgpu_vm.h"
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1449,8 +1450,13 @@ static int gmc_v8_0_process_interrupt(struct
amdgpu_device *adev,
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
- entry->src_id, entry->src_data[0]);
+ struct amdgpu_task_info task_info = { 0 };
+
+ amdgpu_vm_task_info(adev, entry->pasid, &task_info);
you can rename this function to amdgpu_vm_get_task_info.
general, it looks very good to me and does what I want to do before.
Thanks,
David Zhou
+
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid
%d thread %s pid %d\n",
+ entry->src_id, entry->src_data[0],
task_info.process_name,
+ task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR
0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS
0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 691a659..384a89c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -259,11 +259,16 @@ static int gmc_v9_0_process_interrupt(struct
amdgpu_device *adev,
}
if (printk_ratelimit()) {
+ struct amdgpu_task_info task_info = { 0 };
+
+ amdgpu_vm_task_info(adev, entry->pasid, &task_info);
+
dev_err(adev->dev,
- "[%s] VMC page fault (src_id:%u ring:%u vmid:%u
pasid:%u)\n",
+ "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u,
for process %s pid %d thread %s pid %d\n)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid);
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
dev_err(adev->dev, " at page 0x%016llx from %d\n",
addr, entry->client_id);
if (!amdgpu_sriov_vf(adev))
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx