Re: [PATCH v3] drm/amdgpu: add support for user trap handlers

2021-05-06 Thread Samuel Pitoiset

Added GF10.3 support.

I re-tested this on GFX9 and it still works, though on GFX10+ the trap 
handler is never reached, is there something obviously wrong in this patch?


Thanks!

On 5/6/21 8:54 AM, Samuel Pitoiset wrote:

A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

Signed-off-by: Samuel Pitoiset 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  3 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 47 
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 19 ++
  include/uapi/drm/amdgpu_drm.h|  8 
  9 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 90136f9dedd6..0cc9f5eb0484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -103,6 +103,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,
return r;
  }
  
+static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,

+struct drm_amdgpu_cs_chunk_trap *data,
+uint64_t *tba_addr, uint64_t *tma_addr)
+{
+   if (!data->tba_addr || !data->tma_addr)
+   return -EINVAL;
+
+   *tba_addr = data->tba_addr;
+   *tma_addr = data->tma_addr;
+
+   return 0;
+}
+
  static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)
  {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -111,6 +124,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+   uint64_t tba_addr = 0, tma_addr = 0;
int i;
int ret;
  
@@ -213,6 +227,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
  
  			break;
  
+		case AMDGPU_CHUNK_ID_TRAP:

+   size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+   if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+   ret = -EINVAL;
+   goto free_partial_kdata;
+   }
+
+   ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+   &tba_addr, &tma_addr);
+   if (ret)
+   goto free_partial_kdata;
+   break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -238,6 +265,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
  
  	if (p->uf_entry.tv.bo)

p->job->uf_addr = uf_offset;
+
+   p->job->tba_addr = tba_addr;
+   p->job->tma_addr = tma_addr;
+
kvfree(chunk_array);
  
  	/* Use this opportunity to fill in task info for the vm */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d4a40cd0fe09..21c3b6eaf359 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -94,9 +94,10 @@
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
   * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
   * - 3.41.0 - Add video codec query
+ * - 3.42.0 - Add AMDGPU_CHUNK_ID_TRAP
   */
  #define KMS_DRIVER_MAJOR  3
-#define KMS_DRIVER_MINOR   41
+#define KMS_DRIVER_MINOR   42
  #define KMS_DRIVER_PATCHLEVEL 0
  
  int amdgpu_vram_limit;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 0c3b4fa1f936..d165970ffdd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
uint32_toa_base;
uint32_toa_size;
  
+	/* user trap */

+   uint64_ttba_addr;
+   uint64_ttma_addr;
+
unsignedpasid;
struct dma_fence*pasid_mapping;
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 81caac9b958a..b8ed5b13e

[PATCH v3] drm/amdgpu: add support for user trap handlers

2021-05-06 Thread Samuel Pitoiset
A trap handler can be used by userspace to catch shader exceptions
like divide by zero, memory violations etc.

On GFX6-GFX8, the registers used to configure TBA/TMA aren't
privileged and can be configured from userpace.

On GFX9+ they are per VMID and privileged, only the KMD can
configure them. At the moment, we don't know how to set them
via the CP, so they are only emitted if a VMID is reserved.

This introduces a new CS chunk that can be used to set the
TBA/TMA virtual address at submit time.

Signed-off-by: Samuel Pitoiset 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 47 
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 19 ++
 include/uapi/drm/amdgpu_drm.h|  8 
 9 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 90136f9dedd6..0cc9f5eb0484 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -103,6 +103,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,
return r;
 }
 
+static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+struct drm_amdgpu_cs_chunk_trap *data,
+uint64_t *tba_addr, uint64_t *tma_addr)
+{
+   if (!data->tba_addr || !data->tma_addr)
+   return -EINVAL;
+
+   *tba_addr = data->tba_addr;
+   *tma_addr = data->tma_addr;
+
+   return 0;
+}
+
 static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -111,6 +124,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+   uint64_t tba_addr = 0, tma_addr = 0;
int i;
int ret;
 
@@ -213,6 +227,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
 
break;
 
+   case AMDGPU_CHUNK_ID_TRAP:
+   size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+   if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+   ret = -EINVAL;
+   goto free_partial_kdata;
+   }
+
+   ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+   &tba_addr, &tma_addr);
+   if (ret)
+   goto free_partial_kdata;
+   break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -238,6 +265,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
 
if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
+
+   p->job->tba_addr = tba_addr;
+   p->job->tma_addr = tma_addr;
+
kvfree(chunk_array);
 
/* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d4a40cd0fe09..21c3b6eaf359 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -94,9 +94,10 @@
  * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
  * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
  * - 3.41.0 - Add video codec query
+ * - 3.42.0 - Add AMDGPU_CHUNK_ID_TRAP
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   41
+#define KMS_DRIVER_MINOR   42
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 0c3b4fa1f936..d165970ffdd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
uint32_toa_base;
uint32_toa_size;
 
+   /* user trap */
+   uint64_ttba_addr;
+   uint64_ttma_addr;
+
unsignedpasid;
struct dma_fence*pasid_mapping;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 81caac9b958a..b8ed5b13ea44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -62,6 +62,10 @@ struct amdgpu_job {
/* user fence handling */
uint64_tuf