Re: [RFC PATCH v1 3/3] drm/msm: Improve the a6xx page fault handler

2020-11-09 Thread Rob Clark
On Mon, Nov 9, 2020 at 2:23 PM Jordan Crouse wrote:
>
> Use the new adreno-smmu-priv fault info function to get more SMMU
> debug registers and print the current TTBR0 to debug per-instance
> pagetables and figure out which GPU block generated the request.
>
> Signed-off-by: Jordan Crouse 
> ---
>
>  drivers/gpu/drm/msm/adreno/a5xx_gpu.c |  4 +-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 76 +--
>  drivers/gpu/drm/msm/msm_iommu.c   | 11 +++-
>  drivers/gpu/drm/msm/msm_mmu.h |  4 +-
>  4 files changed, 87 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index d6804a802355..ed4cb81af874 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -933,7 +933,7 @@ bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer 
> *ring)
> return true;
>  }
>
> -static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
> +static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void 
> *data)
>  {
> struct msm_gpu *gpu = arg;
> pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d 
> (%u,%u,%u,%u)\n",
> @@ -943,7 +943,7 @@ static int a5xx_fault_handler(void *arg, unsigned long 
> iova, int flags)
> gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
> gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
>
> -   return -EFAULT;
> +   return 0;
>  }
>
>  static void a5xx_cp_err_irq(struct msm_gpu *gpu)
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 948f3656c20c..ac6e8cd5cf1a 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -905,18 +905,88 @@ static void a6xx_recover(struct msm_gpu *gpu)
> msm_gpu_hw_init(gpu);
>  }
>
> -static int a6xx_fault_handler(void *arg, unsigned long iova, int flags)
> +static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
> +{
> +   static const char *uche_clients[7] = {
> +   "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
> +   };
> +   u32 val;
> +
> +   if (mid < 1 || mid > 3)
> +   return "UNKNOWN";
> +
> +   /*
> +* The source of the data depends on the mid ID read from FSYNR1
> +* and the client ID read from the UCHE block.
> +*/
> +   val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
> +
> +   /* mid = 3 is most precise and refers to only one block per client */
> +   if (mid == 3)
> +   return uche_clients[val & 7];
> +
> +   /* For mid=2 the source is TP or VFD except when the client id is 0 */
> +   if (mid == 2)
> +   return ((val & 7) == 0) ? "TP" : "TP|VFD";
> +
> +   /* For mid=1 just return "UCHE" as a catchall for everything else */
> +   return "UCHE";
> +}
> +
> +static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
> +{
> +   if (id == 0)
> +   return "CP";
> +   else if (id == 4)
> +   return "CCU";
> +   else if (id == 6)
> +   return "CDP Prefetch";
> +
> +   return a6xx_uche_fault_block(gpu, id);
> +}
> +
> +#define ARM_SMMU_FSR_TF BIT(1)
> +#define ARM_SMMU_FSR_PF BIT(3)
> +#define ARM_SMMU_FSR_EF BIT(4)
> +
> +static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void 
> *data)
>  {
> struct msm_gpu *gpu = arg;
> +   struct adreno_smmu_fault_info *info = data;
> +   const char *type = "UNKNOWN";
>
> -   pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d 
> (%u,%u,%u,%u)\n",
> +   /*
> +* Print a default message if we couldn't get the data from the
> +* adreno-smmu-priv
> +*/
> +   if (!info) {
> +   pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d 
> (%u,%u,%u,%u)\n",
> iova, flags,
> gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
> gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
> gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
> gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
>
> -   return -EFAULT;
> +   return 0;
> +   }
> +
> +   if (info->fsr & ARM_SMMU_FSR_TF)
> +   type = "TRANSLATION";
> +   else if (info->fsr & ARM_SMMU_FSR_PF)
> +   type = "PERMISSION";
> +   else if (info->fsr & ARM_SMMU_FSR_EF)
> +   type = "EXTERNAL";
> +
> +   pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s 
> type=%s source=%s (%u,%u,%u,%u)\n",
> +   info->ttbr0, iova,
> +   flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", type,
> +   a6xx_fault_block(gpu, info->fsynr1 & 0xff),
> +   gpu_read(gpu, 

[RFC PATCH v1 3/3] drm/msm: Improve the a6xx page fault handler

2020-11-09 Thread Jordan Crouse
Use the new adreno-smmu-priv fault info function to get more SMMU
debug registers and print the current TTBR0 to debug per-instance
pagetables and figure out which GPU block generated the request.

Signed-off-by: Jordan Crouse 
---

 drivers/gpu/drm/msm/adreno/a5xx_gpu.c |  4 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 76 +--
 drivers/gpu/drm/msm/msm_iommu.c   | 11 +++-
 drivers/gpu/drm/msm/msm_mmu.h |  4 +-
 4 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index d6804a802355..ed4cb81af874 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -933,7 +933,7 @@ bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
return true;
 }
 
-static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
+static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
 {
struct msm_gpu *gpu = arg;
pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
@@ -943,7 +943,7 @@ static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 
-   return -EFAULT;
+   return 0;
 }
 
 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 948f3656c20c..ac6e8cd5cf1a 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -905,18 +905,88 @@ static void a6xx_recover(struct msm_gpu *gpu)
msm_gpu_hw_init(gpu);
 }
 
-static int a6xx_fault_handler(void *arg, unsigned long iova, int flags)
+static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
+{
+   static const char *uche_clients[7] = {
+   "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
+   };
+   u32 val;
+
+   if (mid < 1 || mid > 3)
+   return "UNKNOWN";
+
+   /*
+* The source of the data depends on the mid ID read from FSYNR1
+* and the client ID read from the UCHE block.
+*/
+   val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
+
+   /* mid = 3 is most precise and refers to only one block per client */
+   if (mid == 3)
+   return uche_clients[val & 7];
+
+   /* For mid=2 the source is TP or VFD except when the client id is 0 */
+   if (mid == 2)
+   return ((val & 7) == 0) ? "TP" : "TP|VFD";
+
+   /* For mid=1 just return "UCHE" as a catchall for everything else */
+   return "UCHE";
+}
+
+static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
+{
+   if (id == 0)
+   return "CP";
+   else if (id == 4)
+   return "CCU";
+   else if (id == 6)
+   return "CDP Prefetch";
+
+   return a6xx_uche_fault_block(gpu, id);
+}
+
+#define ARM_SMMU_FSR_TF BIT(1)
+#define ARM_SMMU_FSR_PF BIT(3)
+#define ARM_SMMU_FSR_EF BIT(4)
+
+static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
 {
struct msm_gpu *gpu = arg;
+   struct adreno_smmu_fault_info *info = data;
+   const char *type = "UNKNOWN";
 
-   pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
+   /*
+* Print a default message if we couldn't get the data from the
+* adreno-smmu-priv
+*/
+   if (!info) {
+   pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
iova, flags,
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
 
-   return -EFAULT;
+   return 0;
+   }
+
+   if (info->fsr & ARM_SMMU_FSR_TF)
+   type = "TRANSLATION";
+   else if (info->fsr & ARM_SMMU_FSR_PF)
+   type = "PERMISSION";
+   else if (info->fsr & ARM_SMMU_FSR_EF)
+   type = "EXTERNAL";
+
+   pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
+   info->ttbr0, iova,
+   flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ", type,
+   a6xx_fault_block(gpu, info->fsynr1 & 0xff),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
+   gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
+
+   return 0;
 }
 
 static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)