Re: [PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training

2020-01-20 Thread Yin, Tianci (Rico)
[AMD Official Use Only - Internal Distribution Only]

Thanks very much Christian!

Please review again.

Rico

From: Christian König 
Sent: Monday, January 20, 2020 19:58
To: Yin, Tianci (Rico) ; amd-gfx@lists.freedesktop.org 

Cc: Long, Gang ; Xu, Feifei ; Wang, 
Kevin(Yang) ; Tuikov, Luben ; 
Deucher, Alexander ; Zhang, Hawking 
; Koenig, Christian ; Yuan, 
Xiaojie 
Subject: Re: [PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in 
GDDR6 memory training

Am 20.01.20 um 12:08 schrieb Tianci Yin:
> From: "Tianci.Yin" 
>
> [why]
> In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by
> UMC, that causes problems(like GTT corrupted and page fault observed).
>
> [how]
> Saving the content of this bottom VRAM to system memory before training, and
> restoring it after training to avoid VRAM corruption.

You need to re-order the patches, this one should come first and the
other one last.

One more style nit pick below.

>
> Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53
> Signed-off-by: Tianci.Yin 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 ++
>   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 32 -
>   2 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 3265487b859f..611021514c52 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -172,6 +172,8 @@ struct psp_dtm_context {
>   #define MEM_TRAIN_SYSTEM_SIGNATURE  0x54534942
>   #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES   0x1000
>   #define GDDR6_MEM_TRAINING_OFFSET   0x8000
> +/*Define the VRAM size that will be encroached by BIST training.*/
> +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE   0x200
>
>   enum psp_memory_training_init_flag {
>PSP_MEM_TRAIN_NOT_SUPPORT   = 0x0,
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> index 685dd9754c67..51011b661ba8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> @@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct 
> psp_context *psp)
>   static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
>   {
>int ret;
> + void *buf;
> + uint32_t sz;
>uint32_t p2c_header[4];
> + struct amdgpu_device *adev = psp->adev;
>struct psp_memory_training_context *ctx = >mem_train_ctx;
>uint32_t *pcache = (uint32_t*)ctx->sys_cache;

In general it is preferred to order the lines in reverse xmas tree.

E.g. long lines with pre-initializes variables such as adev, ctx, pcache
first. And temporary stuff like i, ret, buf etc last.

Apart from that this looks good to me,
Christian.

>
> @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context 
> *psp, uint32_t ops)
>return 0;
>}
>
> - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, 
> p2c_header, sizeof(p2c_header), false);
> + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, 
> sizeof(p2c_header), false);
>DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] 
> p2c_header[%08x,%08x,%08x,%08x]\n",
>  pcache[0], pcache[1], pcache[2], pcache[3],
>  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
> @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct 
> psp_context *psp, uint32_t ops)
>DRM_DEBUG("Memory training ops:%x.\n", ops);
>
>if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
> + /*
> +  * Long traing will encroach certain mount of bottom VRAM,
> +  * saving the content of this bottom VRAM to system memory
> +  * before training, and restoring it after training to avoid
> +  * VRAM corruption.
> +  */
> + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
> +
> + if (adev->gmc.visible_vram_size < sz || 
> !adev->mman.aper_base_kaddr) {
> + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p 
> is not initialized.\n",
> +   adev->gmc.visible_vram_size,
> +   adev->mman.aper_base_kaddr);
> + return -EINVAL;
> + }
> +
> + buf = vmalloc(sz);
> + if (!buf) {
> + DRM_ERROR("failed to allocate system memory.\n");
> + return -ENOMEM;
> 

Re: [PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training

2020-01-20 Thread Christian König

Am 20.01.20 um 12:08 schrieb Tianci Yin:

From: "Tianci.Yin" 

[why]
In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by
UMC, that causes problems(like GTT corrupted and page fault observed).

[how]
Saving the content of this bottom VRAM to system memory before training, and
restoring it after training to avoid VRAM corruption.


You need to re-order the patches, this one should come first and the 
other one last.


One more style nit pick below.



Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53
Signed-off-by: Tianci.Yin 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 32 -
  2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 3265487b859f..611021514c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -172,6 +172,8 @@ struct psp_dtm_context {
  #define MEM_TRAIN_SYSTEM_SIGNATURE0x54534942
  #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
  #define GDDR6_MEM_TRAINING_OFFSET 0x8000
+/*Define the VRAM size that will be encroached by BIST training.*/
+#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200
  
  enum psp_memory_training_init_flag {

PSP_MEM_TRAIN_NOT_SUPPORT   = 0x0,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 685dd9754c67..51011b661ba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct 
psp_context *psp)
  static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
  {
int ret;
+   void *buf;
+   uint32_t sz;
uint32_t p2c_header[4];
+   struct amdgpu_device *adev = psp->adev;
struct psp_memory_training_context *ctx = >mem_train_ctx;
uint32_t *pcache = (uint32_t*)ctx->sys_cache;


In general it is preferred to order the lines in reverse xmas tree.

E.g. long lines with pre-initializes variables such as adev, ctx, pcache 
first. And temporary stuff like i, ret, buf etc last.


Apart from that this looks good to me,
Christian.

  
@@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)

return 0;
}
  
-	amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);

+   amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, 
sizeof(p2c_header), false);
DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] 
p2c_header[%08x,%08x,%08x,%08x]\n",
  pcache[0], pcache[1], pcache[2], pcache[3],
  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
@@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
DRM_DEBUG("Memory training ops:%x.\n", ops);
  
  	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {

+   /*
+* Long traing will encroach certain mount of bottom VRAM,
+* saving the content of this bottom VRAM to system memory
+* before training, and restoring it after training to avoid
+* VRAM corruption.
+*/
+   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+   if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
+   DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is 
not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+   return -EINVAL;
+   }
+
+   buf = vmalloc(sz);
+   if (!buf) {
+   DRM_ERROR("failed to allocate system memory.\n");
+   return -ENOMEM;
+   }
+
+   memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
ret = psp_v11_0_memory_training_send_msg(psp, 
PSP_BL__DRAM_LONG_TRAIN);
if (ret) {
DRM_ERROR("Send long training msg failed.\n");
+   vfree(buf);
return ret;
}
+
+   memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+   adev->nbio.funcs->hdp_flush(adev, NULL);
+   vfree(buf);
}
  
  	if (ops & PSP_MEM_TRAIN_SAVE) {


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training

2020-01-20 Thread Tianci Yin
From: "Tianci.Yin" 

[why]
In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by
UMC, that causes problems(like GTT corrupted and page fault observed).

[how]
Saving the content of this bottom VRAM to system memory before training, and
restoring it after training to avoid VRAM corruption.

Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53
Signed-off-by: Tianci.Yin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 32 -
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 3265487b859f..611021514c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -172,6 +172,8 @@ struct psp_dtm_context {
 #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES  0x1000
 #define GDDR6_MEM_TRAINING_OFFSET  0x8000
+/*Define the VRAM size that will be encroached by BIST training.*/
+#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200
 
 enum psp_memory_training_init_flag {
PSP_MEM_TRAIN_NOT_SUPPORT   = 0x0,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 685dd9754c67..51011b661ba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct 
psp_context *psp)
 static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
 {
int ret;
+   void *buf;
+   uint32_t sz;
uint32_t p2c_header[4];
+   struct amdgpu_device *adev = psp->adev;
struct psp_memory_training_context *ctx = >mem_train_ctx;
uint32_t *pcache = (uint32_t*)ctx->sys_cache;
 
@@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
return 0;
}
 
-   amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, 
p2c_header, sizeof(p2c_header), false);
+   amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, 
sizeof(p2c_header), false);
DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] 
p2c_header[%08x,%08x,%08x,%08x]\n",
  pcache[0], pcache[1], pcache[2], pcache[3],
  p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
@@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)
DRM_DEBUG("Memory training ops:%x.\n", ops);
 
if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+   /*
+* Long traing will encroach certain mount of bottom VRAM,
+* saving the content of this bottom VRAM to system memory
+* before training, and restoring it after training to avoid
+* VRAM corruption.
+*/
+   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+   if (adev->gmc.visible_vram_size < sz || 
!adev->mman.aper_base_kaddr) {
+   DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p 
is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+   return -EINVAL;
+   }
+
+   buf = vmalloc(sz);
+   if (!buf) {
+   DRM_ERROR("failed to allocate system memory.\n");
+   return -ENOMEM;
+   }
+
+   memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
ret = psp_v11_0_memory_training_send_msg(psp, 
PSP_BL__DRAM_LONG_TRAIN);
if (ret) {
DRM_ERROR("Send long training msg failed.\n");
+   vfree(buf);
return ret;
}
+
+   memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+   adev->nbio.funcs->hdp_flush(adev, NULL);
+   vfree(buf);
}
 
if (ops & PSP_MEM_TRAIN_SAVE) {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx