Re: [PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training
[AMD Official Use Only - Internal Distribution Only] Thanks very much Christian! Please review again. Rico From: Christian König Sent: Monday, January 20, 2020 19:58 To: Yin, Tianci (Rico) ; amd-gfx@lists.freedesktop.org Cc: Long, Gang ; Xu, Feifei ; Wang, Kevin(Yang) ; Tuikov, Luben ; Deucher, Alexander ; Zhang, Hawking ; Koenig, Christian ; Yuan, Xiaojie Subject: Re: [PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training Am 20.01.20 um 12:08 schrieb Tianci Yin: > From: "Tianci.Yin" > > [why] > In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by > UMC, that causes problems(like GTT corrupted and page fault observed). > > [how] > Saving the content of this bottom VRAM to system memory before training, and > restoring it after training to avoid VRAM corruption. You need to re-order the patches, this one should come first and the other one last. One more style nit pick below. > > Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53 > Signed-off-by: Tianci.Yin > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 ++ > drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 32 - > 2 files changed, 33 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > index 3265487b859f..611021514c52 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h > @@ -172,6 +172,8 @@ struct psp_dtm_context { > #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 > #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 > #define GDDR6_MEM_TRAINING_OFFSET 0x8000 > +/*Define the VRAM size that will be encroached by BIST training.*/ > +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200 > > enum psp_memory_training_init_flag { >PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > index 685dd9754c67..51011b661ba8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > @@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct > psp_context *psp) > static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) > { >int ret; > + void *buf; > + uint32_t sz; >uint32_t p2c_header[4]; > + struct amdgpu_device *adev = psp->adev; >struct psp_memory_training_context *ctx = >mem_train_ctx; >uint32_t *pcache = (uint32_t*)ctx->sys_cache; In general it is preferred to order the lines in reverse xmas tree. E.g. long lines with pre-initializes variables such as adev, ctx, pcache first. And temporary stuff like i, ret, buf etc last. Apart from that this looks good to me, Christian. > > @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context > *psp, uint32_t ops) >return 0; >} > > - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, > p2c_header, sizeof(p2c_header), false); > + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, > sizeof(p2c_header), false); >DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] > p2c_header[%08x,%08x,%08x,%08x]\n", > pcache[0], pcache[1], pcache[2], pcache[3], > p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); > @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct > psp_context *psp, uint32_t ops) >DRM_DEBUG("Memory training ops:%x.\n", ops); > >if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { > + /* > + * Long traing will encroach certain mount of bottom VRAM, > + * saving the content of this bottom VRAM to system memory > + * before training, and restoring it after training to avoid > + * VRAM corruption. > + */ > + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; > + > + if (adev->gmc.visible_vram_size < sz || > !adev->mman.aper_base_kaddr) { > + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p > is not initialized.\n", > + adev->gmc.visible_vram_size, > + adev->mman.aper_base_kaddr); > + return -EINVAL; > + } > + > + buf = vmalloc(sz); > + if (!buf) { > + DRM_ERROR("failed to allocate system memory.\n"); > + return -ENOMEM; >
Re: [PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training
Am 20.01.20 um 12:08 schrieb Tianci Yin: From: "Tianci.Yin" [why] In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by UMC, that causes problems(like GTT corrupted and page fault observed). [how] Saving the content of this bottom VRAM to system memory before training, and restoring it after training to avoid VRAM corruption. You need to re-order the patches, this one should come first and the other one last. One more style nit pick below. Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53 Signed-off-by: Tianci.Yin --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 ++ drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 32 - 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3265487b859f..611021514c52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -172,6 +172,8 @@ struct psp_dtm_context { #define MEM_TRAIN_SYSTEM_SIGNATURE0x54534942 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 +/*Define the VRAM size that will be encroached by BIST training.*/ +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 685dd9754c67..51011b661ba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct psp_context *psp) static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) { int ret; + void *buf; + uint32_t sz; uint32_t p2c_header[4]; + struct amdgpu_device *adev = psp->adev; struct psp_memory_training_context *ctx = >mem_train_ctx; uint32_t *pcache = (uint32_t*)ctx->sys_cache; In general it is preferred to order the lines in reverse xmas tree. E.g. long lines with pre-initializes variables such as adev, ctx, pcache first. And temporary stuff like i, ret, buf etc last. Apart from that this looks good to me, Christian. @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return 0; } - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", pcache[0], pcache[1], pcache[2], pcache[3], p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) DRM_DEBUG("Memory training ops:%x.\n", ops); if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* +* Long traing will encroach certain mount of bottom VRAM, +* saving the content of this bottom VRAM to system memory +* before training, and restoring it after training to avoid +* VRAM corruption. +*/ + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + DRM_ERROR("failed to allocate system memory.\n"); + return -ENOMEM; + } + + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); if (ret) { DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); return ret; } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->nbio.funcs->hdp_flush(adev, NULL); + vfree(buf); } if (ops & PSP_MEM_TRAIN_SAVE) { ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: fix VRAM partially encroached issue in GDDR6 memory training
From: "Tianci.Yin" [why] In GDDR6 BIST training, a certain mount of bottom VRAM will be encroached by UMC, that causes problems(like GTT corrupted and page fault observed). [how] Saving the content of this bottom VRAM to system memory before training, and restoring it after training to avoid VRAM corruption. Change-Id: I04a8a6e8e63b3619f7c693fe67883b229cbf3c53 Signed-off-by: Tianci.Yin --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 ++ drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 32 - 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3265487b859f..611021514c52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -172,6 +172,8 @@ struct psp_dtm_context { #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 +/*Define the VRAM size that will be encroached by BIST training.*/ +#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x200 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 685dd9754c67..51011b661ba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -972,7 +972,10 @@ static int psp_v11_0_memory_training_init(struct psp_context *psp) static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) { int ret; + void *buf; + uint32_t sz; uint32_t p2c_header[4]; + struct amdgpu_device *adev = psp->adev; struct psp_memory_training_context *ctx = >mem_train_ctx; uint32_t *pcache = (uint32_t*)ctx->sys_cache; @@ -989,7 +992,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) return 0; } - amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", pcache[0], pcache[1], pcache[2], pcache[3], p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); @@ -1026,11 +1029,38 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) DRM_DEBUG("Memory training ops:%x.\n", ops); if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* +* Long traing will encroach certain mount of bottom VRAM, +* saving the content of this bottom VRAM to system memory +* before training, and restoring it after training to avoid +* VRAM corruption. +*/ + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + DRM_ERROR("failed to allocate system memory.\n"); + return -ENOMEM; + } + + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); if (ret) { DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); return ret; } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->nbio.funcs->hdp_flush(adev, NULL); + vfree(buf); } if (ops & PSP_MEM_TRAIN_SAVE) { -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx