Re: [PATCH] drm/amdgpu: introduce vram lost for reset

2019-08-28 Thread Christian König

Am 27.08.19 um 10:34 schrieb Monk Liu:

For SOC15/Vega10, the BACO and mode1 resets can cause VRAM loss in the
high-end address range. The current KMD VRAM-lost check cannot catch this,
since it only checks the very beginning of the visible frame buffer.


Looks mostly good to me.

The only question I have is: why use a macro? An inline function should
do just as well.


Christian.



Signed-off-by: Monk Liu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
  drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++
  3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f6ae565..11e0fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
  #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) 
((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
  #define amdgpu_asic_need_reset_on_init(adev) 
(adev)->asic_funcs->need_reset_on_init((adev))
  #define amdgpu_asic_get_pcie_replay_count(adev) 
((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
  
  /* Common functions */

  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 02b3e7d..31690e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-   atomic_inc(&adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
  
@@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,

vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU 
reset!\n");
-   atomic_inc(&tmp_adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(tmp_adev);
}
  
  r = amdgpu_gtt_mgr_recover(

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fe2212df..8af7501 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
  {
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_BACO:
+   amdgpu_inc_vram_lost(adev);
return soc15_asic_baco_reset(adev);
case AMD_RESET_METHOD_MODE2:
return soc15_mode2_reset(adev);
default:
+   amdgpu_inc_vram_lost(adev);
return soc15_asic_mode1_reset(adev);
}
  }


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH] drm/amdgpu: introduce vram lost for reset

2019-08-27 Thread Liu, Monk
sure

_
Monk Liu|GPU Virtualization Team |AMD
[sig-cloud-gpu]

From: Deucher, Alexander 
Sent: Tuesday, August 27, 2019 8:48 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: introduce vram lost for reset

Can you update nv.c as well?  With that fixed,
Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>


From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> on behalf of Monk Liu <monk@amd.com>
Sent: Tuesday, August 27, 2019 4:34 AM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Monk <monk@amd.com>
Subject: [PATCH] drm/amdgpu: introduce vram lost for reset

For SOC15/Vega10, the BACO and mode1 resets can cause VRAM loss in the
high-end address range. The current KMD VRAM-lost check cannot catch this,
since it only checks the very beginning of the visible frame buffer.

Signed-off-by: Monk Liu <monk@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f6ae565..11e0fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) 
((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
 #define amdgpu_asic_need_reset_on_init(adev) 
(adev)->asic_funcs->need_reset_on_init((adev))
 #define amdgpu_asic_get_pcie_replay_count(adev) 
((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));

 /* Common functions */
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 02b3e7d..31690e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
 amdgpu_virt_init_data_exchange(adev);
 amdgpu_virt_release_full_gpu(adev, true);
 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-   atomic_inc(&adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(adev);
 r = amdgpu_device_recover_vram(adev);
 }

@@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
 vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
 if (vram_lost) {
 DRM_INFO("VRAM is lost due to GPU 
reset!\n");
-   atomic_inc(&tmp_adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(tmp_adev);
 }

 r = amdgpu_gtt_mgr_recover(
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fe2212df..8af7501 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 {
 switch (soc15_asic_reset_method(adev)) {
 case AMD_RESET_METHOD_BACO:
+   amdgpu_inc_vram_lost(adev);
 return soc15_asic_baco_reset(adev);
 case AMD_RESET_METHOD_MODE2:
 return soc15_mode2_reset(adev);
 default:
+   amdgpu_inc_vram_lost(adev);
 return soc15_asic_mode1_reset(adev);
 }
 }
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: introduce vram lost for reset

2019-08-27 Thread Deucher, Alexander
Can you update nv.c as well?  With that fixed,
Reviewed-by: Alex Deucher 


From: amd-gfx  on behalf of Monk Liu 

Sent: Tuesday, August 27, 2019 4:34 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Liu, Monk 
Subject: [PATCH] drm/amdgpu: introduce vram lost for reset

For SOC15/Vega10, the BACO and mode1 resets can cause VRAM loss in the
high-end address range. The current KMD VRAM-lost check cannot catch this,
since it only checks the very beginning of the visible frame buffer.

Signed-off-by: Monk Liu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f6ae565..11e0fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) 
((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
 #define amdgpu_asic_need_reset_on_init(adev) 
(adev)->asic_funcs->need_reset_on_init((adev))
 #define amdgpu_asic_get_pcie_replay_count(adev) 
((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));

 /* Common functions */
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 02b3e7d..31690e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
 amdgpu_virt_init_data_exchange(adev);
 amdgpu_virt_release_full_gpu(adev, true);
 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
-   atomic_inc(&adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(adev);
 r = amdgpu_device_recover_vram(adev);
 }

@@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
 vram_lost = 
amdgpu_device_check_vram_lost(tmp_adev);
 if (vram_lost) {
 DRM_INFO("VRAM is lost due to GPU 
reset!\n");
-   atomic_inc(&tmp_adev->vram_lost_counter);
+   amdgpu_inc_vram_lost(tmp_adev);
 }

 r = amdgpu_gtt_mgr_recover(
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
b/drivers/gpu/drm/amd/amdgpu/soc15.c
index fe2212df..8af7501 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 {
 switch (soc15_asic_reset_method(adev)) {
 case AMD_RESET_METHOD_BACO:
+   amdgpu_inc_vram_lost(adev);
 return soc15_asic_baco_reset(adev);
 case AMD_RESET_METHOD_MODE2:
 return soc15_mode2_reset(adev);
 default:
+   amdgpu_inc_vram_lost(adev);
 return soc15_asic_mode1_reset(adev);
 }
 }
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx