Re: [PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend
On Wed, Feb 7, 2024 at 5:36 PM Mario Limonciello wrote: > > On 2/7/2024 16:34, Alex Deucher wrote: > > On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello > > wrote: > >> > >> commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() > >> callback") > >> intentionally moved the eviction of resources to earlier in the suspend > >> process, but this introduced a subtle change that it occurs before > >> adev->in_s0ix > >> or adev->in_s3 are set. This meant that APUs actually started to evict > >> resources at suspend time as well. > >> > >> Add a new `in_prepare` flag that is set for the life of the prepare() > >> callback > >> to return the old code flow. Drop the existing call to return 1 in this > >> case because > >> the suspend() callback looks for the flags too. > >> > >> Also, introduce a new amdgpu_device_freeze() function to call at S4 and > >> evict > >> resources in this callback so that APUs will still get resources evicted. > >> > >> Reported-by: Jürg Billeter > >> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038 > >> Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() > >> callback") > >> Signed-off-by: Mario Limonciello > >> --- > >> v1->v2: > >> * Add and use new in_prepare member > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 +- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 -- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++ > >> 3 files changed, 48 insertions(+), 23 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >> index 5d5be3e20687..f9db09a9017a 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > >> @@ -1075,7 +1075,8 @@ struct amdgpu_device { > >> u8 > >> reset_magic[AMDGPU_RESET_MAGIC_NUM]; > >> > >> /* s3/s4 mask */ > >> - boolin_suspend; > >> + boolin_prepare; > >> + boolin_suspend; > >> boolin_s3; > >> boolin_s4; > >> boolin_s0ix; > >> @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device > >> *adev); > >> int amdgpu_device_prepare(struct drm_device *dev); > >> int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); > >> int amdgpu_device_resume(struct drm_device *dev, bool fbcon); > >> +int amdgpu_device_freeze(struct drm_device *drm_dev); > >> u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); > >> int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); > >> void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> index 2bc460cb993d..0a337fcd89b4 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct > >> amdgpu_device *adev) > >> int ret; > >> > >> /* No need to evict vram on APUs for suspend to ram or s2idle */ > >> - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) > >> + if ((adev->in_prepare) && (adev->flags & AMD_IS_APU)) > > > > Could probably simplify this to: > > if ((!adev->in_s4) && (adev->flags & AMD_IS_APU)) > > > > Then you could drop the in_prepare variable. > > > >> return 0; > >> > >> ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); > >> @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev) > >> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) > >> return 0; > >> > >> + adev->in_prepare = true; > >> + > >> /* Evict the majority of BOs before starting suspend sequence */ > >> r = amdgpu_device_evict_resources(adev); > >> if (r) > >> - return r; > >> + goto unprepare; > >> > >> for (i = 0; i < adev->num_ip_blocks; i++) { > >> if (!adev->ip_blocks[i].status.valid) > >> @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev) > >> continue; > >> r = > >> adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); > >> if (r) > >> - return r; > >> + goto unprepare; > >> } > >> > >> - return 0; > >> +unprepare: > >> + adev->in_prepare = FALSE; > >> + > >> + return r; > >> +} > >> + > >> +/** > >> + * amdgpu_device_freeze - run S4 sequence > >> + * > >> + * @dev: drm dev pointer > >> + * > >> + * Prepare to put the hw in the S4 state (all asics). > >> + * Returns 0 for success or an error on failure. > >> + * Called at d
Re: [PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend
On 2/7/2024 16:34, Alex Deucher wrote: On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello wrote: commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") intentionally moved the eviction of resources to earlier in the suspend process, but this introduced a subtle change that it occurs before adev->in_s0ix or adev->in_s3 are set. This meant that APUs actually started to evict resources at suspend time as well. Add a new `in_prepare` flag that is set for the life of the prepare() callback to return the old code flow. Drop the existing call to return 1 in this case because the suspend() callback looks for the flags too. Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict resources in this callback so that APUs will still get resources evicted. Reported-by: Jürg Billeter Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038 Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Signed-off-by: Mario Limonciello --- v1->v2: * Add and use new in_prepare member --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++ 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5d5be3e20687..f9db09a9017a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1075,7 +1075,8 @@ struct amdgpu_device { u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; /* s3/s4 mask */ - boolin_suspend; + boolin_prepare; + boolin_suspend; boolin_s3; boolin_s4; boolin_s0ix; @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); +int amdgpu_device_freeze(struct drm_device *drm_dev); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2bc460cb993d..0a337fcd89b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) int ret; /* No need to evict vram on APUs for suspend to ram or s2idle */ - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) + if ((adev->in_prepare) && (adev->flags & AMD_IS_APU)) Could probably simplify this to: if ((!adev->in_s4) && (adev->flags & AMD_IS_APU)) Then you could drop the in_prepare variable. return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + adev->in_prepare = true; + /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unprepare; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - return r; + goto unprepare; } - return 0; +unprepare: + adev->in_prepare = FALSE; + + return r; +} + +/** + * amdgpu_device_freeze - run S4 sequence + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the S4 state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver freeze. + */ +int amdgpu_device_freeze(struct drm_device *drm_dev) +{ + struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; + + adev->in_s4 = true; + + r = amdgpu_device_evict_resources(adev); Won't this be too late to allocate memory? Doesn't this need to happen in prepare() even for S4? Hmm; possibly. I'll swap it back with your other suggestion. Thanks Alex + if (r) + goto cleanup; + + r = amdgpu_device_suspend(drm_dev, true); + if (r) + goto cleanup; + + if (amdgpu_acpi_should_gpu_reset(adev)) + r = amdgpu_asi
Re: [PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend
On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello wrote: > > commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() > callback") > intentionally moved the eviction of resources to earlier in the suspend > process, but this introduced a subtle change that it occurs before > adev->in_s0ix > or adev->in_s3 are set. This meant that APUs actually started to evict > resources at suspend time as well. > > Add a new `in_prepare` flag that is set for the life of the prepare() callback > to return the old code flow. Drop the existing call to return 1 in this case > because > the suspend() callback looks for the flags too. > > Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict > resources in this callback so that APUs will still get resources evicted. > > Reported-by: Jürg Billeter > Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038 > Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() > callback") > Signed-off-by: Mario Limonciello > --- > v1->v2: > * Add and use new in_prepare member > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 -- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++ > 3 files changed, 48 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 5d5be3e20687..f9db09a9017a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1075,7 +1075,8 @@ struct amdgpu_device { > u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; > > /* s3/s4 mask */ > - boolin_suspend; > + boolin_prepare; > + boolin_suspend; > boolin_s3; > boolin_s4; > boolin_s0ix; > @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device > *adev); > int amdgpu_device_prepare(struct drm_device *dev); > int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); > int amdgpu_device_resume(struct drm_device *dev, bool fbcon); > +int amdgpu_device_freeze(struct drm_device *drm_dev); > u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); > int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); > void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 2bc460cb993d..0a337fcd89b4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct > amdgpu_device *adev) > int ret; > > /* No need to evict vram on APUs for suspend to ram or s2idle */ > - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) > + if ((adev->in_prepare) && (adev->flags & AMD_IS_APU)) Could probably simplify this to: if ((!adev->in_s4) && (adev->flags & AMD_IS_APU)) Then you could drop the in_prepare variable. > return 0; > > ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); > @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev) > if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) > return 0; > > + adev->in_prepare = true; > + > /* Evict the majority of BOs before starting suspend sequence */ > r = amdgpu_device_evict_resources(adev); > if (r) > - return r; > + goto unprepare; > > for (i = 0; i < adev->num_ip_blocks; i++) { > if (!adev->ip_blocks[i].status.valid) > @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev) > continue; > r = adev->ip_blocks[i].version->funcs->prepare_suspend((void > *)adev); > if (r) > - return r; > + goto unprepare; > } > > - return 0; > +unprepare: > + adev->in_prepare = FALSE; > + > + return r; > +} > + > +/** > + * amdgpu_device_freeze - run S4 sequence > + * > + * @dev: drm dev pointer > + * > + * Prepare to put the hw in the S4 state (all asics). > + * Returns 0 for success or an error on failure. > + * Called at driver freeze. > + */ > +int amdgpu_device_freeze(struct drm_device *drm_dev) > +{ > + struct amdgpu_device *adev = drm_to_adev(drm_dev); > + int r; > + > + adev->in_s4 = true; > + > + r = amdgpu_device_evict_resources(adev); Won't this be too late to allocate memory? Doesn't this need to happen in prepare() even for S4? Alex > + if (r) > + goto cleanup; > + > + r = amdgpu_device_suspend(drm_dev, true); > + if (r) > + goto cl
[PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend
commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") intentionally moved the eviction of resources to earlier in the suspend process, but this introduced a subtle change that it occurs before adev->in_s0ix or adev->in_s3 are set. This meant that APUs actually started to evict resources at suspend time as well. Add a new `in_prepare` flag that is set for the life of the prepare() callback to return the old code flow. Drop the existing call to return 1 in this case because the suspend() callback looks for the flags too. Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict resources in this callback so that APUs will still get resources evicted. Reported-by: Jürg Billeter Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038 Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Signed-off-by: Mario Limonciello --- v1->v2: * Add and use new in_prepare member --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++ 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5d5be3e20687..f9db09a9017a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1075,7 +1075,8 @@ struct amdgpu_device { u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; /* s3/s4 mask */ - boolin_suspend; + boolin_prepare; + boolin_suspend; boolin_s3; boolin_s4; boolin_s0ix; @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); +int amdgpu_device_freeze(struct drm_device *drm_dev); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2bc460cb993d..0a337fcd89b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) int ret; /* No need to evict vram on APUs for suspend to ram or s2idle */ - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) + if ((adev->in_prepare) && (adev->flags & AMD_IS_APU)) return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + adev->in_prepare = true; + /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unprepare; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - return r; + goto unprepare; } - return 0; +unprepare: + adev->in_prepare = FALSE; + + return r; +} + +/** + * amdgpu_device_freeze - run S4 sequence + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the S4 state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver freeze. + */ +int amdgpu_device_freeze(struct drm_device *drm_dev) +{ + struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; + + adev->in_s4 = true; + + r = amdgpu_device_evict_resources(adev); + if (r) + goto cleanup; + + r = amdgpu_device_suspend(drm_dev, true); + if (r) + goto cleanup; + + if (amdgpu_acpi_should_gpu_reset(adev)) + r = amdgpu_asic_reset(adev); + +cleanup: + adev->in_s4 = false; + + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b74f68a15802..fc9caa14c9d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2456,6 +2456,7 @@ static int amdgpu_pmops_prepare(struct device *dev) { struct drm_device *drm_dev = dev_get_drv