Re: [PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend

2024-02-07 Thread Alex Deucher
On Wed, Feb 7, 2024 at 5:36 PM Mario Limonciello
 wrote:
>
> On 2/7/2024 16:34, Alex Deucher wrote:
> > On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello
> >  wrote:
> >>
> >> commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
> >> callback")
> >> intentionally moved the eviction of resources to earlier in the suspend
> >> process, but this introduced a subtle change that it occurs before 
> >> adev->in_s0ix
> >> or adev->in_s3 are set. This meant that APUs actually started to evict
> >> resources at suspend time as well.
> >>
> >> Add a new `in_prepare` flag that is set for the life of the prepare() 
> >> callback
> >> to return the old code flow. Drop the existing call to return 1 in this 
> >> case because
> >> the suspend() callback looks for the flags too.
> >>
> >> Also, introduce a new amdgpu_device_freeze() function to call at S4 and 
> >> evict
> >> resources in this callback so that APUs will still get resources evicted.
> >>
> >> Reported-by: Jürg Billeter 
> >> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
> >> Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
> >> callback")
> >> Signed-off-by: Mario Limonciello 
> >> ---
> >> v1->v2:
> >>   * Add and use new in_prepare member
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 --
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++
> >>   3 files changed, 48 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index 5d5be3e20687..f9db09a9017a 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -1075,7 +1075,8 @@ struct amdgpu_device {
> >>  u8  
> >> reset_magic[AMDGPU_RESET_MAGIC_NUM];
> >>
> >>  /* s3/s4 mask */
> >> -   boolin_suspend;
> >> +   boolin_prepare;
> >> +   boolin_suspend;
> >>  boolin_s3;
> >>  boolin_s4;
> >>  boolin_s0ix;
> >> @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device 
> >> *adev);
> >>   int amdgpu_device_prepare(struct drm_device *dev);
> >>   int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
> >>   int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
> >> +int amdgpu_device_freeze(struct drm_device *drm_dev);
> >>   u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
> >>   int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
> >>   void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index 2bc460cb993d..0a337fcd89b4 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct 
> >> amdgpu_device *adev)
> >>  int ret;
> >>
> >>  /* No need to evict vram on APUs for suspend to ram or s2idle */
> >> -   if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
> >> +   if ((adev->in_prepare) && (adev->flags & AMD_IS_APU))
> >
> > Could probably simplify this to:
> > if ((!adev->in_s4) && (adev->flags & AMD_IS_APU))
> >
> > Then you could drop the in_prepare variable.
> >
> >>  return 0;
> >>
> >>  ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
> >> @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev)
> >>  if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> >>  return 0;
> >>
> >> +   adev->in_prepare = true;
> >> +
> >>  /* Evict the majority of BOs before starting suspend sequence */
> >>  r = amdgpu_device_evict_resources(adev);
> >>  if (r)
> >> -   return r;
> >> +   goto unprepare;
> >>
> >>  for (i = 0; i < adev->num_ip_blocks; i++) {
> >>  if (!adev->ip_blocks[i].status.valid)
> >> @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev)
> >>  continue;
> >>  r = 
> >> adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
> >>  if (r)
> >> -   return r;
> >> +   goto unprepare;
> >>  }
> >>
> >> -   return 0;
> >> +unprepare:
> >> +   adev->in_prepare = FALSE;
> >> +
> >> +   return r;
> >> +}
> >> +
> >> +/**
> >> + * amdgpu_device_freeze - run S4 sequence
> >> + *
> >> + * @dev: drm dev pointer
> >> + *
> >> + * Prepare to put the hw in the S4 state (all asics).
> >> + * Returns 0 for success or an error on failure.
> >> + * Called at d

Re: [PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend

2024-02-07 Thread Mario Limonciello

On 2/7/2024 16:34, Alex Deucher wrote:

On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello
 wrote:


commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
callback")
intentionally moved the eviction of resources to earlier in the suspend
process, but this introduced a subtle change that it occurs before adev->in_s0ix
or adev->in_s3 are set. This meant that APUs actually started to evict
resources at suspend time as well.

Add a new `in_prepare` flag that is set for the life of the prepare() callback
to return the old code flow. Drop the existing call to return 1 in this case 
because
the suspend() callback looks for the flags too.

Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict
resources in this callback so that APUs will still get resources evicted.

Reported-by: Jürg Billeter 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
callback")
Signed-off-by: Mario Limonciello 
---
v1->v2:
  * Add and use new in_prepare member
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++
  3 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5d5be3e20687..f9db09a9017a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1075,7 +1075,8 @@ struct amdgpu_device {
 u8  reset_magic[AMDGPU_RESET_MAGIC_NUM];

 /* s3/s4 mask */
-   boolin_suspend;
+   boolin_prepare;
+   boolin_suspend;
 boolin_s3;
 boolin_s4;
 boolin_s0ix;
@@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
  int amdgpu_device_prepare(struct drm_device *dev);
  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
+int amdgpu_device_freeze(struct drm_device *drm_dev);
  u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
  int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
  void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2bc460cb993d..0a337fcd89b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct 
amdgpu_device *adev)
 int ret;

 /* No need to evict vram on APUs for suspend to ram or s2idle */
-   if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+   if ((adev->in_prepare) && (adev->flags & AMD_IS_APU))


Could probably simplify this to:
if ((!adev->in_s4) && (adev->flags & AMD_IS_APU))

Then you could drop the in_prepare variable.


 return 0;

 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
@@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev)
 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 return 0;

+   adev->in_prepare = true;
+
 /* Evict the majority of BOs before starting suspend sequence */
 r = amdgpu_device_evict_resources(adev);
 if (r)
-   return r;
+   goto unprepare;

 for (i = 0; i < adev->num_ip_blocks; i++) {
 if (!adev->ip_blocks[i].status.valid)
@@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev)
 continue;
 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void 
*)adev);
 if (r)
-   return r;
+   goto unprepare;
 }

-   return 0;
+unprepare:
+   adev->in_prepare = FALSE;
+
+   return r;
+}
+
+/**
+ * amdgpu_device_freeze - run S4 sequence
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the S4 state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver freeze.
+ */
+int amdgpu_device_freeze(struct drm_device *drm_dev)
+{
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;
+
+   adev->in_s4 = true;
+
+   r = amdgpu_device_evict_resources(adev);


Won't this be too late to allocate memory?  Doesn't this need to
happen in prepare() even for S4?


Hmm; possibly.  I'll swap it back with your other suggestion.

Thanks


Alex


+   if (r)
+   goto cleanup;
+
+   r = amdgpu_device_suspend(drm_dev, true);
+   if (r)
+   goto cleanup;
+
+   if (amdgpu_acpi_should_gpu_reset(adev))
+   r = amdgpu_asi

Re: [PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend

2024-02-07 Thread Alex Deucher
On Wed, Feb 7, 2024 at 3:48 PM Mario Limonciello
 wrote:
>
> commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
> callback")
> intentionally moved the eviction of resources to earlier in the suspend
> process, but this introduced a subtle change that it occurs before 
> adev->in_s0ix
> or adev->in_s3 are set. This meant that APUs actually started to evict
> resources at suspend time as well.
>
> Add a new `in_prepare` flag that is set for the life of the prepare() callback
> to return the old code flow. Drop the existing call to return 1 in this case 
> because
> the suspend() callback looks for the flags too.
>
> Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict
> resources in this callback so that APUs will still get resources evicted.
>
> Reported-by: Jürg Billeter 
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
> Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
> callback")
> Signed-off-by: Mario Limonciello 
> ---
> v1->v2:
>  * Add and use new in_prepare member
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++
>  3 files changed, 48 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5d5be3e20687..f9db09a9017a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1075,7 +1075,8 @@ struct amdgpu_device {
> u8  reset_magic[AMDGPU_RESET_MAGIC_NUM];
>
> /* s3/s4 mask */
> -   boolin_suspend;
> +   boolin_prepare;
> +   boolin_suspend;
> boolin_s3;
> boolin_s4;
> boolin_s0ix;
> @@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device 
> *adev);
>  int amdgpu_device_prepare(struct drm_device *dev);
>  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
>  int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
> +int amdgpu_device_freeze(struct drm_device *drm_dev);
>  u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
>  int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
>  void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2bc460cb993d..0a337fcd89b4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct 
> amdgpu_device *adev)
> int ret;
>
> /* No need to evict vram on APUs for suspend to ram or s2idle */
> -   if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
> +   if ((adev->in_prepare) && (adev->flags & AMD_IS_APU))

Could probably simplify this to:
if ((!adev->in_s4) && (adev->flags & AMD_IS_APU))

Then you could drop the in_prepare variable.

> return 0;
>
> ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
> @@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev)
> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> return 0;
>
> +   adev->in_prepare = true;
> +
> /* Evict the majority of BOs before starting suspend sequence */
> r = amdgpu_device_evict_resources(adev);
> if (r)
> -   return r;
> +   goto unprepare;
>
> for (i = 0; i < adev->num_ip_blocks; i++) {
> if (!adev->ip_blocks[i].status.valid)
> @@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev)
> continue;
> r = adev->ip_blocks[i].version->funcs->prepare_suspend((void 
> *)adev);
> if (r)
> -   return r;
> +   goto unprepare;
> }
>
> -   return 0;
> +unprepare:
> +   adev->in_prepare = FALSE;
> +
> +   return r;
> +}
> +
> +/**
> + * amdgpu_device_freeze - run S4 sequence
> + *
> + * @dev: drm dev pointer
> + *
> + * Prepare to put the hw in the S4 state (all asics).
> + * Returns 0 for success or an error on failure.
> + * Called at driver freeze.
> + */
> +int amdgpu_device_freeze(struct drm_device *drm_dev)
> +{
> +   struct amdgpu_device *adev = drm_to_adev(drm_dev);
> +   int r;
> +
> +   adev->in_s4 = true;
> +
> +   r = amdgpu_device_evict_resources(adev);

Won't this be too late to allocate memory?  Doesn't this need to
happen in prepare() even for S4?

Alex

> +   if (r)
> +   goto cleanup;
> +
> +   r = amdgpu_device_suspend(drm_dev, true);
> +   if (r)
> +   goto cl

[PATCH v2 2/2] drm/amd: Stop evicting resources on APUs in suspend

2024-02-07 Thread Mario Limonciello
commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
callback")
intentionally moved the eviction of resources to earlier in the suspend
process, but this introduced a subtle change that it occurs before adev->in_s0ix
or adev->in_s3 are set. This meant that APUs actually started to evict
resources at suspend time as well.

Add a new `in_prepare` flag that is set for the life of the prepare() callback
to return the old code flow. Drop the existing call to return 1 in this case 
because
the suspend() callback looks for the flags too.

Also, introduce a new amdgpu_device_freeze() function to call at S4 and evict
resources in this callback so that APUs will still get resources evicted.

Reported-by: Jürg Billeter 
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038
Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() 
callback")
Signed-off-by: Mario Limonciello 
---
v1->v2:
 * Add and use new in_prepare member
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 21 ++
 3 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5d5be3e20687..f9db09a9017a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1075,7 +1075,8 @@ struct amdgpu_device {
u8  reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
/* s3/s4 mask */
-   boolin_suspend;
+   boolin_prepare;
+   boolin_suspend;
boolin_s3;
boolin_s4;
boolin_s0ix;
@@ -1462,6 +1463,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_prepare(struct drm_device *dev);
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
+int amdgpu_device_freeze(struct drm_device *drm_dev);
 u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
 int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
 void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2bc460cb993d..0a337fcd89b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4492,7 +4492,7 @@ static int amdgpu_device_evict_resources(struct 
amdgpu_device *adev)
int ret;
 
/* No need to evict vram on APUs for suspend to ram or s2idle */
-   if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+   if ((adev->in_prepare) && (adev->flags & AMD_IS_APU))
return 0;
 
ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
@@ -4521,10 +4521,12 @@ int amdgpu_device_prepare(struct drm_device *dev)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
 
+   adev->in_prepare = true;
+
/* Evict the majority of BOs before starting suspend sequence */
r = amdgpu_device_evict_resources(adev);
if (r)
-   return r;
+   goto unprepare;
 
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
@@ -4533,10 +4535,46 @@ int amdgpu_device_prepare(struct drm_device *dev)
continue;
r = adev->ip_blocks[i].version->funcs->prepare_suspend((void 
*)adev);
if (r)
-   return r;
+   goto unprepare;
}
 
-   return 0;
+unprepare:
+   adev->in_prepare = FALSE;
+
+   return r;
+}
+
+/**
+ * amdgpu_device_freeze - run S4 sequence
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the S4 state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver freeze.
+ */
+int amdgpu_device_freeze(struct drm_device *drm_dev)
+{
+   struct amdgpu_device *adev = drm_to_adev(drm_dev);
+   int r;
+
+   adev->in_s4 = true;
+
+   r = amdgpu_device_evict_resources(adev);
+   if (r)
+   goto cleanup;
+
+   r = amdgpu_device_suspend(drm_dev, true);
+   if (r)
+   goto cleanup;
+
+   if (amdgpu_acpi_should_gpu_reset(adev))
+   r = amdgpu_asic_reset(adev);
+
+cleanup:
+   adev->in_s4 = false;
+
+   return r;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b74f68a15802..fc9caa14c9d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2456,6 +2456,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
 {
struct drm_device *drm_dev = dev_get_drv