On 2/7/26 00:58, Hamza Mahfooz wrote:
> There should be a mechanism for drivers to respond to flip_done
> timeouts, since, as it stands, it is possible for the display to stall
> indefinitely, necessitating a hard reset. So, introduce a new mechanism
> that tries various methods of recovery with increasing aggression, in
> the following order:
>
> 1. Force a full modeset (have the compositor reprogram the state from
> scratch).
> 2. As a last resort, have the driver attempt a vendor specific reset
> (assuming it provides an implementation of
> drm_crtc_funcs.page_flip_timeout()).
>
> Signed-off-by: Hamza Mahfooz <[email protected]>
> ---
> v2: new to the series
> ---
> drivers/gpu/drm/drm_atomic_helper.c | 36 ++++++++++++++++++++++++++---
> include/drm/drm_crtc.h | 9 ++++++++
> include/drm/drm_device.h | 24 +++++++++++++++++++
> 3 files changed, 66 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c
> b/drivers/gpu/drm/drm_atomic_helper.c
> index 5840e9cc6f66..f46d68418e32 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -42,6 +42,7 @@
> #include <drm/drm_gem_atomic_helper.h>
> #include <drm/drm_panic.h>
> #include <drm/drm_print.h>
> +#include <drm/drm_probe_helper.h>
> #include <drm/drm_self_refresh_helper.h>
> #include <drm/drm_vblank.h>
> #include <drm/drm_writeback.h>
> @@ -1881,11 +1882,40 @@ void drm_atomic_helper_wait_for_flip_done(struct
> drm_device *dev,
> continue;
>
> ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
> - if (ret == 0)
> - drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
> - crtc->base.id, crtc->name);
> + if (!ret) {
> + switch (dev->reset_phase) {
> + case DRM_KMS_RESET_NONE:
> + drm_err(dev, "[CRTC:%d:%s] flip_done timed
> out\n",
> + crtc->base.id, crtc->name);
> + dev->reset_phase = DRM_KMS_RESET_FORCE_MODESET;
> + drm_kms_helper_hotplug_event(dev);
> + break;
> + case DRM_KMS_RESET_FORCE_MODESET:
> + drm_err(dev, "[CRTC:%d:%s] force full modeset
> failed\n",
> + crtc->base.id, crtc->name);
> + dev->reset_phase = DRM_KMS_RESET_VENDOR;
> + if (crtc->funcs->page_flip_timeout)
> + crtc->funcs->page_flip_timeout(crtc);
> + break;
> + case DRM_KMS_RESET_VENDOR:
> + drm_err(dev, "[CRTC:%d:%s] KMS recovery
> failed!\n",
> + crtc->base.id, crtc->name);
> + dev->reset_phase = DRM_KMS_RESET_GIVE_UP;
> + break;
> + default:
> + break;
> + }
> +
> + goto exit;
> + }
> + }
> +
> + if (dev->reset_phase) {
> + drm_info(dev, "KMS recovery succeeded!\n");
> + dev->reset_phase = DRM_KMS_RESET_NONE;
> }
>
> +exit:
> if (state->fake_commit)
> complete_all(&state->fake_commit->flip_done);
> }
> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> index 66278ffeebd6..45dc5a76e915 100644
> --- a/include/drm/drm_crtc.h
> +++ b/include/drm/drm_crtc.h
> @@ -609,6 +609,15 @@ struct drm_crtc_funcs {
> uint32_t flags, uint32_t target,
> struct drm_modeset_acquire_ctx *ctx);
>
> + /**
> + * @page_flip_timeout:
> + *
> + * This optional hook is called if &drm_crtc_commit.flip_done times out,
> + * and can be used by drivers to attempt to recover from a page flip
> + * timeout.
> + */
> + void (*page_flip_timeout)(struct drm_crtc *crtc);
As far as I can see, a callback is clearly not the right approach.
The drm_atomic_helper_wait_for_flip_done() helper is called by the driver,
isn't it?
So what we need is just to give an error code back to the driver.
Regards,
Christian.
> +
> /**
> * @set_property:
> *
> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> index bc78fb77cc27..1244d7527e7b 100644
> --- a/include/drm/drm_device.h
> +++ b/include/drm/drm_device.h
> @@ -66,6 +66,23 @@ enum switch_power_state {
> DRM_SWITCH_POWER_DYNAMIC_OFF = 3,
> };
>
> +/**
> + * enum drm_kms_reset_phase - reset phase of drm device
> + */
> +enum drm_kms_reset_phase {
> + /** @DRM_KMS_RESET_NONE: Not currently attempting recovery */
> + DRM_KMS_RESET_NONE,
> +
> + /** @DRM_KMS_RESET_FORCE_MODESET: Force a full modeset */
> + DRM_KMS_RESET_FORCE_MODESET,
> +
> + /** @DRM_KMS_RESET_VENDOR: Attempt a vendor reset */
> + DRM_KMS_RESET_VENDOR,
> +
> + /** @DRM_KMS_RESET_GIVE_UP: All recovery methods failed */
> + DRM_KMS_RESET_GIVE_UP,
> +};
> +
> /**
> * struct drm_device - DRM device structure
> *
> @@ -375,6 +392,13 @@ struct drm_device {
> * Root directory for debugfs files.
> */
> struct dentry *debugfs_root;
> +
> + /**
> + * @reset_phase:
> + *
> + * Reset phase that the device is in.
> + */
> + enum drm_kms_reset_phase reset_phase;
> };
>
> void drm_dev_set_dma_dev(struct drm_device *dev, struct device *dma_dev);