RE: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and deadlocking

2022-11-15 Thread Lin, Wayne
[Public]



> -Original Message-
> From: Lyude Paul 
> Sent: Tuesday, November 15, 2022 5:55 AM
> To: Lin, Wayne ; amd-...@lists.freedesktop.org
> Cc: Wentland, Harry ; sta...@vger.kernel.org;
> Li, Sun peng (Leo) ; Siqueira, Rodrigo
> ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ; David
> Airlie ; Daniel Vetter ; Kazlauskas,
> Nicholas ; Pillai, Aurabindo
> ; Li, Roman ; Zuo, Jerry
> ; Wu, Hersen ; Thomas
> Zimmermann ; Mahfooz, Hamza
> ; Hung, Alex ; Francis,
> David ; Mikita Lipski ; Liu,
> Wenjing ; open list:DRM DRIVERS  de...@lists.freedesktop.org>; open list 
> Subject: Re: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and
> deadlocking
> 
> On Wed, 2022-11-09 at 09:48 +, Lin, Wayne wrote:
> > >   }
> > > - if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) {
> > > + ret = drm_dp_mst_atomic_check(state);
> > > + if (ret == 0 && !debugfs_overwrite) {
> > >   set_dsc_configs_from_fairness_vars(params, vars, count, 
> > > k);
> > > - return true;
> > > + return 0;
> > > + } else if (ret == -EDEADLK) {
> > > + return ret;
> >
> > I think we should return here whenever there is an error. Not just for
> > EDEADLK case.
> 
> Are we sure about this one? I think we may actually want to make this so it
> returns on ret != -ENOSPC, since we want the function to continue if there's
> no space in the atomic state available so it can try recomputing things with
> compression enabled. On ret == 0 it should return early without doing
> compression, and on ret == -ENOSPC it should just continue the function
> from there
> 
Oh, right.. Thanks for saving me from causing disaster : )

> --
> Cheers,
>  Lyude Paul (she/her)
>  Software Engineer at Red Hat
--
Regards,
Wayne

Re: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and deadlocking

2022-11-14 Thread Lyude Paul
On Wed, 2022-11-09 at 09:48 +, Lin, Wayne wrote:
> >     }
> > -   if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) {
> > +   ret = drm_dp_mst_atomic_check(state);
> > +   if (ret == 0 && !debugfs_overwrite) {
> >     set_dsc_configs_from_fairness_vars(params, vars, count,
> > k);
> > -   return true;
> > +   return 0;
> > +   } else if (ret == -EDEADLK) {
> > +   return ret;
> 
> I think we should return here whenever there is an error. Not just for
> EDEADLK case. 

Are we sure about this one? I think we may actually want to make this so it
returns on ret != -ENOSPC, since we want the function to continue if there's
no space in the atomic state available so it can try recomputing things with
compression enabled. On ret == 0 it should return early without doing
compression, and on ret == -ENOSPC it should just continue the function from
there

-- 
Cheers,
 Lyude Paul (she/her)
 Software Engineer at Red Hat



Re: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and deadlocking

2022-11-14 Thread Lyude Paul
On Wed, 2022-11-09 at 09:48 +, Lin, Wayne wrote:
> [Public]
> 
> Thanks, Lyude!
> Comments inline.
> 
> > -Original Message-
> > From: Lyude Paul 
> > Sent: Saturday, November 5, 2022 7:59 AM
> > To: amd-...@lists.freedesktop.org
> > Cc: Wentland, Harry ; sta...@vger.kernel.org;
> > Li, Sun peng (Leo) ; Siqueira, Rodrigo
> > ; Deucher, Alexander
> > ; Koenig, Christian
> > ; Pan, Xinhui ; David
> > Airlie ; Daniel Vetter ; Kazlauskas,
> > Nicholas ; Pillai, Aurabindo
> > ; Li, Roman ; Zuo, Jerry
> > ; Wu, Hersen ; Lin, Wayne
> > ; Thomas Zimmermann ;
> > Mahfooz, Hamza ; Hung, Alex
> > ; Francis, David ; Mikita
> > Lipski ; Liu, Wenjing ;
> > open list:DRM DRIVERS ; open list  > ker...@vger.kernel.org>
> > Subject: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and
> > deadlocking
> > 
> > It appears that amdgpu makes the mistake of completely ignoring the return
> > values from the DP MST helpers, and instead just returns a simple 
> > true/false.
> > In this case, it seems to have come back to bite us because as a result of
> > simply returning false from compute_mst_dsc_configs_for_state(), amdgpu
> > had no way of telling when a deadlock happened from these helpers. This
> > could definitely result in some kernel splats.
> > 
> > Signed-off-by: Lyude Paul 
> > Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share")
> > Cc: Harry Wentland 
> > Cc:  # v5.6+
> > ---
> >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  18 +--
> >  .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 107 ++--
> > --
> >  .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  12 +-
> >  3 files changed, 73 insertions(+), 64 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 0db2a88cd4d7b..6f76b2c84cdb5 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -6462,7 +6462,7 @@ static int
> > dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
> > struct drm_connector_state *new_con_state;
> > struct amdgpu_dm_connector *aconnector;
> > struct dm_connector_state *dm_conn_state;
> > -   int i, j;
> > +   int i, j, ret;
> > int vcpi, pbn_div, pbn, slot_num = 0;
> > 
> > for_each_new_connector_in_state(state, connector,
> > new_con_state, i) { @@ -6509,8 +6509,11 @@ static int
> > dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
> > dm_conn_state->pbn = pbn;
> > dm_conn_state->vcpi_slots = slot_num;
> > 
> > -   drm_dp_mst_atomic_enable_dsc(state, aconnector-
> > > port, dm_conn_state->pbn,
> > -false);
> > +   ret = drm_dp_mst_atomic_enable_dsc(state,
> > aconnector->port,
> > +  dm_conn_state-
> > > pbn, false);
> > +   if (ret != 0)
> > +   return ret;
> > +
> > continue;
> > }
> > 
> > @@ -9523,10 +9526,9 @@ static int amdgpu_dm_atomic_check(struct
> > drm_device *dev,
> > 
> >  #if defined(CONFIG_DRM_AMD_DC_DCN)
> > if (dc_resource_is_dsc_encoding_supported(dc)) {
> > -   if (!pre_validate_dsc(state, _state, vars)) {
> > -   ret = -EINVAL;
> > +   ret = pre_validate_dsc(state, _state, vars);
> > +   if (ret != 0)
> > goto fail;
> > -   }
> > }
> >  #endif
> > 
> > @@ -9621,9 +9623,9 @@ static int amdgpu_dm_atomic_check(struct
> > drm_device *dev,
> > }
> > 
> >  #if defined(CONFIG_DRM_AMD_DC_DCN)
> > -   if (!compute_mst_dsc_configs_for_state(state, dm_state-
> > > context, vars)) {
> > +   ret = compute_mst_dsc_configs_for_state(state, dm_state-
> > > context, vars);
> > +   if (ret) {
> > 
> > DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state()
> > failed\n");
> > -   ret = -EINVAL;
> > goto fail;
> > }
> > 
> > diff --git
> > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c

RE: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and deadlocking

2022-11-09 Thread Lin, Wayne
[Public]

Thanks, Lyude!
Comments inline.

> -Original Message-
> From: Lyude Paul 
> Sent: Saturday, November 5, 2022 7:59 AM
> To: amd-...@lists.freedesktop.org
> Cc: Wentland, Harry ; sta...@vger.kernel.org;
> Li, Sun peng (Leo) ; Siqueira, Rodrigo
> ; Deucher, Alexander
> ; Koenig, Christian
> ; Pan, Xinhui ; David
> Airlie ; Daniel Vetter ; Kazlauskas,
> Nicholas ; Pillai, Aurabindo
> ; Li, Roman ; Zuo, Jerry
> ; Wu, Hersen ; Lin, Wayne
> ; Thomas Zimmermann ;
> Mahfooz, Hamza ; Hung, Alex
> ; Francis, David ; Mikita
> Lipski ; Liu, Wenjing ;
> open list:DRM DRIVERS ; open list  ker...@vger.kernel.org>
> Subject: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and
> deadlocking
> 
> It appears that amdgpu makes the mistake of completely ignoring the return
> values from the DP MST helpers, and instead just returns a simple true/false.
> In this case, it seems to have come back to bite us because as a result of
> simply returning false from compute_mst_dsc_configs_for_state(), amdgpu
> had no way of telling when a deadlock happened from these helpers. This
> could definitely result in some kernel splats.
> 
> Signed-off-by: Lyude Paul 
> Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share")
> Cc: Harry Wentland 
> Cc:  # v5.6+
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  18 +--
>  .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 107 ++--
> --
>  .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  12 +-
>  3 files changed, 73 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 0db2a88cd4d7b..6f76b2c84cdb5 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -6462,7 +6462,7 @@ static int
> dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
>   struct drm_connector_state *new_con_state;
>   struct amdgpu_dm_connector *aconnector;
>   struct dm_connector_state *dm_conn_state;
> - int i, j;
> + int i, j, ret;
>   int vcpi, pbn_div, pbn, slot_num = 0;
> 
>   for_each_new_connector_in_state(state, connector,
> new_con_state, i) { @@ -6509,8 +6509,11 @@ static int
> dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
>   dm_conn_state->pbn = pbn;
>   dm_conn_state->vcpi_slots = slot_num;
> 
> - drm_dp_mst_atomic_enable_dsc(state, aconnector-
> >port, dm_conn_state->pbn,
> -  false);
> + ret = drm_dp_mst_atomic_enable_dsc(state,
> aconnector->port,
> +dm_conn_state-
> >pbn, false);
> + if (ret != 0)
> + return ret;
> +
>   continue;
>   }
> 
> @@ -9523,10 +9526,9 @@ static int amdgpu_dm_atomic_check(struct
> drm_device *dev,
> 
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
>   if (dc_resource_is_dsc_encoding_supported(dc)) {
> - if (!pre_validate_dsc(state, _state, vars)) {
> - ret = -EINVAL;
> + ret = pre_validate_dsc(state, _state, vars);
> + if (ret != 0)
>   goto fail;
> - }
>   }
>  #endif
> 
> @@ -9621,9 +9623,9 @@ static int amdgpu_dm_atomic_check(struct
> drm_device *dev,
>   }
> 
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
> - if (!compute_mst_dsc_configs_for_state(state, dm_state-
> >context, vars)) {
> + ret = compute_mst_dsc_configs_for_state(state, dm_state-
> >context, vars);
> + if (ret) {
> 
>   DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state()
> failed\n");
> - ret = -EINVAL;
>   goto fail;
>   }
> 
> diff --git
> a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> index 6ff96b4bdda5c..30bc2e5058b70 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> +++
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> @@ -864,25 +864,25 @@ static bool try_disable_dsc(struct
> drm_atomic_state *state,
>   return true;
>  }
> 
> -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state
> *state,
> -  struct dc_state *dc_state,
> -  struct dc

[PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and deadlocking

2022-11-04 Thread Lyude Paul
It appears that amdgpu makes the mistake of completely ignoring the return
values from the DP MST helpers, and instead just returns a simple
true/false. In this case, it seems to have come back to bite us because as
a result of simply returning false from
compute_mst_dsc_configs_for_state(), amdgpu had no way of telling when a
deadlock happened from these helpers. This could definitely result in some
kernel splats.

Signed-off-by: Lyude Paul 
Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share")
Cc: Harry Wentland 
Cc:  # v5.6+
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  18 +--
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 107 ++
 .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  12 +-
 3 files changed, 73 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0db2a88cd4d7b..6f76b2c84cdb5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6462,7 +6462,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct 
drm_atomic_state *state,
struct drm_connector_state *new_con_state;
struct amdgpu_dm_connector *aconnector;
struct dm_connector_state *dm_conn_state;
-   int i, j;
+   int i, j, ret;
int vcpi, pbn_div, pbn, slot_num = 0;
 
for_each_new_connector_in_state(state, connector, new_con_state, i) {
@@ -6509,8 +6509,11 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct 
drm_atomic_state *state,
dm_conn_state->pbn = pbn;
dm_conn_state->vcpi_slots = slot_num;
 
-   drm_dp_mst_atomic_enable_dsc(state, aconnector->port, 
dm_conn_state->pbn,
-false);
+   ret = drm_dp_mst_atomic_enable_dsc(state, 
aconnector->port,
+  dm_conn_state->pbn, 
false);
+   if (ret != 0)
+   return ret;
+
continue;
}
 
@@ -9523,10 +9526,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
if (dc_resource_is_dsc_encoding_supported(dc)) {
-   if (!pre_validate_dsc(state, _state, vars)) {
-   ret = -EINVAL;
+   ret = pre_validate_dsc(state, _state, vars);
+   if (ret != 0)
goto fail;
-   }
}
 #endif
 
@@ -9621,9 +9623,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-   if (!compute_mst_dsc_configs_for_state(state, 
dm_state->context, vars)) {
+   ret = compute_mst_dsc_configs_for_state(state, 
dm_state->context, vars);
+   if (ret) {
DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() 
failed\n");
-   ret = -EINVAL;
goto fail;
}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 6ff96b4bdda5c..30bc2e5058b70 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -864,25 +864,25 @@ static bool try_disable_dsc(struct drm_atomic_state 
*state,
return true;
 }
 
-static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
-struct dc_state *dc_state,
-struct dc_link *dc_link,
-struct dsc_mst_fairness_vars *vars,
-struct drm_dp_mst_topology_mgr 
*mgr,
-int *link_vars_start_index)
+static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+   struct dc_state *dc_state,
+   struct dc_link *dc_link,
+   struct dsc_mst_fairness_vars *vars,
+   struct drm_dp_mst_topology_mgr *mgr,
+   int *link_vars_start_index)
 {
struct dc_stream_state *stream;
struct dsc_mst_fairness_params params[MAX_PIPES];
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_state *mst_state = 
drm_atomic_get_mst_topology_state(state, mgr);
int count = 0;
-   int i, k;
+   int i, k, ret;
bool debugfs_overwrite = false;
 
memset(params, 0, sizeof(params));
 
if (IS_ERR(mst_state))
-   return false;
+   return PTR_ERR(mst_state);
 
mst_state->pbn_div =