Applied.  Thanks!

On Thu, Oct 2, 2025 at 5:11 PM Ard Biesheuvel <[email protected]> wrote:
>
> From: Ard Biesheuvel <[email protected]>
>
> The point of isolating code that uses kernel mode FPU in separate
> compilation units is to ensure that even implicit uses of, e.g., SIMD
> registers for spilling occur only in a context where this is permitted,
> i.e., from inside a kernel_fpu_begin/end block.
>
> This is important on arm64, which uses -mgeneral-regs-only to build all
> kernel code, with the exception of such compilation units where FP or
> SIMD registers are expected to be used. Given that the compiler may
> invent uses of FP/SIMD anywhere in such a unit, none of its code may be
> accessible from outside a kernel_fpu_begin/end block.
>
> This means that all callers into such compilation units must use the
> DC_FP start/end macros, which must not occur there themselves. For
> robustness, all functions with external linkage that reside there should
> call dc_assert_fp_enabled() to assert that the FPU context was set up
> correctly.
>
> Fix this for the DCN35, DCN351 and DCN36 implementations.
>
> Cc: Austin Zheng <[email protected]>
> Cc: Jun Lei <[email protected]>
> Cc: Harry Wentland <[email protected]>
> Cc: Leo Li <[email protected]>
> Cc: Rodrigo Siqueira <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Cc: "Christian König" <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Ard Biesheuvel <[email protected]>
> ---
>  .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c    |  4 ++++
>  .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c    |  6 ++++--
>  .../drm/amd/display/dc/dml/dcn351/dcn351_fpu.c  |  4 ++--
>  .../display/dc/resource/dcn35/dcn35_resource.c  | 16 +++++++++++++++-
>  .../dc/resource/dcn351/dcn351_resource.c        | 17 ++++++++++++++++-
>  .../display/dc/resource/dcn36/dcn36_resource.c  | 16 +++++++++++++++-
>  6 files changed, 56 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
> index 17a21bcbde17..1a28061bb9ff 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
> @@ -808,6 +808,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct 
> clk_bw_params *bw_param
>
>  int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st 
> *soc)
>  {
> +       dc_assert_fp_enabled();
> +
>         return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + 
> soc->dcn_downspread_percent / 100.0);
>  }
>
> @@ -815,6 +817,8 @@ int dcn_get_approx_det_segs_required_for_pstate(
>                 struct _vcs_dpi_soc_bounding_box_st *soc,
>                 int pix_clk_100hz, int bpp, int seg_size_kb)
>  {
> +       dc_assert_fp_enabled();
> +
>         /* Roughly calculate required crb to hide latency. In practice there 
> is slightly
>          * more buffer available for latency hiding
>          */
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
> index 5d73efa2f0c9..15a1d77dfe36 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
> @@ -445,6 +445,8 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc 
> *dc,
>         bool upscaled = false;
>         const unsigned int max_allowed_vblank_nom = 1023;
>
> +       dc_assert_fp_enabled();
> +
>         dcn31_populate_dml_pipes_from_context(dc, context, pipes,
>                                               validate_mode);
>
> @@ -498,9 +500,7 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc 
> *dc,
>
>                 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>
> -               DC_FP_START();
>                 dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> -               DC_FP_END();
>
>                 pipes[pipe_cnt].pipe.dest.vfront_porch = 
> timing->v_front_porch;
>                 pipes[pipe_cnt].pipe.src.dcc_rate = 3;
> @@ -581,6 +581,8 @@ void dcn35_decide_zstate_support(struct dc *dc, struct 
> dc_state *context)
>         unsigned int i, plane_count = 0;
>         DC_LOGGER_INIT(dc->ctx->logger);
>
> +       dc_assert_fp_enabled();
> +
>         for (i = 0; i < dc->res_pool->pipe_count; i++) {
>                 if (context->res_ctx.pipe_ctx[i].plane_state)
>                         plane_count++;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
> index 6f516af82956..e5cfe73f640a 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
> @@ -478,6 +478,8 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc 
> *dc,
>         bool upscaled = false;
>         const unsigned int max_allowed_vblank_nom = 1023;
>
> +       dc_assert_fp_enabled();
> +
>         dcn31_populate_dml_pipes_from_context(dc, context, pipes,
>                                               validate_mode);
>
> @@ -531,9 +533,7 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc 
> *dc,
>
>                 pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>
> -               DC_FP_START();
>                 dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> -               DC_FP_END();
>
>                 pipes[pipe_cnt].pipe.dest.vfront_porch = 
> timing->v_front_porch;
>                 pipes[pipe_cnt].pipe.src.dcc_rate = 3;
> diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c 
> b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
> index 8475c6eec547..32678b66c410 100644
> --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
> @@ -1760,6 +1760,20 @@ enum dc_status dcn35_patch_unknown_plane_state(struct 
> dc_plane_state *plane_stat
>  }
>
>
> +static int populate_dml_pipes_from_context_fpu(struct dc *dc,
> +                                              struct dc_state *context,
> +                                              display_e2e_pipe_params_st 
> *pipes,
> +                                              enum dc_validate_mode 
> validate_mode)
> +{
> +       int ret;
> +
> +       DC_FP_START();
> +       ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, 
> validate_mode);
> +       DC_FP_END();
> +
> +       return ret;
> +}
> +
>  static struct resource_funcs dcn35_res_pool_funcs = {
>         .destroy = dcn35_destroy_resource_pool,
>         .link_enc_create = dcn35_link_encoder_create,
> @@ -1770,7 +1784,7 @@ static struct resource_funcs dcn35_res_pool_funcs = {
>         .validate_bandwidth = dcn35_validate_bandwidth,
>         .calculate_wm_and_dlg = NULL,
>         .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
> -       .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu,
> +       .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
>         .acquire_free_pipe_as_secondary_dpp_pipe = 
> dcn20_acquire_free_pipe_for_layer,
>         .release_pipe = dcn20_release_pipe,
>         .add_stream_to_ctx = dcn30_add_stream_to_ctx,
> diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c 
> b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
> index 0971c0f74186..677cee27589c 100644
> --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
> @@ -1732,6 +1732,21 @@ static enum dc_status dcn351_validate_bandwidth(struct 
> dc *dc,
>         return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
>  }
>
> +static int populate_dml_pipes_from_context_fpu(struct dc *dc,
> +                                              struct dc_state *context,
> +                                              display_e2e_pipe_params_st 
> *pipes,
> +                                              enum dc_validate_mode 
> validate_mode)
> +{
> +       int ret;
> +
> +       DC_FP_START();
> +       ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, 
> validate_mode);
> +       DC_FP_END();
> +
> +       return ret;
> +
> +}
> +
>  static struct resource_funcs dcn351_res_pool_funcs = {
>         .destroy = dcn351_destroy_resource_pool,
>         .link_enc_create = dcn35_link_encoder_create,
> @@ -1742,7 +1757,7 @@ static struct resource_funcs dcn351_res_pool_funcs = {
>         .validate_bandwidth = dcn351_validate_bandwidth,
>         .calculate_wm_and_dlg = NULL,
>         .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
> -       .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu,
> +       .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
>         .acquire_free_pipe_as_secondary_dpp_pipe = 
> dcn20_acquire_free_pipe_for_layer,
>         .release_pipe = dcn20_release_pipe,
>         .add_stream_to_ctx = dcn30_add_stream_to_ctx,
> diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c 
> b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
> index 8bae7fcedc22..d81540515e5c 100644
> --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
> @@ -1734,6 +1734,20 @@ static enum dc_status dcn35_validate_bandwidth(struct 
> dc *dc,
>  }
>
>
> +static int populate_dml_pipes_from_context_fpu(struct dc *dc,
> +                                              struct dc_state *context,
> +                                              display_e2e_pipe_params_st 
> *pipes,
> +                                              enum dc_validate_mode 
> validate_mode)
> +{
> +       int ret;
> +
> +       DC_FP_START();
> +       ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, 
> validate_mode);
> +       DC_FP_END();
> +
> +       return ret;
> +}
> +
>  static struct resource_funcs dcn36_res_pool_funcs = {
>         .destroy = dcn36_destroy_resource_pool,
>         .link_enc_create = dcn35_link_encoder_create,
> @@ -1744,7 +1758,7 @@ static struct resource_funcs dcn36_res_pool_funcs = {
>         .validate_bandwidth = dcn35_validate_bandwidth,
>         .calculate_wm_and_dlg = NULL,
>         .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
> -       .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu,
> +       .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
>         .acquire_free_pipe_as_secondary_dpp_pipe = 
> dcn20_acquire_free_pipe_for_layer,
>         .release_pipe = dcn20_release_pipe,
>         .add_stream_to_ctx = dcn30_add_stream_to_ctx,
> --
> 2.51.0.618.g983fd99d29-goog
>

Reply via email to