Applied. Thanks!
On Thu, Oct 2, 2025 at 5:11 PM Ard Biesheuvel <[email protected]> wrote:
>
> From: Ard Biesheuvel <[email protected]>
>
> The point of isolating code that uses kernel mode FPU in separate
> compilation units is to ensure that even implicit uses of, e.g., SIMD
> registers for spilling occur only in a context where this is permitted,
> i.e., from inside a kernel_fpu_begin/end block.
>
> This is important on arm64, which uses -mgeneral-regs-only to build all
> kernel code, with the exception of such compilation units where FP or
> SIMD registers are expected to be used. Given that the compiler may
> invent uses of FP/SIMD anywhere in such a unit, none of its code may be
> accessible from outside a kernel_fpu_begin/end block.
>
> This means that all callers into such compilation units must use the
> DC_FP start/end macros, which must not occur there themselves. For
> robustness, all functions with external linkage that reside there should
> call dc_assert_fp_enabled() to assert that the FPU context was set up
> correctly.
>
> Fix this for the DCN35, DCN351 and DCN36 implementations.
> > Cc: Austin Zheng <[email protected]> > Cc: Jun Lei <[email protected]> > Cc: Harry Wentland <[email protected]> > Cc: Leo Li <[email protected]> > Cc: Rodrigo Siqueira <[email protected]> > Cc: Alex Deucher <[email protected]> > Cc: "Christian König" <[email protected]> > Cc: [email protected] > Cc: [email protected] > Signed-off-by: Ard Biesheuvel <[email protected]> > --- > .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 4 ++++ > .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 6 ++++-- > .../drm/amd/display/dc/dml/dcn351/dcn351_fpu.c | 4 ++-- > .../display/dc/resource/dcn35/dcn35_resource.c | 16 +++++++++++++++- > .../dc/resource/dcn351/dcn351_resource.c | 17 ++++++++++++++++- > .../display/dc/resource/dcn36/dcn36_resource.c | 16 +++++++++++++++- > 6 files changed, 56 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > index 17a21bcbde17..1a28061bb9ff 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > @@ -808,6 +808,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct > clk_bw_params *bw_param > > int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st > *soc) > { > + dc_assert_fp_enabled(); > + > return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + > soc->dcn_downspread_percent / 100.0); > } > > @@ -815,6 +817,8 @@ int dcn_get_approx_det_segs_required_for_pstate( > struct _vcs_dpi_soc_bounding_box_st *soc, > int pix_clk_100hz, int bpp, int seg_size_kb) > { > + dc_assert_fp_enabled(); > + > /* Roughly calculate required crb to hide latency. 
In practice there > is slightly > * more buffer available for latency hiding > */ > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c > index 5d73efa2f0c9..15a1d77dfe36 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c > @@ -445,6 +445,8 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc > *dc, > bool upscaled = false; > const unsigned int max_allowed_vblank_nom = 1023; > > + dc_assert_fp_enabled(); > + > dcn31_populate_dml_pipes_from_context(dc, context, pipes, > validate_mode); > > @@ -498,9 +500,7 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc > *dc, > > pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; > > - DC_FP_START(); > dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); > - DC_FP_END(); > > pipes[pipe_cnt].pipe.dest.vfront_porch = > timing->v_front_porch; > pipes[pipe_cnt].pipe.src.dcc_rate = 3; > @@ -581,6 +581,8 @@ void dcn35_decide_zstate_support(struct dc *dc, struct > dc_state *context) > unsigned int i, plane_count = 0; > DC_LOGGER_INIT(dc->ctx->logger); > > + dc_assert_fp_enabled(); > + > for (i = 0; i < dc->res_pool->pipe_count; i++) { > if (context->res_ctx.pipe_ctx[i].plane_state) > plane_count++; > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c > b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c > index 6f516af82956..e5cfe73f640a 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c > @@ -478,6 +478,8 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc > *dc, > bool upscaled = false; > const unsigned int max_allowed_vblank_nom = 1023; > > + dc_assert_fp_enabled(); > + > dcn31_populate_dml_pipes_from_context(dc, context, pipes, > validate_mode); > > @@ -531,9 +533,7 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc > *dc, > > 
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; > > - DC_FP_START(); > dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); > - DC_FP_END(); > > pipes[pipe_cnt].pipe.dest.vfront_porch = > timing->v_front_porch; > pipes[pipe_cnt].pipe.src.dcc_rate = 3; > diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c > b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c > index 8475c6eec547..32678b66c410 100644 > --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c > +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c > @@ -1760,6 +1760,20 @@ enum dc_status dcn35_patch_unknown_plane_state(struct > dc_plane_state *plane_stat > } > > > +static int populate_dml_pipes_from_context_fpu(struct dc *dc, > + struct dc_state *context, > + display_e2e_pipe_params_st > *pipes, > + enum dc_validate_mode > validate_mode) > +{ > + int ret; > + > + DC_FP_START(); > + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, > validate_mode); > + DC_FP_END(); > + > + return ret; > +} > + > static struct resource_funcs dcn35_res_pool_funcs = { > .destroy = dcn35_destroy_resource_pool, > .link_enc_create = dcn35_link_encoder_create, > @@ -1770,7 +1784,7 @@ static struct resource_funcs dcn35_res_pool_funcs = { > .validate_bandwidth = dcn35_validate_bandwidth, > .calculate_wm_and_dlg = NULL, > .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, > - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, > + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, > .acquire_free_pipe_as_secondary_dpp_pipe = > dcn20_acquire_free_pipe_for_layer, > .release_pipe = dcn20_release_pipe, > .add_stream_to_ctx = dcn30_add_stream_to_ctx, > diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c > b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c > index 0971c0f74186..677cee27589c 100644 > --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c > +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c > @@ -1732,6 +1732,21 @@ static enum dc_status dcn351_validate_bandwidth(struct > dc *dc, > return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; > } > > +static int populate_dml_pipes_from_context_fpu(struct dc *dc, > + struct dc_state *context, > + display_e2e_pipe_params_st > *pipes, > + enum dc_validate_mode > validate_mode) > +{ > + int ret; > + > + DC_FP_START(); > + ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, > validate_mode); > + DC_FP_END(); > + > + return ret; > + > +} > + > static struct resource_funcs dcn351_res_pool_funcs = { > .destroy = dcn351_destroy_resource_pool, > .link_enc_create = dcn35_link_encoder_create, > @@ -1742,7 +1757,7 @@ static struct resource_funcs dcn351_res_pool_funcs = { > .validate_bandwidth = dcn351_validate_bandwidth, > .calculate_wm_and_dlg = NULL, > .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, > - .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu, > + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, > .acquire_free_pipe_as_secondary_dpp_pipe = > dcn20_acquire_free_pipe_for_layer, > .release_pipe = dcn20_release_pipe, > .add_stream_to_ctx = dcn30_add_stream_to_ctx, > diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c > b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c > index 8bae7fcedc22..d81540515e5c 100644 > --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c > +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c > @@ -1734,6 +1734,20 @@ static enum dc_status dcn35_validate_bandwidth(struct > dc *dc, > } > > > +static int populate_dml_pipes_from_context_fpu(struct dc *dc, > + struct dc_state *context, > + display_e2e_pipe_params_st > *pipes, > + enum dc_validate_mode > validate_mode) > +{ > + int ret; > + > + DC_FP_START(); > + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, > validate_mode); > + DC_FP_END(); > + > + 
return ret; > +} > + > static struct resource_funcs dcn36_res_pool_funcs = { > .destroy = dcn36_destroy_resource_pool, > .link_enc_create = dcn35_link_encoder_create, > @@ -1744,7 +1758,7 @@ static struct resource_funcs dcn36_res_pool_funcs = { > .validate_bandwidth = dcn35_validate_bandwidth, > .calculate_wm_and_dlg = NULL, > .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, > - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, > + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, > .acquire_free_pipe_as_secondary_dpp_pipe = > dcn20_acquire_free_pipe_for_layer, > .release_pipe = dcn20_release_pipe, > .add_stream_to_ctx = dcn30_add_stream_to_ctx, > -- > 2.51.0.618.g983fd99d29-goog >
