On Fri, Dec 5, 2025 at 7:24 AM Srinivasan Shanmugam <[email protected]> wrote: > > mes_v11_0_set_hw_resources(), mes_v11_0_set_hw_resources_1() and > mes_v11_0_query_sched_status() were using large MESAPI packet unions on > the stack. > > When these helpers are inlined into mes_v11_0_hw_init(), the stack frame > grows and can hit the > > stack frame size (1144) exceeds limit (1024) in 'mes_v11_0_hw_init' > [-Wframe-larger-than] > > Change these helpers to allocate the packet with kmalloc(GFP_KERNEL) > instead of placing it on the stack. The code now fills the packet, calls > mes_v11_0_submit_pkt_and_poll_completion(), and then frees the packet > with kfree() on all paths. > > This reduces stack usage in mes_v11_0_hw_init(), and keeps the behaviour > the same.
Allocating memory for every MES submission adds a lot of overhead and puts memory allocation in potentially atomic paths. Also, why is this just showing up now? This code has existed for ~5 years and never been a problem. Alex > > Cc: Alex Deucher <[email protected]> > Cc: Christian König <[email protected]> > Signed-off-by: Srinivasan Shanmugam <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 129 ++++++++++++++----------- > 1 file changed, 74 insertions(+), 55 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > index 5159f4a9787c..5533098530de 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > @@ -582,17 +582,25 @@ static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, > > static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) > { > - union MESAPI__QUERY_MES_STATUS mes_status_pkt; > + union MESAPI__QUERY_MES_STATUS *pkt; > + int r; > > - memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); > + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); > + if (!pkt) > + return -ENOMEM; > > - mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; > - mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; > - mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + memset(pkt, 0, sizeof(*pkt)); > > - return mes_v11_0_submit_pkt_and_poll_completion(mes, > - &mes_status_pkt, sizeof(mes_status_pkt), > + pkt->header.type = MES_API_TYPE_SCHEDULER; > + pkt->header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; > + pkt->header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + > + r = mes_v11_0_submit_pkt_and_poll_completion(mes, > + pkt, sizeof(*pkt), > offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); > + > + kfree(pkt); > + return r; > } > > static int mes_v11_0_misc_op(struct amdgpu_mes *mes, > @@ -671,93 +679,104 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, > > static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) > { > - int i; > + int i, r; > struct amdgpu_device *adev = mes->adev; > - union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt; > + union MESAPI_SET_HW_RESOURCES *pkt; > > - memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); > + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); > + if (!pkt) > + return -ENOMEM; > > - mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; > - mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; > - mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + memset(pkt, 0, sizeof(*pkt)); > > - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; > - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; > - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; > - mes_set_hw_res_pkt.paging_vmid = 0; > - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; > - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = > + pkt->header.type = MES_API_TYPE_SCHEDULER; > + pkt->header.opcode = MES_SCH_API_SET_HW_RSRC; > + pkt->header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + > + pkt->vmid_mask_mmhub = mes->vmid_mask_mmhub; > + pkt->vmid_mask_gfxhub = mes->vmid_mask_gfxhub; > + pkt->gds_size = adev->gds.gds_size; > + pkt->paging_vmid = 0; > + pkt->g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; > + pkt->query_status_fence_gpu_mc_ptr = > mes->query_status_fence_gpu_addr[0]; > > for (i = 0; i < MAX_COMPUTE_PIPES; i++) > - mes_set_hw_res_pkt.compute_hqd_mask[i] = > - mes->compute_hqd_mask[i]; > + pkt->compute_hqd_mask[i] = mes->compute_hqd_mask[i]; > > for (i = 0; i < MAX_GFX_PIPES; i++) > - mes_set_hw_res_pkt.gfx_hqd_mask[i] = > - mes->gfx_hqd_mask[i]; > + pkt->gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; > > for (i = 0; i < MAX_SDMA_PIPES; i++) > - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; > + pkt->sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; > > for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) > - mes_set_hw_res_pkt.aggregated_doorbells[i] = > - mes->aggregated_doorbells[i]; > + pkt->aggregated_doorbells[i] = mes->aggregated_doorbells[i]; > > for (i = 0; i < 5; i++) { > - mes_set_hw_res_pkt.gc_base[i] = > adev->reg_offset[GC_HWIP][0][i]; > - mes_set_hw_res_pkt.mmhub_base[i] = > - adev->reg_offset[MMHUB_HWIP][0][i]; > - mes_set_hw_res_pkt.osssys_base[i] = > - adev->reg_offset[OSSSYS_HWIP][0][i]; > + pkt->gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; > + pkt->mmhub_base[i] = adev->reg_offset[MMHUB_HWIP][0][i]; > + pkt->osssys_base[i] = adev->reg_offset[OSSSYS_HWIP][0][i]; > } > > - mes_set_hw_res_pkt.disable_reset = 1; > - mes_set_hw_res_pkt.disable_mes_log = 1; > - mes_set_hw_res_pkt.use_different_vmid_compute = 1; > - mes_set_hw_res_pkt.enable_reg_active_poll = 1; > - mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; > - mes_set_hw_res_pkt.oversubscription_timer = 50; > + pkt->disable_reset = 1; > + pkt->disable_mes_log = 1; > + pkt->use_different_vmid_compute = 1; > + pkt->enable_reg_active_poll = 1; > + pkt->enable_level_process_quantum_check = 1; > + pkt->oversubscription_timer = 50; > + > if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) > - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; > + pkt->enable_lr_compute_wa = 1; > else > dev_info_once(mes->adev->dev, > "MES FW version must be >= 0x7f to enable LR > compute workaround.\n"); > > if (amdgpu_mes_log_enable) { > - mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; > - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = > - mes->event_log_gpu_addr; > + pkt->enable_mes_event_int_logging = 1; > + pkt->event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; > } > > if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) > - mes_set_hw_res_pkt.limit_single_process = 1; > + pkt->limit_single_process = 1; > > - return mes_v11_0_submit_pkt_and_poll_completion(mes, > - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), > + r = mes_v11_0_submit_pkt_and_poll_completion(mes, > + pkt, sizeof(*pkt), > offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); > + > + kfree(pkt); > + return r; > } > > static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) > { > - union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; > - memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); > + union MESAPI_SET_HW_RESOURCES_1 *pkt; > + int r; > > - mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; > - mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; > - mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; > - mes_set_hw_res_pkt.enable_mes_info_ctx = 1; > + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); > + if (!pkt) > + return -ENOMEM; > + > + memset(pkt, 0, sizeof(*pkt)); > + > + pkt->header.type = MES_API_TYPE_SCHEDULER; > + pkt->header.opcode = MES_SCH_API_SET_HW_RSRC_1; > + pkt->header.dwsize = API_FRAME_SIZE_IN_DWORDS; > + pkt->enable_mes_info_ctx = 1; > > - mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = > mes->resource_1_gpu_addr[0]; > + pkt->cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0]; > if (amdgpu_sriov_is_mes_info_enable(mes->adev)) { > - mes_set_hw_res_pkt.mes_info_ctx_mc_addr = > + pkt->mes_info_ctx_mc_addr = > mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE; > - mes_set_hw_res_pkt.mes_info_ctx_size = > MES11_HW_RESOURCE_1_SIZE; > + pkt->mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE; > } > > - return mes_v11_0_submit_pkt_and_poll_completion(mes, > - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), > + r = mes_v11_0_submit_pkt_and_poll_completion(mes, > + pkt, sizeof(*pkt), > offsetof(union MESAPI_SET_HW_RESOURCES_1, > api_status)); > + > + kfree(pkt); > + return r; > } > > static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, > -- > 2.34.1 >
