mes_v11_0_set_hw_resources(), mes_v11_0_set_hw_resources_1() and
mes_v11_0_query_sched_status() were using large MESAPI packet unions
on the stack.

When these helpers are inlined into mes_v11_0_hw_init(), the stack
frame grows and can hit the -Wframe-larger-than warning:

  stack frame size (1144) exceeds limit (1024) in 'mes_v11_0_hw_init'
  [-Wframe-larger-than]

Change these helpers to allocate the packet with kmalloc(GFP_KERNEL)
instead of placing it on the stack. The code now fills the packet,
calls mes_v11_0_submit_pkt_and_poll_completion(), and then frees the
packet with kfree() on all paths. This reduces stack usage in
mes_v11_0_hw_init() and keeps the behaviour the same.

Cc: Alex Deucher <[email protected]>
Cc: Christian König <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 129 ++++++++++++++-----------
 1 file changed, 74 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 5159f4a9787c..5533098530de 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -582,17 +582,25 @@ static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
 
 static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
 {
-	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+	union MESAPI__QUERY_MES_STATUS *pkt;
+	int r;
 
-	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
 
-	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
-	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
-	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	memset(pkt, 0, sizeof(*pkt));
 
-	return mes_v11_0_submit_pkt_and_poll_completion(mes,
-			&mes_status_pkt, sizeof(mes_status_pkt),
+	pkt->header.type = MES_API_TYPE_SCHEDULER;
+	pkt->header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	pkt->header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	r = mes_v11_0_submit_pkt_and_poll_completion(mes,
+			pkt, sizeof(*pkt),
 			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+
+	kfree(pkt);
+	return r;
 }
 
 static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
@@ -671,93 +679,104 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
 
 static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
 {
-	int i;
+	int i, r;
 	struct amdgpu_device *adev = mes->adev;
-	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+	union MESAPI_SET_HW_RESOURCES *pkt;
 
-	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
 
-	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
-	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
-	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	memset(pkt, 0, sizeof(*pkt));
 
-	mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
-	mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
-	mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
-	mes_set_hw_res_pkt.paging_vmid = 0;
-	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
-	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+	pkt->header.type = MES_API_TYPE_SCHEDULER;
+	pkt->header.opcode = MES_SCH_API_SET_HW_RSRC;
+	pkt->header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt->vmid_mask_mmhub = mes->vmid_mask_mmhub;
+	pkt->vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+	pkt->gds_size = adev->gds.gds_size;
+	pkt->paging_vmid = 0;
+	pkt->g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
+	pkt->query_status_fence_gpu_mc_ptr =
 		mes->query_status_fence_gpu_addr[0];
 
 	for (i = 0; i < MAX_COMPUTE_PIPES; i++)
-		mes_set_hw_res_pkt.compute_hqd_mask[i] =
-			mes->compute_hqd_mask[i];
+		pkt->compute_hqd_mask[i] = mes->compute_hqd_mask[i];
 
 	for (i = 0; i < MAX_GFX_PIPES; i++)
-		mes_set_hw_res_pkt.gfx_hqd_mask[i] =
-			mes->gfx_hqd_mask[i];
+		pkt->gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
 
 	for (i = 0; i < MAX_SDMA_PIPES; i++)
-		mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
+		pkt->sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
 
 	for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
-		mes_set_hw_res_pkt.aggregated_doorbells[i] =
-			mes->aggregated_doorbells[i];
+		pkt->aggregated_doorbells[i] = mes->aggregated_doorbells[i];
 
 	for (i = 0; i < 5; i++) {
-		mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
-		mes_set_hw_res_pkt.mmhub_base[i] =
-			adev->reg_offset[MMHUB_HWIP][0][i];
-		mes_set_hw_res_pkt.osssys_base[i] =
-			adev->reg_offset[OSSSYS_HWIP][0][i];
+		pkt->gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+		pkt->mmhub_base[i] = adev->reg_offset[MMHUB_HWIP][0][i];
+		pkt->osssys_base[i] = adev->reg_offset[OSSSYS_HWIP][0][i];
 	}
 
-	mes_set_hw_res_pkt.disable_reset = 1;
-	mes_set_hw_res_pkt.disable_mes_log = 1;
-	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
-	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
-	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
-	mes_set_hw_res_pkt.oversubscription_timer = 50;
+	pkt->disable_reset = 1;
+	pkt->disable_mes_log = 1;
+	pkt->use_different_vmid_compute = 1;
+	pkt->enable_reg_active_poll = 1;
+	pkt->enable_level_process_quantum_check = 1;
+	pkt->oversubscription_timer = 50;
+
 	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
-		mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+		pkt->enable_lr_compute_wa = 1;
 	else
 		dev_info_once(mes->adev->dev,
 			      "MES FW version must be >= 0x7f to enable LR compute workaround.\n");
 
 	if (amdgpu_mes_log_enable) {
-		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
-		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
-			mes->event_log_gpu_addr;
+		pkt->enable_mes_event_int_logging = 1;
+		pkt->event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
 	}
 
 	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
-		mes_set_hw_res_pkt.limit_single_process = 1;
+		pkt->limit_single_process = 1;
 
-	return mes_v11_0_submit_pkt_and_poll_completion(mes,
-			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+	r = mes_v11_0_submit_pkt_and_poll_completion(mes,
+			pkt, sizeof(*pkt),
 			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+
+	kfree(pkt);
+	return r;
 }
 
 static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
 {
-	union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt;
-	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+	union MESAPI_SET_HW_RESOURCES_1 *pkt;
+	int r;
 
-	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
-	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
-	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-	mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	memset(pkt, 0, sizeof(*pkt));
+
+	pkt->header.type = MES_API_TYPE_SCHEDULER;
+	pkt->header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+	pkt->header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	pkt->enable_mes_info_ctx = 1;
 
-	mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
+	pkt->cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
 	if (amdgpu_sriov_is_mes_info_enable(mes->adev)) {
-		mes_set_hw_res_pkt.mes_info_ctx_mc_addr =
+		pkt->mes_info_ctx_mc_addr =
 			mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE;
-		mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
+		pkt->mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
 	}
 
-	return mes_v11_0_submit_pkt_and_poll_completion(mes,
-			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+	r = mes_v11_0_submit_pkt_and_poll_completion(mes,
+			pkt, sizeof(*pkt),
 			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+
+	kfree(pkt);
+	return r;
 }
 
 static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
-- 
2.34.1
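
(For illustration only, not part of the patch: a minimal, self-contained
sketch of the pattern the change applies, i.e. moving a large on-stack
object to a kmalloc()/kfree() allocation. struct big_pkt and submit_pkt()
below are hypothetical names, not amdgpu symbols.)

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

/* hypothetical packet large enough to trip -Wframe-larger-than if it
 * lived on the stack of an inlined caller */
struct big_pkt {
	u32 header;
	u32 payload[512];	/* ~2 KiB */
};

/* hypothetical stand-in for mes_v11_0_submit_pkt_and_poll_completion() */
int submit_pkt(const void *pkt, size_t size);

static int example_send(void)
{
	struct big_pkt *pkt;
	int r;

	/* heap allocation instead of a stack variable */
	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	memset(pkt, 0, sizeof(*pkt));
	pkt->header = 0x1;	/* fill the packet */

	r = submit_pkt(pkt, sizeof(*pkt));

	kfree(pkt);		/* freed on every return path */
	return r;
}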
