[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Tao Zhou <[email protected]>

> -----Original Message-----
> From: SHANMUGAM, SRINIVASAN <[email protected]>
> Sent: Friday, December 5, 2025 8:15 PM
> To: Koenig, Christian <[email protected]>; Deucher, Alexander
> <[email protected]>
> Cc: [email protected]; SHANMUGAM, SRINIVASAN
> <[email protected]>; Zhou1, Tao
> <[email protected]>; Zhang, Hawking <[email protected]>
> Subject: [PATCH] drm/amd/ras: Reduce stack usage in
> amdgpu_virt_ras_get_cper_records()
>
> amdgpu_virt_ras_get_cper_records() was using a large stack array of
> ras_log_info pointers. This contributed to the frame size warning on this
> function.
>
> Replace the fixed-size stack array:
>
> struct ras_log_info *trace[MAX_RECORD_PER_BATCH];
>
> with a heap-allocated array using kcalloc().
>
> We free the trace buffer together with out_buf on all exit paths.
> If allocation of trace or out_buf fails, we return a generic RAS error code.
>
> This reduces stack usage and keeps the runtime behaviour unchanged.
>
> Fixes:
> stack frame size: 1112 bytes (limit: 1024)
>
> Cc: Tao Zhou <[email protected]>
> Cc: Hawking Zhang <[email protected]>
> Cc: Christian König <[email protected]>
> Cc: Alex Deucher <[email protected]>
> Signed-off-by: Srinivasan Shanmugam <[email protected]>
> ---
> .../drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
> b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
> index 5e90a187155b..a75479593864 100644
> --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
> +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
> @@ -183,7 +183,7 @@ static int amdgpu_virt_ras_get_cper_records(struct
> ras_core_context *ras_core,
> (struct ras_cmd_cper_record_rsp *)cmd->output_buff_raw;
> struct ras_log_batch_overview *overview = &virt_ras-
> >batch_mgr.batch_overview;
> struct ras_cmd_batch_trace_record_rsp *rsp_cache = &virt_ras-
> >batch_mgr.batch_trace;
> - struct ras_log_info *trace[MAX_RECORD_PER_BATCH] = {0};
> + struct ras_log_info **trace;
> uint32_t offset = 0, real_data_len = 0;
> uint64_t batch_id;
> uint8_t *out_buf;
> @@ -195,9 +195,15 @@ static int amdgpu_virt_ras_get_cper_records(struct
> ras_core_context *ras_core,
> if (!req->buf_size || !req->buf_ptr || !req->cper_num)
> return RAS_CMD__ERROR_INVALID_INPUT_DATA;
>
> + trace = kcalloc(MAX_RECORD_PER_BATCH, sizeof(*trace),
> GFP_KERNEL);
> + if (!trace)
> + return RAS_CMD__ERROR_GENERIC;
> +
> out_buf = kzalloc(req->buf_size, GFP_KERNEL);
> - if (!out_buf)
> + if (!out_buf) {
> + kfree(trace);
> return RAS_CMD__ERROR_GENERIC;
> + }
>
> memset(out_buf, 0, req->buf_size);
>
> @@ -205,8 +211,9 @@ static int amdgpu_virt_ras_get_cper_records(struct
> ras_core_context *ras_core,
> batch_id = req->cper_start_id + i;
> if (batch_id >= overview->last_batch_id)
> break;
> - count = amdgpu_virt_ras_get_batch_records(ras_core,
> batch_id, trace,
> - ARRAY_SIZE(trace), rsp_cache);
> + count = amdgpu_virt_ras_get_batch_records(ras_core,
> batch_id,
> + trace,
> MAX_RECORD_PER_BATCH,
> + rsp_cache);
> if (count > 0) {
> ret = ras_cper_generate_cper(ras_core, trace, count,
> &out_buf[offset], req->buf_size -
> offset, &real_data_len); @@ -220,6 +227,7 @@ static int
> amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core,
> if ((ret && (ret != -ENOMEM)) ||
> copy_to_user(u64_to_user_ptr(req->buf_ptr), out_buf, offset)) {
> kfree(out_buf);
> + kfree(trace);
> return RAS_CMD__ERROR_GENERIC;
> }
>
> @@ -231,6 +239,7 @@ static int amdgpu_virt_ras_get_cper_records(struct
> ras_core_context *ras_core,
> cmd->output_size = sizeof(struct ras_cmd_cper_record_rsp);
>
> kfree(out_buf);
> + kfree(trace);
>
> return RAS_CMD__SUCCESS;
> }
> --
> 2.34.1
