RE: [PATCH Review 2/2] drm/amdgpu: message smu to update bad channel info
[AMD Official Use Only] The series is: Reviewed-by: Tao Zhou > -Original Message- > From: Stanley.Yang > Sent: Friday, March 4, 2022 2:51 PM > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking > ; Zhou1, Tao ; Joo, Maria > > Cc: Yang, Stanley > Subject: [PATCH Review 2/2] drm/amdgpu: message smu to update bad channel > info > > It should notice SMU to update bad channel info when detected uncorrectable > error in UMC block > > Change-Id: I2dc8848affdb53e52891013953ae9383fff5f20f > Signed-off-by: Stanley.Yang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ > .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c| 25 +-- > .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h| 4 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 5 > 5 files changed, 42 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index d3875618ebf5..f9104f99eb9c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -2068,6 +2068,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device > *adev) > mutex_init(>recovery_lock); > INIT_WORK(>recovery_work, amdgpu_ras_do_recovery); > atomic_set(>in_recovery, 0); > + con->eeprom_control.bad_channel_bitmap = 0; > > max_eeprom_records_count = > amdgpu_ras_eeprom_max_record_count(); > amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); > @@ -2092,6 +2093,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device > *adev) > goto free; > > amdgpu_dpm_send_hbm_bad_pages_num(adev, con- > >eeprom_control.ras_num_recs); > + > + if (con->update_channel_flag == true) { > + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con- > >eeprom_control.bad_channel_bitmap); > + con->update_channel_flag = false; > + } > } > > #ifdef CONFIG_X86_MCE_AMD > @@ -2285,6 +2291,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > goto release_con; > } > > + con->update_channel_flag = false; > con->features = 0; > INIT_LIST_HEAD(>head); > /* Might need get this flag from vbios. */ diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 7cddaad90d6d..9314fde81e68 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -374,6 +374,9 @@ struct amdgpu_ras { > > /* record umc error info queried from smu */ > struct umc_ecc_info umc_ecc; > + > + /* Indicates smu whether need update bad channel info */ > + bool update_channel_flag; > }; > > struct ras_fs_data { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > index 2b844a5aafdb..ad5d8667756d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > @@ -265,6 +265,7 @@ int amdgpu_ras_eeprom_reset_table(struct > amdgpu_ras_eeprom_control *control) { > struct amdgpu_device *adev = to_amdgpu_device(control); > struct amdgpu_ras_eeprom_table_header *hdr = >tbl_hdr; > + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > u8 csum; > int res; > > @@ -285,6 +286,10 @@ int amdgpu_ras_eeprom_reset_table(struct > amdgpu_ras_eeprom_control *control) > > amdgpu_dpm_send_hbm_bad_pages_num(adev, control- > >ras_num_recs); > > + control->bad_channel_bitmap = 0; > + amdgpu_dpm_send_hbm_bad_channel_flag(adev, control- > >bad_channel_bitmap); > + con->update_channel_flag = false; > + > amdgpu_ras_debugfs_set_ret_size(control); > > mutex_unlock(>ras_tbl_mutex); > @@ -418,6 +423,7 @@ amdgpu_ras_eeprom_append_table(struct > amdgpu_ras_eeprom_control *control, > struct eeprom_table_record *record, > const u32 num) > { > + struct amdgpu_ras *con = > +amdgpu_ras_get_context(to_amdgpu_device(control)); > u32 a, b, i; > u8 *buf, *pp; > int res; > @@ -429,9 +435,16 @@ amdgpu_ras_eeprom_append_table(struct > amdgpu_ras_eeprom_control *control, > /* Encode all of them in one go. >*/ > pp = buf; > - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) > + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { > __encode_table_record_to_buf(control, [i], pp); > > + /* update bad channel bitmap */ > + if (!(control->bad_channel_bitmap & (1 << > record[i].mem_channel))) { > + control->bad_channel_bitmap |= 1 << > record[i].mem_channel; > + con->update_channel_flag = true; > + } > + } > + > /* a, first record index to write into. >* b, last record index
[PATCH Review 2/2] drm/amdgpu: message smu to update bad channel info
It should notice SMU to update bad channel info when detected uncorrectable error in UMC block Change-Id: I2dc8848affdb53e52891013953ae9383fff5f20f Signed-off-by: Stanley.Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c| 25 +-- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h| 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 5 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d3875618ebf5..f9104f99eb9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2068,6 +2068,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) mutex_init(>recovery_lock); INIT_WORK(>recovery_work, amdgpu_ras_do_recovery); atomic_set(>in_recovery, 0); + con->eeprom_control.bad_channel_bitmap = 0; max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); @@ -2092,6 +2093,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) goto free; amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + + if (con->update_channel_flag == true) { + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); + con->update_channel_flag = false; + } } #ifdef CONFIG_X86_MCE_AMD @@ -2285,6 +2291,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } + con->update_channel_flag = false; con->features = 0; INIT_LIST_HEAD(>head); /* Might need get this flag from vbios. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7cddaad90d6d..9314fde81e68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -374,6 +374,9 @@ struct amdgpu_ras { /* record umc error info queried from smu */ struct umc_ecc_info umc_ecc; + + /* Indicates smu whether need update bad channel info */ + bool update_channel_flag; }; struct ras_fs_data { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 2b844a5aafdb..ad5d8667756d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -265,6 +265,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = >tbl_hdr; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); u8 csum; int res; @@ -285,6 +286,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + control->bad_channel_bitmap = 0; + amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); + con->update_channel_flag = false; + amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(>ras_tbl_mutex); @@ -418,6 +423,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *record, const u32 num) { + struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); u32 a, b, i; u8 *buf, *pp; int res; @@ -429,9 +435,16 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, /* Encode all of them in one go. */ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __encode_table_record_to_buf(control, [i], pp); + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } + /* a, first record index to write into. * b, last record index to write into. * a = first index to read (fri) + number of records in the table, @@ -684,6 +697,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int i, res; u8 *buf, *pp; u32 g0, g1; @@ -751,8 +765,15 @@ int
[PATCH Review 1/2] drm/amd/pm: add send bad channel info function
support message SMU update bad channel info to update HBM bad channel info in OOB table Change-Id: I1e50ed8118f4c1aaefb04c040e59ae4918cdc295 Signed-off-by: Stanley.Yang --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 12 ++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 7 +++ .../pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h| 3 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 +- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 43 +++ 7 files changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 1d63f1e8884c..9a892d6d1d7a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -507,6 +507,18 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size) return ret; } +int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + mutex_lock(>pm.mutex); + ret = smu_send_hbm_bad_channel_flag(smu, size); + mutex_unlock(>pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index ddfa55b59d02..3e78b3057277 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -412,6 +412,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable); int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size); +int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size); int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7e79a67bb8ef..f1544755d8b4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3052,3 +3052,13 @@ int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size) return ret; } + +int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size) +{ + int ret = 0; + + if (smu->ppt_funcs && smu->ppt_funcs->send_hbm_bad_channel_flag) + ret = smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size); + + return ret; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index fbef3ab8d487..ef57b6089c69 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1292,6 +1292,12 @@ struct pptable_funcs { * @set_config_table: Apply the input DriverSmuConfig table settings. */ int (*set_config_table)(struct smu_context *smu, struct config_table_setting *table); + + /** +* @sned_hbm_bad_channel_flag: message SMU to update bad channel info +* of SMUBUS table. +*/ + int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size); }; typedef enum { @@ -1428,5 +1434,6 @@ int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc); int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size); void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev); int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size); +int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h index ab66a4b9e438..0f498baf6838 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h @@ -103,7 +103,8 @@ #define PPSMC_MSG_GfxDriverResetRecovery 0x42 #define PPSMC_MSG_BoardPowerCalibration0x43 #define PPSMC_MSG_HeavySBR 0x45 -#define PPSMC_Message_Count0x46 +#define PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel 0x46 +#define PPSMC_Message_Count0x47 //PPSMC Reset Types diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index d787c3b9fc52..9f6f306eeca0 100644 ---
Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready
Thanks a lot Best Regards, JingWen Chen > On Mar 4, 2022, at 00:36, Grodzovsky, Andrey > wrote: > > I pushed all the changes including your patch. > > Andrey > > On 2022-03-02 22:16, Andrey Grodzovsky wrote: >> OK, i will do quick smoke test tomorrow and push all of it it then. >> >> Andrey >> >> On 2022-03-02 21:59, Chen, JingWen wrote: >>> Hi Andrey, >>> >>> I don't have the bare mental environment, I can only test the SRIOV cases. >>> >>> Best Regards, >>> JingWen Chen >>> >>> >>> On Mar 3, 2022, at 01:55, Grodzovsky, Andrey wrote: The patch is acked-by: Andrey Grodzovsky If you also smoked tested bare metal feel free to apply all the patches, if no let me know. Andrey On 2022-03-02 04:51, JingWen Chen wrote: > Hi Andrey, > > Most part of the patches are OK, but the code will introduce a ib test > fail on the disabled vcn of sienna_cichlid. > > In SRIOV use case we will disable one vcn on sienna_cichlid, I have > attached a patch to fix this issue, please check the attachment. > > Best Regards, > > Jingwen Chen > > > On 2/26/22 5:22 AM, Andrey Grodzovsky wrote: >> Hey, patches attached - i applied the patches and resolved merge >> conflicts but weren't able to test as my on board's network card doesn't >> work with 5.16 kernel (it does with 5.17, maybe it's Kconfig issue and i >> need to check more). >> The patches are on top of 'cababde192b2 Yifan Zhang 2 days ago >> drm/amd/pm: fix mode2 reset fail for smu 13.0.5 ' commit. >> >> Please test and let me know. Maybe by Monday I will be able to resolve >> the connectivity issue on 5.16. >> >> Andrey >> >> On 2022-02-24 22:13, JingWen Chen wrote: >>> Hi Andrey, >>> >>> Sorry for the misleading, I mean the whole patch series. We are >>> depending on this patch series to fix the concurrency issue within >>> SRIOV TDR sequence. >>> >>> >>> >>> On 2/25/22 1:26 AM, Andrey Grodzovsky wrote: No problem if so but before I do, JingWen - why you think this patch is needed as a standalone now ? It has no use without the entire feature together with it. Is it some changes you want to do on top of that code ? Andrey On 2022-02-24 12:12, Deucher, Alexander wrote: > [Public] > > > If it applies cleanly, feel free to drop it in. I'll drop those > patches for drm-next since they are already in drm-misc. > > Alex > > > > *From:* amd-gfx on behalf of > Andrey Grodzovsky > *Sent:* Thursday, February 24, 2022 11:24 AM > *To:* Chen, JingWen ; Christian König > ; dri-de...@lists.freedesktop.org > ; amd-gfx@lists.freedesktop.org > > *Cc:* Liu, Monk ; Chen, Horace > ; Lazar, Lijo ; Koenig, > Christian ; dan...@ffwll.ch > > *Subject:* Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to > after XGMI is ready > No because all the patch-set including this patch was landed into > drm-misc-next and will reach amd-staging-drm-next on the next upstream > rebase i guess. > > Andrey > > On 2022-02-24 01:47, JingWen Chen wrote: >> Hi Andrey, >> >> Will you port this patch into amd-staging-drm-next? >> >> on 2/10/22 2:06 AM, Andrey Grodzovsky wrote: >>> All comments are fixed and code pushed. Thanks for everyone >>> who helped reviewing. >>> >>> Andrey >>> >>> On 2022-02-09 02:53, Christian König wrote: Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky: > Before we initialize schedulers we must know which reset > domain are we in - for single device there iis a single > domain per device and so single wq per device. For XGMI > the reset domain spans the entire XGMI hive and so the > reset wq is per hive. > > Signed-off-by: Andrey Grodzovsky One more comment below, with that fixed Reviewed-by: Christian König . > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 > ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 34 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 + > 3 files changed, 51 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >
2022 X.Org Board of Directors Elections timeline extended, Request for nominations
We are seeking nominations for candidates for election to the X.org Foundation Board of Directors. However, as we presently do not have enough nominations to start the election - the decision has been made to extend the timeline by 2 weeks. Note this is a fairly regular part of the elections process. The new deadline for nominations to the X.org Board of Directors is 23:59 UTC on 20th March 2022. The Board consists of directors elected from the membership. Each year, an election is held to bring the total number of directors to eight. The four members receiving the highest vote totals will serve as directors for two year terms. The directors who received two year terms starting in 2021 were Lyude Paul, Samuel Iglesias Gonsálvez, Manasi D Navare and Daniel Vetter. They will continue to serve until their term ends in 2023. Current directors whose term expires in 2022 are Emma Anholt, Keith Packard, Harry Wentland and Mark Filion. A director is expected to participate in the fortnightly IRC meeting to discuss current business and to attend the annual meeting of the X.Org Foundation, which will be held at a location determined in advance by the Board of Directors. A member may nominate themselves or any other member they feel is qualified. Nominations should be sent to the Election Committee at elections at x.org. Nominees shall be required to be current members of the X.Org Foundation, and submit a personal statement of up to 200 words that will be provided to prospective voters. The collected statements, along with the statement of contribution to the X.Org Foundation in the member's account page on http://members.x.org, will be made available to all voters to help them make their voting decisions. Nominations, membership applications or renewals and completed personal statements must be received no later than 23:59 UTC on 20th March 2022. The slate of candidates will be published 28th March 2022 and candidate Q will begin then. The deadline for Xorg membership applications and renewals is 31st March 2022. Cheers, Lyude Paul, on behalf of the X.Org BoD
Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c
On Thu, Mar 03, 2022 at 12:19:57PM -0600, Gustavo A. R. Silva wrote: > On Thu, Mar 03, 2022 at 09:43:28AM -0800, Kees Cook wrote: > > On Thu, Mar 03, 2022 at 11:25:03AM -0600, Gustavo A. R. Silva wrote: > > > Fix the following Wstringop-overflow warnings when building with GCC-11: > > > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: > > > warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size > > > 1 [-Wstringop-overflow=] > > > > Can you "show your work" a little more here? I don't actually see the > > what is getting fixed: > > > > enum dc_lane_count { > > ... > > LANE_COUNT_FOUR = 4, > > ... > > LANE_COUNT_DP_MAX = LANE_COUNT_FOUR > > }; > > > > struct link_training_settings { > > ... > > union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]; > > }; > > > > void dp_hw_to_dpcd_lane_settings( > > ... > > union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) > > { > > ... > > } > > > > static enum link_training_result dpia_training_cr_transparent( > > ... > > struct link_training_settings *lt_settings) > > { > > ... > > dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, > > lt_settings->hw_lane_settings, > > lt_settings->dpcd_lane_settings); > > ... > > } > > > > Everything looks to be the correct size? > > Yep; this fix is similar to the one for intel_pm.c in this > > commit e7c6e405e171fb33990a12ecfd14e6500d9e5cf2 > > where the array size of 8 seems to be fine for all the > struct members related (pri_latency, spr_latency, cur_latency > and skl_latency): > > drivers/gpu/drm/i915/i915_drv.h:465:struct drm_i915_private { > ... > > drivers/gpu/drm/i915/i915_drv.h-739-struct { > > ... > drivers/gpu/drm/i915/i915_drv.h-745-/* primary */ > drivers/gpu/drm/i915/i915_drv.h-746-u16 pri_latency[5]; > drivers/gpu/drm/i915/i915_drv.h-747-/* sprite */ > drivers/gpu/drm/i915/i915_drv.h-748-u16 spr_latency[5]; > drivers/gpu/drm/i915/i915_drv.h-749-/* cursor */ > drivers/gpu/drm/i915/i915_drv.h-750-u16 cur_latency[5]; > drivers/gpu/drm/i915/i915_drv.h-751-/* > drivers/gpu/drm/i915/i915_drv.h-752- * Raw watermark memory > latency values > drivers/gpu/drm/i915/i915_drv.h-753- * for SKL for all 8 levels > drivers/gpu/drm/i915/i915_drv.h-754- * in 1us units. > drivers/gpu/drm/i915/i915_drv.h-755- */ > drivers/gpu/drm/i915/i915_drv.h-756-u16 skl_latency[8]; > > ... > drivers/gpu/drm/i915/i915_drv.h-773-} wm; > ... > } and in this case the ilk_wm_max_level() returns the right maximum size for the corresponding 'struct wm' member: drivers/gpu/drm/i915/intel_pm.c:2993:int ilk_wm_max_level(const struct drm_i915_private *dev_priv) drivers/gpu/drm/i915/intel_pm.c-2994-{ drivers/gpu/drm/i915/intel_pm.c-2995- /* how many WM levels are we expecting */ drivers/gpu/drm/i915/intel_pm.c-2996- if (HAS_HW_SAGV_WM(dev_priv)) drivers/gpu/drm/i915/intel_pm.c-2997- return 5; drivers/gpu/drm/i915/intel_pm.c-2998- else if (DISPLAY_VER(dev_priv) >= 9) drivers/gpu/drm/i915/intel_pm.c-2999- return 7; drivers/gpu/drm/i915/intel_pm.c-3000- else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) drivers/gpu/drm/i915/intel_pm.c-3001- return 4; drivers/gpu/drm/i915/intel_pm.c-3002- else if (DISPLAY_VER(dev_priv) >= 6) drivers/gpu/drm/i915/intel_pm.c-3003- return 3; drivers/gpu/drm/i915/intel_pm.c-3004- else drivers/gpu/drm/i915/intel_pm.c-3005- return 2; drivers/gpu/drm/i915/intel_pm.c-3006-} drivers/gpu/drm/i915/intel_pm.c:3009:static void intel_print_wm_latency(struct drm_i915_private *dev_priv, drivers/gpu/drm/i915/intel_pm.c-3010- const char *name, drivers/gpu/drm/i915/intel_pm.c-3011- const u16 wm[]) drivers/gpu/drm/i915/intel_pm.c-3012-{ drivers/gpu/drm/i915/intel_pm.c-3013- int level, max_level = ilk_wm_max_level(dev_priv); drivers/gpu/drm/i915/intel_pm.c-3014- drivers/gpu/drm/i915/intel_pm.c-3015- for (level = 0; level <= max_level; level++) { drivers/gpu/drm/i915/intel_pm.c-3016- unsigned int latency = wm[level]; drivers/gpu/drm/i915/intel_pm.c-3017- ... } still GCC warns about this with Wstringop-overread, as it is explained in commit e7c6e405e171. -- Gustavo > > however GCC warns about accessing bytes beyond the limits, and turning the > argument declarations into pointers (removing the over-specified array > size from the argument declaration) silence the warnings. > > -- > Gustavo
Re: [PATCH 08/10] drm/amdgpu: initialize the vmid_wait with the stub fence
Reviewed-by: Andrey Grodzovsky Andrey On 2022-03-03 03:23, Christian König wrote: This way we don't need to check for NULL any more. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ddf46802b1ff..4ba4b54092f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -188,7 +188,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, unsigned i; int r; - if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) + if (!dma_fence_is_signaled(ring->vmid_wait)) return amdgpu_sync_fence(sync, ring->vmid_wait); fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 35bcb6dc1816..7f33ae87cb41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -193,6 +193,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, adev->rings[ring->idx] = ring; ring->num_hw_submission = sched_hw_submission; ring->sched_score = sched_score; + ring->vmid_wait = dma_fence_get_stub(); r = amdgpu_fence_driver_init_ring(ring); if (r) return r;
Re: [PATCH 06/10] drm/amdgpu: properly imbed the IBs into the job
Reviewed-by: Andrey Grodzovsky Andrey On 2022-03-03 03:23, Christian König wrote: We now have standard macros for that. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 6 -- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 38c9fd7b7ad4..e4ca62225996 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -78,14 +78,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm) { - size_t size = sizeof(struct amdgpu_job); - if (num_ibs == 0) return -EINVAL; - size += sizeof(struct amdgpu_ib) * num_ibs; - - *job = kzalloc(size, GFP_KERNEL); + *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); if (!*job) return -ENOMEM; @@ -95,7 +91,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, */ (*job)->base.sched = >rings[0]->sched; (*job)->vm = vm; - (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; amdgpu_sync_create(&(*job)->sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 6d704772ff42..d599c0540b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -25,6 +25,7 @@ #include #include "amdgpu_sync.h" +#include "amdgpu_ring.h" /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) @@ -48,12 +49,10 @@ struct amdgpu_job { struct amdgpu_vm*vm; struct amdgpu_sync sync; struct amdgpu_sync sched_sync; - struct amdgpu_ib*ibs; struct dma_fencehw_fence; struct dma_fence*external_hw_fence; uint32_tpreamble_status; uint32_tpreemption_status; - uint32_tnum_ibs; boolvm_needs_flush; uint64_tvm_pd_addr; unsignedvmid; @@ -69,6 +68,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_tjob_run_counter; + + uint32_tnum_ibs; + struct amdgpu_ibibs[]; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
[PATCH] drm/selftests: fix a shift-out-of-bounds bug
pass the correct size value computed using the max_order. [ 68.124177][ T1] UBSAN: shift-out-of-bounds in include/linux/log2.h:67:13 [ 68.125333][ T1] shift exponent 4294967295 is too large for 32-bit type 'long unsigned int' [ 68.126563][ T1] CPU: 0 PID: 1 Comm: swapper Not tainted 5.17.0-rc2-00311-g39ec47bbfd5d #2 [ 68.127758][ T1] Call Trace: [ 68.128187][ T1] dump_stack_lvl (lib/dump_stack.c:108) [ 68.128793][ T1] dump_stack (lib/dump_stack.c:114) [ 68.129331][ T1] ubsan_epilogue (lib/ubsan.c:152) [ 68.129958][ T1] __ubsan_handle_shift_out_of_bounds.cold (arch/x86/include/asm/smap.h:85) [ 68.130791][ T1] ? drm_block_alloc+0x28/0x80 [ 68.131582][ T1] ? rcu_read_lock_sched_held (kernel/rcu/update.c:125) [ 68.132215][ T1] ? kmem_cache_alloc (include/trace/events/kmem.h:54 mm/slab.c:3501) [ 68.132878][ T1] ? mark_free+0x2e/0x80 [ 68.133524][ T1] drm_buddy_init.cold (include/linux/log2.h:67 drivers/gpu/drm/drm_buddy.c:131) [ 68.134145][ T1] ? test_drm_cmdline_init (drivers/gpu/drm/selftests/test-drm_buddy.c:87) [ 68.134770][ T1] igt_buddy_alloc_limit (drivers/gpu/drm/selftests/test-drm_buddy.c:30) [ 68.135472][ T1] ? vprintk_default (kernel/printk/printk.c:2257) [ 68.136057][ T1] ? test_drm_cmdline_init (drivers/gpu/drm/selftests/test-drm_buddy.c:87) [ 68.136812][ T1] test_drm_buddy_init (drivers/gpu/drm/selftests/drm_selftest.c:77 drivers/gpu/drm/selftests/test-drm_buddy.c:95) [ 68.137475][ T1] do_one_initcall (init/main.c:1300) [ 68.138111][ T1] ? parse_args (kernel/params.c:609 kernel/params.c:146 kernel/params.c:188) [ 68.138717][ T1] do_basic_setup (init/main.c:1372 init/main.c:1389 init/main.c:1408) [ 68.139366][ T1] kernel_init_freeable (init/main.c:1617) [ 68.140040][ T1] ? rest_init (init/main.c:1494) [ 68.140634][ T1] kernel_init (init/main.c:1504) [ 68.141155][ T1] ret_from_fork (arch/x86/entry/entry_32.S:772) [ 68.141607][ T1] [ 68.146730][ T1] [ cut here ] [ 68.147460][ T1] kernel BUG at drivers/gpu/drm/drm_buddy.c:140! [ 68.148280][ T1] invalid opcode: [#1] [ 68.148895][ T1] CPU: 0 PID: 1 Comm: swapper Not tainted 5.17.0-rc2-00311-g39ec47bbfd5d #2 [ 68.149896][ T1] EIP: drm_buddy_init (drivers/gpu/drm/drm_buddy.c:140 (discriminator 1)) For more details: https://lists.01.org/hyperkitty/list/l...@lists.01.org/thread/FDIF3HCILZNN5UQAZMOR7E3MQSMHHKWU/ Signed-off-by: Arunpravin Reported-by: kernel test robot --- drivers/gpu/drm/selftests/test-drm_buddy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/selftests/test-drm_buddy.c b/drivers/gpu/drm/selftests/test-drm_buddy.c index fa997f89522b..913cbd7eae04 100644 --- a/drivers/gpu/drm/selftests/test-drm_buddy.c +++ b/drivers/gpu/drm/selftests/test-drm_buddy.c @@ -902,14 +902,13 @@ static int igt_buddy_alloc_range(void *arg) static int igt_buddy_alloc_limit(void *arg) { - u64 end, size = U64_MAX, start = 0; + u64 size = U64_MAX, start = 0; struct drm_buddy_block *block; unsigned long flags = 0; LIST_HEAD(allocated); struct drm_buddy mm; int err; - size = end = round_down(size, 4096); err = drm_buddy_init(, size, PAGE_SIZE); if (err) return err; @@ -921,7 +920,8 @@ static int igt_buddy_alloc_limit(void *arg) goto out_fini; } - err = drm_buddy_alloc_blocks(, start, end, size, + size = mm.chunk_size << mm.max_order; + err = drm_buddy_alloc_blocks(, start, size, size, PAGE_SIZE, , flags); if (unlikely(err)) base-commit: 6be340ee8f5beae574dae6f5e17a22e67beeff3e -- 2.25.1
Re: [PATCH 05/10] drm/amdgpu: use job and ib structures directly in CS parsers
Acked-by: Andrey Grodzovsky Andrey On 2022-03-03 03:23, Christian König wrote: Instead of providing the ib index provide the job and ib pointers directly to the patch and parse functions for UVD and VCE. Also move the set/get functions for IB values to the IB declerations. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 13 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 23 - drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 36 --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 116 --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 7 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c| 13 +-- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c| 25 ++--- 9 files changed, 129 insertions(+), 114 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 20bf6134baca..dd9e708fe97f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1024,12 +1024,14 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p) memcpy(ib->ptr, kptr, job->ibs[i].length_dw * 4); amdgpu_bo_kunmap(aobj); - r = amdgpu_ring_parse_cs(ring, p, i); + r = amdgpu_ring_parse_cs(ring, p, p->job, +>job->ibs[i]); if (r) return r; } else { ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, i); + r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, + >job->ibs[i]); amdgpu_bo_kunmap(aobj); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index 30136eb50d2a..652b5593499f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -73,19 +73,6 @@ struct amdgpu_cs_parser { struct amdgpu_cs_post_dep *post_deps; }; -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo, struct amdgpu_bo_va_mapping **mapping); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 05e789fc7a9e..a8bed1b47899 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -163,8 +163,12 @@ struct amdgpu_ring_funcs { u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ - int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); - int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx); + int (*parse_cs)(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib); + int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, +struct amdgpu_job *job, +struct amdgpu_ib *ib); /* constants to calculate how many DW are needed for an emit */ unsigned emit_frame_size; unsigned emit_ib_size; @@ -264,8 +268,8 @@ struct amdgpu_ring { atomic_t*sched_score; }; -#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) -#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) +#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) +#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) @@ -364,6 +368,17 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); +static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx) +{ + return ib->ptr[idx]; +} + +static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx, +
Re: [PATCH 02/10] drm/amdgpu: header cleanup
Acked-by: Andrey Grodzovsky Andrey On 2022-03-03 03:23, Christian König wrote: No function change, just move a bunch of definitions from amdgpu.h into separate header files. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 95 --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h| 93 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 35 ++- .../gpu/drm/amd/amdgpu/amdgpu_trace_points.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + 10 files changed, 132 insertions(+), 100 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b89406b01694..7f447ed7a67f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -60,7 +60,6 @@ #include #include #include -#include #include #include "dm_pp_interface.h" @@ -276,9 +275,6 @@ extern int amdgpu_num_kcq; #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) struct amdgpu_device; -struct amdgpu_ib; -struct amdgpu_cs_parser; -struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; @@ -465,20 +461,6 @@ struct amdgpu_flip_work { }; -/* - * CP & rings. - */ - -struct amdgpu_ib { - struct amdgpu_sa_bo *sa_bo; - uint32_tlength_dw; - uint64_tgpu_addr; - uint32_t*ptr; - uint32_tflags; -}; - -extern const struct drm_sched_backend_ops amdgpu_sched_ops; - /* * file private structure */ @@ -494,79 +476,6 @@ struct amdgpu_fpriv { int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); -int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool, - struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ibs, struct amdgpu_job *job, - struct dma_fence **f); -int amdgpu_ib_pool_init(struct amdgpu_device *adev); -void amdgpu_ib_pool_fini(struct amdgpu_device *adev); -int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - -/* - * CS. - */ -struct amdgpu_cs_chunk { - uint32_tchunk_id; - uint32_tlength_dw; - void*kdata; -}; - -struct amdgpu_cs_post_dep { - struct drm_syncobj *syncobj; - struct dma_fence_chain *chain; - u64 point; -}; - -struct amdgpu_cs_parser { - struct amdgpu_device*adev; - struct drm_file *filp; - struct amdgpu_ctx *ctx; - - /* chunks */ - unsignednchunks; - struct amdgpu_cs_chunk *chunks; - - /* scheduler job object */ - struct amdgpu_job *job; - struct drm_sched_entity *entity; - - /* buffer objects */ - struct ww_acquire_ctx ticket; - struct amdgpu_bo_list *bo_list; - struct amdgpu_mn*mn; - struct amdgpu_bo_list_entry vm_pd; - struct list_headvalidated; - struct dma_fence*fence; - uint64_tbytes_moved_threshold; - uint64_tbytes_moved_vis_threshold; - uint64_tbytes_moved; - uint64_tbytes_moved_vis; - - /* user fence */ - struct amdgpu_bo_list_entry uf_entry; - - unsignednum_post_deps; - struct amdgpu_cs_post_dep *post_deps; -}; - -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - /* * Writeback */ @@ -1425,10 +1334,6 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) { return 0; } #endif -int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, - uint64_t addr, struct amdgpu_bo **bo, - struct
Re: [PATCH 01/10] drm/amdgpu: install ctx entities with cmpxchg
Reviewed-by: Andrey Grodzovsky Andrey On 2022-03-03 03:22, Christian König wrote: Since we removed the context lock we need to make sure that not two threads are trying to install an entity at the same time. Signed-off-by: Christian König Fixes: e68efb27647f ("drm/amdgpu: remove ctx->lock") --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c1f8b0e37b93..72c5f1c53d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -204,9 +204,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, if (r) goto error_free_entity; - ctx->entities[hw_ip][ring] = entity; + /* It's not an error if we fail to install the new entity */ + if (cmpxchg(>entities[hw_ip][ring], NULL, entity)) + goto cleanup_entity; + return 0; +cleanup_entity: + drm_sched_entity_fini(>entity); + error_free_entity: kfree(entity);
Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c
On 2022-03-03 12:25, Gustavo A. R. Silva wrote: > Fix the following Wstringop-overflow warnings when building with GCC-11: > > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:1491:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] > > by removing the over-specified array size from the argument declarations. > > This helps with the ongoing efforts to globally enable > -Wstringop-overflow. > > Link: https://github.com/KSPP/linux/issues/181>> Signed-off-by: Gustavo A. R. > Silva Acked-by: Harry Wentland Harry > --- > drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 ++-- > drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h | 4 ++-- > 2 files changed, 4 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c > b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c > index f11a8d97fb60..81952e07acf6 100644 > --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c > +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c > @@ -823,7 +823,7 @@ bool dp_is_interlane_aligned(union > lane_align_status_updated align_status) > void dp_hw_to_dpcd_lane_settings( > const struct link_training_settings *lt_settings, > const struct dc_lane_settings > hw_lane_settings[LANE_COUNT_DP_MAX], > - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) > + union dpcd_training_lane dpcd_lane_settings[]) > { > uint8_t lane = 0; > > @@ -853,7 +853,7 @@ void dp_decide_lane_settings( > const struct link_training_settings *lt_settings, > const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], > struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], > - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) > + union dpcd_training_lane dpcd_lane_settings[]) > { > uint32_t lane; > > diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h > b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h > index ab9939db8cea..0d00da1640a7 100644 > --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h > +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h > @@ -147,12 +147,12 @@ bool dp_is_max_vs_reached( > void dp_hw_to_dpcd_lane_settings( > const struct link_training_settings *lt_settings, > const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], > - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]); > + union dpcd_training_lane dpcd_lane_settings[]); > void dp_decide_lane_settings( > const struct link_training_settings *lt_settings, > const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], > struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], > - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]); > + union dpcd_training_lane dpcd_lane_settings[]); > > uint32_t dp_translate_training_aux_read_interval(uint32_t > dpcd_aux_read_interval); >
Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c
On Thu, Mar 03, 2022 at 09:43:28AM -0800, Kees Cook wrote: > On Thu, Mar 03, 2022 at 11:25:03AM -0600, Gustavo A. R. Silva wrote: > > Fix the following Wstringop-overflow warnings when building with GCC-11: > > > > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: > > warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > > [-Wstringop-overflow=] > > Can you "show your work" a little more here? I don't actually see the > what is getting fixed: > > enum dc_lane_count { > ... > LANE_COUNT_FOUR = 4, > ... > LANE_COUNT_DP_MAX = LANE_COUNT_FOUR > }; > > struct link_training_settings { > ... > union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]; > }; > > void dp_hw_to_dpcd_lane_settings( > ... > union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) > { > ... > } > > static enum link_training_result dpia_training_cr_transparent( > ... > struct link_training_settings *lt_settings) > { > ... > dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, > lt_settings->hw_lane_settings, > lt_settings->dpcd_lane_settings); > ... > } > > Everything looks to be the correct size? Yep; this fix is similar to the one for intel_pm.c in this commit e7c6e405e171fb33990a12ecfd14e6500d9e5cf2 where the array size of 8 seems to be fine for all the struct members related (pri_latency, spr_latency, cur_latency and skl_latency): drivers/gpu/drm/i915/i915_drv.h:465:struct drm_i915_private { ... drivers/gpu/drm/i915/i915_drv.h-739-struct { ... drivers/gpu/drm/i915/i915_drv.h-745-/* primary */ drivers/gpu/drm/i915/i915_drv.h-746-u16 pri_latency[5]; drivers/gpu/drm/i915/i915_drv.h-747-/* sprite */ drivers/gpu/drm/i915/i915_drv.h-748-u16 spr_latency[5]; drivers/gpu/drm/i915/i915_drv.h-749-/* cursor */ drivers/gpu/drm/i915/i915_drv.h-750-u16 cur_latency[5]; drivers/gpu/drm/i915/i915_drv.h-751-/* drivers/gpu/drm/i915/i915_drv.h-752- * Raw watermark memory latency values drivers/gpu/drm/i915/i915_drv.h-753- * for SKL for all 8 levels drivers/gpu/drm/i915/i915_drv.h-754- * in 1us units. drivers/gpu/drm/i915/i915_drv.h-755- */ drivers/gpu/drm/i915/i915_drv.h-756-u16 skl_latency[8]; ... drivers/gpu/drm/i915/i915_drv.h-773-} wm; ... } however GCC warns about accessing bytes beyond the limits, and turning the argument declarations into pointers (removing the over-specified array size from the argument declaration) silence the warnings. -- Gustavo
Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c
On Thu, Mar 03, 2022 at 11:25:03AM -0600, Gustavo A. R. Silva wrote: > Fix the following Wstringop-overflow warnings when building with GCC-11: > > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: > ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 > [-Wstringop-overflow=] Can you "show your work" a little more here? I don't actually see the what is getting fixed: enum dc_lane_count { ... LANE_COUNT_FOUR = 4, ... LANE_COUNT_DP_MAX = LANE_COUNT_FOUR }; struct link_training_settings { ... union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]; }; void dp_hw_to_dpcd_lane_settings( ... union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) { ... } static enum link_training_result dpia_training_cr_transparent( ... struct link_training_settings *lt_settings) { ... dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); ... } Everything looks to be the correct size? -- Kees Cook
[PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c
Fix the following Wstringop-overflow warnings when building with GCC-11: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:1491:17: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 [-Wstringop-overflow=] by removing the over-specified array size from the argument declarations. This helps with the ongoing efforts to globally enable -Wstringop-overflow. Link: https://github.com/KSPP/linux/issues/181 Signed-off-by: Gustavo A. R. Silva --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 ++-- drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index f11a8d97fb60..81952e07acf6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -823,7 +823,7 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status) void dp_hw_to_dpcd_lane_settings( const struct link_training_settings *lt_settings, const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) + union dpcd_training_lane dpcd_lane_settings[]) { uint8_t lane = 0; @@ -853,7 +853,7 @@ void dp_decide_lane_settings( const struct link_training_settings *lt_settings, const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) + union dpcd_training_lane dpcd_lane_settings[]) { uint32_t lane; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index ab9939db8cea..0d00da1640a7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -147,12 +147,12 @@ bool dp_is_max_vs_reached( void dp_hw_to_dpcd_lane_settings( const struct link_training_settings *lt_settings, const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]); + union dpcd_training_lane dpcd_lane_settings[]); void dp_decide_lane_settings( const struct link_training_settings *lt_settings, const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]); + union dpcd_training_lane dpcd_lane_settings[]); uint32_t dp_translate_training_aux_read_interval(uint32_t dpcd_aux_read_interval); -- 2.27.0
Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready
I pushed all the changes including your patch. Andrey On 2022-03-02 22:16, Andrey Grodzovsky wrote: OK, i will do quick smoke test tomorrow and push all of it it then. Andrey On 2022-03-02 21:59, Chen, JingWen wrote: Hi Andrey, I don't have the bare mental environment, I can only test the SRIOV cases. Best Regards, JingWen Chen On Mar 3, 2022, at 01:55, Grodzovsky, Andrey wrote: The patch is acked-by: Andrey Grodzovsky If you also smoked tested bare metal feel free to apply all the patches, if no let me know. Andrey On 2022-03-02 04:51, JingWen Chen wrote: Hi Andrey, Most part of the patches are OK, but the code will introduce a ib test fail on the disabled vcn of sienna_cichlid. In SRIOV use case we will disable one vcn on sienna_cichlid, I have attached a patch to fix this issue, please check the attachment. Best Regards, Jingwen Chen On 2/26/22 5:22 AM, Andrey Grodzovsky wrote: Hey, patches attached - i applied the patches and resolved merge conflicts but weren't able to test as my on board's network card doesn't work with 5.16 kernel (it does with 5.17, maybe it's Kconfig issue and i need to check more). The patches are on top of 'cababde192b2 Yifan Zhang 2 days ago drm/amd/pm: fix mode2 reset fail for smu 13.0.5 ' commit. Please test and let me know. Maybe by Monday I will be able to resolve the connectivity issue on 5.16. Andrey On 2022-02-24 22:13, JingWen Chen wrote: Hi Andrey, Sorry for the misleading, I mean the whole patch series. We are depending on this patch series to fix the concurrency issue within SRIOV TDR sequence. On 2/25/22 1:26 AM, Andrey Grodzovsky wrote: No problem if so but before I do, JingWen - why you think this patch is needed as a standalone now ? It has no use without the entire feature together with it. Is it some changes you want to do on top of that code ? Andrey On 2022-02-24 12:12, Deucher, Alexander wrote: [Public] If it applies cleanly, feel free to drop it in. I'll drop those patches for drm-next since they are already in drm-misc. Alex *From:* amd-gfx on behalf of Andrey Grodzovsky *Sent:* Thursday, February 24, 2022 11:24 AM *To:* Chen, JingWen ; Christian König ; dri-de...@lists.freedesktop.org ; amd-gfx@lists.freedesktop.org *Cc:* Liu, Monk ; Chen, Horace ; Lazar, Lijo ; Koenig, Christian ; dan...@ffwll.ch *Subject:* Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready No because all the patch-set including this patch was landed into drm-misc-next and will reach amd-staging-drm-next on the next upstream rebase i guess. Andrey On 2022-02-24 01:47, JingWen Chen wrote: Hi Andrey, Will you port this patch into amd-staging-drm-next? on 2/10/22 2:06 AM, Andrey Grodzovsky wrote: All comments are fixed and code pushed. Thanks for everyone who helped reviewing. Andrey On 2022-02-09 02:53, Christian König wrote: Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky: Before we initialize schedulers we must know which reset domain are we in - for single device there iis a single domain per device and so single wq per device. For XGMI the reset domain spans the entire XGMI hive and so the reset wq is per hive. Signed-off-by: Andrey Grodzovsky One more comment below, with that fixed Reviewed-by: Christian König . --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 34 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 + 3 files changed, 51 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9704b0e1fd82..00123b0013d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) return r; } +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) +{ + long timeout; + int r, i; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + + /* No need to setup the GPU scheduler for rings that don't need it */ + if (!ring || ring->no_scheduler) + continue; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + timeout = adev->compute_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } + + r = drm_sched_init(>sched, _sched_ops, + ring->num_hw_submission, amdgpu_job_hang_limit, + timeout, adev->reset_domain.wq, ring->sched_score, ring->name); +
RE: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran
[AMD Official Use Only] Reviewed by : Shaoyun.liu -Original Message- From: amd-gfx On Behalf Of David Yu Sent: Thursday, March 3, 2022 11:25 AM To: amd-gfx@lists.freedesktop.org Cc: Yu, David Subject: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran Add DFC CAP support for aldebaran Initialize cap microcode in psp_init_sriov_microcode, the ta microcode will be initialized in psp_vxx_init_microcode Signed-off-by: David Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 94bfe502b55e..3ce1d38a7822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) ret = psp_init_cap_microcode(psp, "sienna_cichlid"); break; case IP_VERSION(13, 0, 2): - ret = psp_init_ta_microcode(psp, "aldebaran"); + ret = psp_init_cap_microcode(psp, "aldebaran"); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 2c6070b90dcf..024f60631faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,6 +31,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -- 2.25.1
[PATCH] drm/amdgpu: Add DFC CAP support for aldebaran
Add DFC CAP support for aldebaran Initialize cap microcode in psp_init_sriov_microcode, the ta microcode will be initialized in psp_vxx_init_microcode Signed-off-by: David Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 94bfe502b55e..3ce1d38a7822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) ret = psp_init_cap_microcode(psp, "sienna_cichlid"); break; case IP_VERSION(13, 0, 2): - ret = psp_init_ta_microcode(psp, "aldebaran"); + ret = psp_init_cap_microcode(psp, "aldebaran"); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 2c6070b90dcf..024f60631faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,6 +31,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -- 2.25.1
Re: [PATCH 2/2] drm/amdkfd: implement get_atc_vmid_pasid_mapping_info for gfx10.3
[Public] Series is: Reviewed-by: Alex Deucher From: amd-gfx on behalf of Yifan Zhang Sent: Thursday, March 3, 2022 3:05 AM To: amd-gfx@lists.freedesktop.org Cc: Zhang, Yifan ; Kuehling, Felix Subject: [PATCH 2/2] drm/amdkfd: implement get_atc_vmid_pasid_mapping_info for gfx10.3 This patch implements get_atc_vmid_pasid_mapping_info for gfx10.3 Signed-off-by: Yifan Zhang --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index e9c80ce13f3e..ba21ec6b35e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -26,6 +26,8 @@ #include "gc/gc_10_3_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" #include "oss/osssys_5_0_0_sh_mask.h" +#include "athub/athub_2_1_0_offset.h" +#include "athub/athub_2_1_0_sh_mask.h" #include "soc15_common.h" #include "v10_structs.h" #include "nv.h" @@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev, return 0; } +static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) ++ vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .hqd_destroy = hqd_destroy_v10_3, .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, .wave_control_execute = wave_control_execute_v10_3, - .get_atc_vmid_pasid_mapping_info = NULL, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, #if 0 -- 2.25.1
Re: [PATCH 1/1] drm/amdkfd: Improve concurrency of event handling
Am 2022-03-03 um 02:25 schrieb Christian König: Am 02.03.22 um 21:06 schrieb Felix Kuehling: Use rcu_read_lock to read p->event_idr concurrently with other readers and writers. Use p->event_mutex only for creating and destroying events and in kfd_wait_on_events. That might not necessary work as you expected. rcu_read_lock() does not provide barriers for the CPU cache. Protect the contents of the kfd_event structure with a per-event spinlock that can be taken inside the rcu_read_lock critical section. That helps to prevent problems for the ev structure, but it doesn't protect the idr itself. idr_find can be used safely under rcu_read_lock according to this comment in idr.h: * idr_find() is able to be called locklessly, using RCU. The caller must * ensure calls to this function are made within rcu_read_lock() regions. * Other readers (lock-free or otherwise) and modifications may be running * concurrently. * * It is still required that the caller manage the synchronization and * lifetimes of the items. So if RCU lock-free lookups are used, typically * this would mean that the items have their own locks, or are amenable to * lock-free access; and that the items are freed by RCU (or only freed after * having been deleted from the idr tree *and* a synchronize_rcu() grace * period). It was introduced by f9c46d6ea5ce ("idr: make idr_find rcu-safe") in 2008. BTW: Why are you using an idr in the first place? I don't see the lookup used anywhere. lookup_event_by_id is just a wrapper around idr_find. It's used in kfd_event_destroy, kfd_set_event, kfd_reset_event and kfd_wait_on_events. lookup_signaled_event_by_partial_id also uses idr_find. It's used in kfd_signal_event_interrupt. Regards, Felix Regards, Christian. This eliminates contention of p->event_mutex in set_event, which tends to be on the critical path for dispatch latency even when busy waiting is used. It also eliminates lock contention in event interrupt handlers. Since the p->event_mutex is now used much less, the impact of requiring it in kfd_wait_on_events should also be much smaller. This should improve event handling latency for processes using multiple GPUs concurrently. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 119 +++- drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 + 2 files changed, 78 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index deecccebe5b6..1726306715b2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p, } /* - * Assumes that p->event_mutex is held and of course that p is not going - * away (current or locked). + * Assumes that p->event_mutex or rcu_readlock is held and of course that p is + * not going away. */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { @@ -251,15 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) struct kfd_event_waiter *waiter; /* Wake up pending waiters. They will return failure */ + spin_lock(>lock); list_for_each_entry(waiter, >wq.head, wait.entry) - waiter->event = NULL; + WRITE_ONCE(waiter->event, NULL); wake_up_all(>wq); + spin_unlock(>lock); if (ev->type == KFD_EVENT_TYPE_SIGNAL || ev->type == KFD_EVENT_TYPE_DEBUG) p->signal_event_count--; idr_remove(>event_idr, ev->event_id); + synchronize_rcu(); kfree(ev); } @@ -392,6 +395,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ev->auto_reset = auto_reset; ev->signaled = false; + spin_lock_init(>lock); init_waitqueue_head(>wq); *event_page_offset = 0; @@ -466,6 +470,7 @@ int kfd_criu_restore_event(struct file *devkfd, ev->auto_reset = ev_priv->auto_reset; ev->signaled = ev_priv->signaled; + spin_lock_init(>lock); init_waitqueue_head(>wq); mutex_lock(>event_mutex); @@ -609,13 +614,13 @@ static void set_event(struct kfd_event *ev) /* Auto reset if the list is non-empty and we're waking * someone. waitqueue_active is safe here because we're - * protected by the p->event_mutex, which is also held when + * protected by the ev->lock, which is also held when * updating the wait queues in kfd_wait_on_events. */ ev->signaled = !ev->auto_reset || !waitqueue_active(>wq); list_for_each_entry(waiter, >wq.head, wait.entry) - waiter->activated = true; + WRITE_ONCE(waiter->activated, true); wake_up_all(>wq); } @@ -626,16 +631,18 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id) int ret = 0; struct kfd_event *ev; - mutex_lock(>event_mutex); + rcu_read_lock(); ev =
RE: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran
[AMD Official Use Only] Probably just described as follows : Initialize cap microcode in psp_init_sriov_microcode, the ta microcode will be initialized in psp_vxx_init_microcode -Original Message- From: amd-gfx On Behalf Of David Yu Sent: Thursday, March 3, 2022 9:10 AM To: amd-gfx@lists.freedesktop.org Cc: Yu, David Subject: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran Add DFC CAP support for aldebaran Changed incorrect call to psp_init_ta_microcode in psp_init_sriov_microcode to psp_init_cap_microcode which caused it to fail even with correct CAP firmware. Signed-off-by: David Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 94bfe502b55e..3ce1d38a7822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) ret = psp_init_cap_microcode(psp, "sienna_cichlid"); break; case IP_VERSION(13, 0, 2): - ret = psp_init_ta_microcode(psp, "aldebaran"); + ret = psp_init_cap_microcode(psp, "aldebaran"); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 2c6070b90dcf..024f60631faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,6 +31,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -- 2.25.1
RE: [PATCH 1/2] drm/amdgpu/vcn: Update fw shared data structure
[AMD Official Use Only] The series are: Reviewed-by: Leo Liu -Original Message- From: Dong, Ruijing Sent: March 2, 2022 4:25 PM To: amd-gfx@lists.freedesktop.org Cc: Liu, Leo ; Deucher, Alexander Subject: [PATCH 1/2] drm/amdgpu/vcn: Update fw shared data structure Add fw log in fw shared data structure. Signed-off-by: Ruijing Dong --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 - drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 26 +++-- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 18 - drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 18 - drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 20 +-- 5 files changed, 61 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 5e0dbf54d561..6f3f55e39ab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -79,6 +79,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; + unsigned int fw_shared_size; int i, r; INIT_DELAYED_WORK(>vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -226,7 +227,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + bo_size += fw_shared_size; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -240,10 +242,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } - adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); - adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - fw_shared_size; + adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - fw_shared_size; + + adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; if (adev->vcn.indirect_sram) { r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 5d3728b027d3..f6569a7d6fdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -205,6 +205,13 @@ struct amdgpu_vcn_reg{ unsignedscratch9; }; +struct amdgpu_vcn_fw_shared { + void*cpu_addr; + uint64_tgpu_addr; + uint32_tmem_size; + uint32_tlog_offset; +}; + struct amdgpu_vcn_inst { struct amdgpu_bo*vcpu_bo; void*cpu_addr; @@ -221,8 +228,7 @@ struct amdgpu_vcn_inst { uint64_tdpg_sram_gpu_addr; uint32_t*dpg_sram_curr_addr; atomic_tdpg_enc_submission_cnt; - void*fw_shared_cpu_addr; - uint64_tfw_shared_gpu_addr; + struct amdgpu_vcn_fw_shared fw_shared; }; struct amdgpu_vcn { @@ -265,6 +271,13 @@ struct amdgpu_fw_shared_sw_ring { uint8_t padding[3]; }; +struct amdgpu_fw_shared_fw_logging { + uint8_t is_enabled; + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t size; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[44]; @@ -272,6 +285,15 @@ struct amdgpu_fw_shared { uint8_t pad1[1]; struct amdgpu_fw_shared_multi_queue multi_queue; struct amdgpu_fw_shared_sw_ring sw_ring; + struct amdgpu_fw_shared_fw_logging fw_log; }; + +struct amdgpu_vcn_fwlog { + uint32_t rptr; + uint32_t wptr; + uint32_t buffer_size; + uint32_t header_size; + uint8_t wrapped; }; struct amdgpu_vcn_decode_buffer { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 313fc1b53999..36ec877a2a55 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -172,7 +172,7 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; - fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + fw_shared =
[PATCH] drm/amdgpu: Add DFC CAP support for aldebaran
Add DFC CAP support for aldebaran Changed incorrect call to psp_init_ta_microcode in psp_init_sriov_microcode to psp_init_cap_microcode which caused it to fail even with correct CAP firmware. Signed-off-by: David Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 94bfe502b55e..3ce1d38a7822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) ret = psp_init_cap_microcode(psp, "sienna_cichlid"); break; case IP_VERSION(13, 0, 2): - ret = psp_init_ta_microcode(psp, "aldebaran"); + ret = psp_init_cap_microcode(psp, "aldebaran"); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 2c6070b90dcf..024f60631faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,6 +31,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -- 2.25.1
RE: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
From: Xiaomeng Tong > Sent: 03 March 2022 07:27 > > On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote: > > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote: > > > The problem is the mis-use of iterator outside the loop on exit, and > > > the iterator will be the HEAD's container_of pointer which pointers > > > to a type-confused struct. Sidenote: The *mis-use* here refers to > > > mistakely access to other members of the struct, instead of the > > > list_head member which acutally is the valid HEAD. > > > > The problem is that the HEAD's container_of pointer should never > > be calculated at all. > > This is what is fundamentally broken about the current definition. > > Yes, the rule is "the HEAD's container_of pointer should never be > calculated at all outside the loop", but how do you make sure everyone > follows this rule? > Everyone makes mistakes, but we can eliminate them all from the beginning > with the help of compiler which can catch such use-after-loop things. > > > > IOW, you would dereference a (NULL + offset_of_member) address here. > > > >Where? > > In the case where a developer do not follows the above rule, and mistakely > access a non-list-head member of the HEAD's container_of pointer outside > the loop. For example: > struct req{ > int a; > struct list_head h; > } > struct req *r; > list_for_each_entry(r, HEAD, h) { > if (r->a == 0x10) > break; > } > // the developer made a mistake: he didn't take this situation into > // account where all entries in the list are *r->a != 0x10*, and now > // the r is the HEAD's container_of pointer. > r->a = 0x20; > Thus the "r->a = 0x20" would dereference a (NULL + offset_of_member) > address here. That is just a bug. No different to failing to check anything else might 'return' a NULL pointer. Because it is a NULL dereference you find out pretty quickly. The existing loop leaves you with a valid pointer to something that isn't a list item. > > > Please remind me if i missed something, thanks. > > > > > > Can you share your "alternative definitions" details? thanks! > > > > The loop should probably use as extra variable that points > > to the 'list node' in the next structure. > > Something like: > > for (xxx *iter = head->next; > > iter == ? ((item = NULL),0) : ((item = > > list_item(iter),1)); > > iter = item->member->next) { > >... > > With a bit of casting you can use 'item' to hold 'iter'. > > you still can not make sure everyone follows this rule: > "do not use iterator outside the loop" without the help of compiler, > because item is declared outside the loop. That one has 'iter' defined in the loop. > BTW, to avoid ambiguity,the "alternative definitions" here i asked is > something from you in this context: > "OTOH there may be alternative definitions that can be used to get > the compiler (or other compiler-like tools) to detect broken code. > Even if the definition can't possibly generate a working kerrnel." I was thinking of something like: if ((pos = list_first)), 1) pos = NULL else so that unchecked dereferences after the loop will be detectable as NULL pointer offsets - but that in itself isn't enough to avoid other warnings. > > > The "list_for_each_entry_inside(pos, type, head, member)" way makes > > > the iterator invisiable outside the loop, and would be catched by > > > compiler if use-after-loop things happened. > > > It is also a compete PITA for anything doing a search. > > You mean it would be a burden on search? can you show me some examples? The whole business of having to save the pointer to the located item before breaking the loop, remembering to have set it to NULL earlier etc. It is so much better if you can just do: if (found) break; David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
RE: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
> From: Xiaomeng Tong > > Sent: 03 March 2022 07:27 > > > > On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote: > > > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote: > > > > The problem is the mis-use of iterator outside the loop on exit, and > > > > the iterator will be the HEAD's container_of pointer which pointers > > > > to a type-confused struct. Sidenote: The *mis-use* here refers to > > > > mistakely access to other members of the struct, instead of the > > > > list_head member which acutally is the valid HEAD. > > > > > > The problem is that the HEAD's container_of pointer should never > > > be calculated at all. > > > This is what is fundamentally broken about the current definition. > > > > Yes, the rule is "the HEAD's container_of pointer should never be > > calculated at all outside the loop", but how do you make sure everyone > > follows this rule? > > Everyone makes mistakes, but we can eliminate them all from the beginning > > with the help of compiler which can catch such use-after-loop things. > > > > > > IOW, you would dereference a (NULL + offset_of_member) address here. > > > > > >Where? > > > > In the case where a developer do not follows the above rule, and mistakely > > access a non-list-head member of the HEAD's container_of pointer outside > > the loop. For example: > > struct req{ > > int a; > > struct list_head h; > > } > > struct req *r; > > list_for_each_entry(r, HEAD, h) { > > if (r->a == 0x10) > > break; > > } > > // the developer made a mistake: he didn't take this situation into > > // account where all entries in the list are *r->a != 0x10*, and now > > // the r is the HEAD's container_of pointer. > > r->a = 0x20; > > Thus the "r->a = 0x20" would dereference a (NULL + offset_of_member) > > address here. > > That is just a bug. > No different to failing to check anything else might 'return' > a NULL pointer. Yes, but it‘s a mistake everyone has made and will make, we should avoid this at the beginning with the help of compiler. > Because it is a NULL dereference you find out pretty quickly. AFAIK,NULL dereference is a undefined bahavior, can compiler quickly catch it? Or it can only be applied to some simple/restricted cases. > The existing loop leaves you with a valid pointer to something > that isn't a list item. > > > > > Please remind me if i missed something, thanks. > > > > > > > > Can you share your "alternative definitions" details? thanks! > > > > > > The loop should probably use as extra variable that points > > > to the 'list node' in the next structure. > > > Something like: > > > for (xxx *iter = head->next; > > > iter == ? ((item = NULL),0) : ((item = > > > list_item(iter),1)); > > > iter = item->member->next) { > > > ... > > > With a bit of casting you can use 'item' to hold 'iter'. > > > > you still can not make sure everyone follows this rule: > > "do not use iterator outside the loop" without the help of compiler, > > because item is declared outside the loop. > > That one has 'iter' defined in the loop. Oh, sorry, I misunderstood. Then this is the same way with my way of list_for_each_entry_inside(pos, type, head, member), which declare the iterator inside the loop. You go further and make things like ">member == (head)" goes away to avoid calculate the HEAD's container_of pointer, although the calculation is harmless. > > > BTW, to avoid ambiguity,the "alternative definitions" here i asked is > > something from you in this context: > > "OTOH there may be alternative definitions that can be used to get > > the compiler (or other compiler-like tools) to detect broken code. > > Even if the definition can't possibly generate a working kerrnel." > > I was thinking of something like: > if ((pos = list_first)), 1) pos = NULL else > so that unchecked dereferences after the loop will be detectable > as NULL pointer offsets - but that in itself isn't enough to avoid > other warnings. > Do you mean put this right after the loop (I changed somthing that i do not understand, please correct me if i am worng, thanks): if (pos == list_first) pos = NULL; else and compiler can detect the following NULL derefernce? But if the NULL derefernce is just right after the loop originally, it will be masked by the *else* keyword。 > > > > The "list_for_each_entry_inside(pos, type, head, member)" way makes > > > > the iterator invisiable outside the loop, and would be catched by > > > > compiler if use-after-loop things happened. > > > > > It is also a compete PITA for anything doing a search. > > > > You mean it would be a burden on search? can you show me some examples? > > The whole business of having to save the pointer to the located item > before breaking the loop, remembering to have set it to NULL earlier etc. Ok, i see. And then you need pass a "item" to the list_for_each_entry macro as a new argument. > > It is so much better if you can
Re: [Kgdb-bugreport] [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
On Thu, Mar 03, 2022 at 03:26:57PM +0800, Xiaomeng Tong wrote: > On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote: > > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote: > > > The problem is the mis-use of iterator outside the loop on exit, and > > > the iterator will be the HEAD's container_of pointer which pointers > > > to a type-confused struct. Sidenote: The *mis-use* here refers to > > > mistakely access to other members of the struct, instead of the > > > list_head member which acutally is the valid HEAD. > > > > The problem is that the HEAD's container_of pointer should never > > be calculated at all. > > This is what is fundamentally broken about the current definition. > > Yes, the rule is "the HEAD's container_of pointer should never be > calculated at all outside the loop", but how do you make sure everyone > follows this rule? Your formulation of the rule is correct: never run container_of() on HEAD pointer. However the rule that is introduced by list_for_each_entry_inside() is *not* this rule. The rule it introduces is: never access the iterator variable outside the loop. Making the iterator NULL on loop exit does follow the rule you proposed but using a different technique: do not allow HEAD to be stored in the iterator variable after loop exit. This also makes it impossible to run container_of() on the HEAD pointer. > Everyone makes mistakes, but we can eliminate them all from the beginning > with the help of compiler which can catch such use-after-loop things. Indeed but if we introduce new interfaces then we don't have to worry about existing usages and silent regressions. Code will have been written knowing the loop can exit with the iterator set to NULL. Sure it is still possible for programmers to make mistakes and dereference the NULL pointer but C programmers are well training w.r.t. NULL pointer checking so such mistakes are much less likely than with the current list_for_each_entry() macro. This risk must be offset against the way a NULLify approach can lead to more elegant code when we are doing a list search. Daniel.
Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
correct for typo: -for (struct list_head *list = head->next, cond = (struct list_head *)-1; cond == (struct list_head *)-1; cond = NULL) \ +for (struct list_head *list = head->next, *cond = (struct list_head *)-1; cond == (struct list_head *)-1; cond = NULL) \ -- Xiaomeng Tong
Re: [PATCH v9] drm/amdgpu: add drm buddy support to amdgpu
Am 01.03.22 um 21:38 schrieb Arunpravin: - Remove drm_mm references and replace with drm buddy functionalities - Add res cursor support for drm buddy v2(Matthew Auld): - replace spinlock with mutex as we call kmem_cache_zalloc (..., GFP_KERNEL) in drm_buddy_alloc() function - lock drm_buddy_block_trim() function as it calls mark_free/mark_split are all globally visible v3(Matthew Auld): - remove trim method error handling as we address the failure case at drm_buddy_block_trim() function v4: - fix warnings reported by kernel test robot v5: - fix merge conflict issue v6: - fix warnings reported by kernel test robot v7: - remove DRM_BUDDY_RANGE_ALLOCATION flag usage v8: - keep DRM_BUDDY_RANGE_ALLOCATION flag usage - resolve conflicts created by drm/amdgpu: remove VRAM accounting v2 v9(Christian): - rename label name as fallback - move struct amdgpu_vram_mgr to amdgpu_vram_mgr.h - remove unnecessary flags from struct amdgpu_vram_reservation - rewrite block NULL check condition - change else style as per coding standard - rewrite the node max size - add a helper function to fetch the first entry from the list Signed-off-by: Arunpravin --- drivers/gpu/drm/Kconfig | 1 + .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 97 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 249 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 68 + 5 files changed, 289 insertions(+), 136 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f1422bee3dcc..5133c3f028ab 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -280,6 +280,7 @@ config DRM_AMDGPU select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE + select DRM_BUDDY help Choose this option if you have a recent AMD Radeon graphics card. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index acfa207cf970..864c609ba00b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,12 +30,15 @@ #include #include +#include "amdgpu_vram_mgr.h" + /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { uint64_tstart; uint64_tsize; uint64_tremaining; - struct drm_mm_node *node; + void*node; + uint32_tmem_type; }; /** @@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { + struct drm_buddy_block *block; + struct list_head *head, *next; struct drm_mm_node *node; - if (!res || res->mem_type == TTM_PL_SYSTEM) { - cur->start = start; - cur->size = size; - cur->remaining = size; - cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); - return; - } + if (!res) + goto fallback; BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - node = to_ttm_range_mgr_node(res)->mm_nodes; - while (start >= node->size << PAGE_SHIFT) - start -= node++->size << PAGE_SHIFT; + cur->mem_type = res->mem_type; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + head = _amdgpu_vram_mgr_node(res)->blocks; + + block = list_first_entry_or_null(head, +struct drm_buddy_block, +link); + if (!block) + goto fallback; + + while (start >= amdgpu_node_size(block)) { + start -= amdgpu_node_size(block); + + next = block->link.next; + if (next != head) + block = list_entry(next, struct drm_buddy_block, link); + } + + cur->start = amdgpu_node_start(block) + start; + cur->size = min(amdgpu_node_size(block) - start, size); + cur->remaining = size; + cur->node = block; + break; + case TTM_PL_TT: + node = to_ttm_range_mgr_node(res)->mm_nodes; + while (start >= node->size << PAGE_SHIFT) + start -= node++->size << PAGE_SHIFT; + + cur->start = (node->start << PAGE_SHIFT) + start; + cur->size = min((node->size << PAGE_SHIFT) - start, size); +
Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
On Wed, Mar 02, 2022 at 10:29:31AM +0100, Rasmus Villemoes wrote: > This won't help the current issue (because it doesn't exist and might > never), but just in case some compiler people are listening, I'd like to > have some sort of way to tell the compiler "treat this variable as > uninitialized from here on". So one could do > > #define kfree(p) do { __kfree(p); __magic_uninit(p); } while (0) > I think this is a good idea. Smatch can already find all the iterator used outside the loop bugs that Jakob did with a manageably small number of false positives. The problems are that: 1) It would be better to find it in the compile stage instead of later. 2) I hadn't published that check. Will do shortly. 3) A couple weeks back I noticed that the list_for_each_entry() check was no longer working. Fixed now. 4) Smatch was only looking at cases which dereferenced the iterator and not checks for NULL. I will test the fix for that tonight. 5) Smatch is broken on PowerPC. Coccinelle also has checks for iterator used outside the loop. Coccinelle had these checks before Smatch did. I copied Julia's idea. If your annotation was added to GCC it would solve all those problems. But it's kind of awkward that we can't annotate kfree() directly instead of creating the kfree() macro. And there are lots of other functions which free things so you'd have to create a ton of macros like: #define gr_free_dma_desc(a, b) do { __gr_free_dma_desc(a, b); __magic_uninit(b); } while (0) And then there are functions which free a struct member: void free_bar(struct foo *p) { kfree(p->bar); } Or functions which free a container_of(). Smatch is more evolved than designed but what I do these days is use $0, $1, $2 to represent the parameters. So you can say a function frees $0->bar. For container_of() then is "(168<~$0)->bar" which means 168 bytes from $0. Returns are parameter -1 so I guess it would be $(-1), but as I said Smatch evolved so right now places that talk about returned values use a different format. What you could do is just make a parseable table next to the function definition with all the information. Then you would use a Perl script to automatically generate a Coccinelle check to warn about use after frees. diff --git a/mm/slab.c b/mm/slab.c index ddf5737c63d9..c9dffa5c40a2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3771,6 +3771,9 @@ EXPORT_SYMBOL(kmem_cache_free_bulk); * * Don't free memory not originally allocated by kmalloc() * or you will run into trouble. + * + * CHECKER information + * frees: $0 */ void kfree(const void *objp) { regards, dan carpenter
Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
On Wed, Mar 02, 2022 at 12:07:04PM -0800, Kees Cook wrote: > On Wed, Mar 02, 2022 at 10:29:31AM +0100, Rasmus Villemoes wrote: > > This won't help the current issue (because it doesn't exist and might > > never), but just in case some compiler people are listening, I'd like to > > have some sort of way to tell the compiler "treat this variable as > > uninitialized from here on". So one could do > > > > #define kfree(p) do { __kfree(p); __magic_uninit(p); } while (0) > > > > with __magic_uninit being a magic no-op that doesn't affect the > > semantics of the code, but could be used by the compiler's "[is/may be] > > used uninitialized" machinery to flag e.g. double frees on some odd > > error path etc. It would probably only work for local automatic > > variables, but it should be possible to just ignore the hint if p is > > some expression like foo->bar or has side effects. If we had that, the > > end-of-loop test could include that to "uninitialize" the iterator. > > I've long wanted to change kfree() to explicitly set pointers to NULL on > free. https://github.com/KSPP/linux/issues/87 You also need to be a bit careful with existing code because there are places which do things like: drivers/usb/host/r8a66597-hcd.c 424 kfree(dev); ^^^ 425 426 for (port = 0; port < r8a66597->max_root_hub; port++) { 427 if (r8a66597->root_hub[port].dev == dev) { ^^^ 428 r8a66597->root_hub[port].dev = NULL; 429 break; 430 } 431 } Printing the freed pointer in debug code is another thing people do. regards, dan carpenter
[PATCH] drm: Remove redundant code
Clean up the following smatch warning: drivers/gpu/drm/radeon/atom.c:400 atom_skip_src_int() warn: ignoring unreachable code. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong --- drivers/gpu/drm/radeon/atom.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index c1bbfbe28bda..13940f5e783a 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -397,7 +397,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr) (*ptr)++; return; } - return; } } -- 2.20.1.7.g153144c
Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
> I think this would make sense, it would mean you only assign the containing > element on valid elements. > > I was thinking something along the lines of: > > #define list_for_each_entry(pos, head, member) > \ > for (struct list_head *list = head->next, typeof(pos) pos; \ >list == head ? 0 : (( pos = list_entry(pos, list, member), 1)); > \ >list = list->next) > > Although the initialization block of the for loop is not valid C, I'm > not sure there is any way to declare two variables of a different type > in the initialization part of the loop. It can be done using a *nested loop*, like this: #define list_for_each_entry(pos, head, member) \ for (struct list_head *list = head->next, cond = (struct list_head *)-1; cond == (struct list_head *)-1; cond = NULL) \ for (typeof(pos) pos; \ list == head ? 0 : (( pos = list_entry(pos, list, member), 1)); \ list = list->next) > > I believe all this does is get rid of the >member == (head) check > to terminate the list. Indeed, although the original way is harmless. > It alone will not fix any of the other issues that using the iterator > variable after the loop currently has. Yes, but I stick with the list_for_each_entry_inside(pos, type, head, member) way to make the iterator invisiable outside the loop (before and after the loop). It is maintainable longer-term than "type(pos) pos" one and perfect. see my explain: https://lore.kernel.org/lkml/20220302093106.8402-1-xiam0nd.t...@gmail.com/ and list_for_each_entry_inside(pos, type, head, member) patch here: https://lore.kernel.org/lkml/20220301075839.4156-3-xiam0nd.t...@gmail.com/ -- Xiaomeng Tong
RE: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote: > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote: > > The problem is the mis-use of iterator outside the loop on exit, and > > the iterator will be the HEAD's container_of pointer which pointers > > to a type-confused struct. Sidenote: The *mis-use* here refers to > > mistakely access to other members of the struct, instead of the > > list_head member which acutally is the valid HEAD. > > The problem is that the HEAD's container_of pointer should never > be calculated at all. > This is what is fundamentally broken about the current definition. Yes, the rule is "the HEAD's container_of pointer should never be calculated at all outside the loop", but how do you make sure everyone follows this rule? Everyone makes mistakes, but we can eliminate them all from the beginning with the help of compiler which can catch such use-after-loop things. > > IOW, you would dereference a (NULL + offset_of_member) address here. > >Where? In the case where a developer do not follows the above rule, and mistakely access a non-list-head member of the HEAD's container_of pointer outside the loop. For example: struct req{ int a; struct list_head h; } struct req *r; list_for_each_entry(r, HEAD, h) { if (r->a == 0x10) break; } // the developer made a mistake: he didn't take this situation into // account where all entries in the list are *r->a != 0x10*, and now // the r is the HEAD's container_of pointer. r->a = 0x20; Thus the "r->a = 0x20" would dereference a (NULL + offset_of_member) address here. > > Please remind me if i missed something, thanks. > > > > Can you share your "alternative definitions" details? thanks! > > The loop should probably use as extra variable that points > to the 'list node' in the next structure. > Something like: > for (xxx *iter = head->next; > iter == ? ((item = NULL),0) : ((item = > list_item(iter),1)); > iter = item->member->next) { > ... > With a bit of casting you can use 'item' to hold 'iter'. you still can not make sure everyone follows this rule: "do not use iterator outside the loop" without the help of compiler, because item is declared outside the loop. BTW, to avoid ambiguity,the "alternative definitions" here i asked is something from you in this context: "OTOH there may be alternative definitions that can be used to get the compiler (or other compiler-like tools) to detect broken code. Even if the definition can't possibly generate a working kerrnel." > > > > > OTOH there may be alternative definitions that can be used to get > > > the compiler (or other compiler-like tools) to detect broken code. > > > Even if the definition can't possibly generate a working kerrnel. > > > > The "list_for_each_entry_inside(pos, type, head, member)" way makes > > the iterator invisiable outside the loop, and would be catched by > > compiler if use-after-loop things happened. > It is also a compete PITA for anything doing a search. You mean it would be a burden on search? can you show me some examples? Or you mean it is too long the list_for_each_entry_inside* string to live in one single line, and should spilt into two line? If it is the case, there are 2 way to mitigate it. 1. pass a shorter t as struct type to the macro 2. after all list_for_each_entry macros be replaced with list_for_each_entry_inside, remove the list_for_each_entry implementations and rename all list_for_each_entry_inside use back to the origin list_for_each_entry in a single patch. For me, it is acceptable and not a such big problem. -- Xiaomeng Tong
Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr
> On 3. Mar 2022, at 05:58, David Laight wrote: > > From: Xiaomeng Tong >> Sent: 03 March 2022 02:27 >> >> On Wed, 2 Mar 2022 14:04:06 +, David Laight >> wrote: >>> I think that it would be better to make any alternate loop macro >>> just set the variable to NULL on the loop exit. >>> That is easier to code for and the compiler might be persuaded to >>> not redo the test. >> >> No, that would lead to a NULL dereference. > > Why, it would make it b ethe same as the 'easy to use': > for (item = head; item; item = item->next) { > ... > if (...) > break; > ... > } > if (!item) > return; > >> The problem is the mis-use of iterator outside the loop on exit, and >> the iterator will be the HEAD's container_of pointer which pointers >> to a type-confused struct. Sidenote: The *mis-use* here refers to >> mistakely access to other members of the struct, instead of the >> list_head member which acutally is the valid HEAD. > > The problem is that the HEAD's container_of pointer should never > be calculated at all. > This is what is fundamentally broken about the current definition. > >> IOW, you would dereference a (NULL + offset_of_member) address here. > > Where? > >> Please remind me if i missed something, thanks. >> >> Can you share your "alternative definitions" details? thanks! > > The loop should probably use as extra variable that points > to the 'list node' in the next structure. > Something like: > for (xxx *iter = head->next; > iter == ? ((item = NULL),0) : ((item = > list_item(iter),1)); > iter = item->member->next) { > ... > With a bit of casting you can use 'item' to hold 'iter'. I think this would make sense, it would mean you only assign the containing element on valid elements. I was thinking something along the lines of: #define list_for_each_entry(pos, head, member) \ for (struct list_head *list = head->next, typeof(pos) pos; \ list == head ? 0 : (( pos = list_entry(pos, list, member), 1)); \ list = list->next) Although the initialization block of the for loop is not valid C, I'm not sure there is any way to declare two variables of a different type in the initialization part of the loop. I believe all this does is get rid of the >member == (head) check to terminate the list. It alone will not fix any of the other issues that using the iterator variable after the loop currently has. AFAIK Adrián Moreno is working on doing something along those lines for the list iterator in openvswitch (that was similar to the kernel one before) [1]. I *think* they don't declare 'pos' within the loop which we *do want* to avoid any uses of it after the loop. (If pos is not declared in the initialization block, shadowing the *outer* pos, it would just point to the last element of the list or stay uninitialized if the list is empty). [1] https://www.mail-archive.com/ovs-dev@openvswitch.org/msg63497.html > >> >>> OTOH there may be alternative definitions that can be used to get >>> the compiler (or other compiler-like tools) to detect broken code. >>> Even if the definition can't possibly generate a working kerrnel. >> >> The "list_for_each_entry_inside(pos, type, head, member)" way makes >> the iterator invisiable outside the loop, and would be catched by >> compiler if use-after-loop things happened. > > It is also a compete PITA for anything doing a search. > > David > > - > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 > 1PT, UK > Registration No: 1397386 (Wales) > - Jakob
[PATCH 10/10] drm/amdgpu: add gang submit frontend
Allows submitting jobs as gang which needs to run on multiple engines at the same time. All members of the gang get the same implicit, explicit and VM dependencies. So no gang member will start running until everything else is ready. The last job is considered the gang leader (usually a submission to the GFX ring) and used for signaling output dependencies. Each job is remembered individually as user of a buffer object, so there is no joining of work at the end. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 244 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h| 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 +- 3 files changed, 173 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c6541f7b8f54..7429e64919fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -69,6 +69,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, unsigned int *num_ibs) { struct drm_sched_entity *entity; + unsigned int i; int r; r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, @@ -83,11 +84,19 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, return -EINVAL; /* Currently we don't support submitting to multiple entities */ - if (p->entity && p->entity != entity) + for (i = 0; i < p->gang_size; ++i) { + if (p->entities[i] == entity) + goto found; + } + + if (i == AMDGPU_CS_GANG_SIZE) return -EINVAL; - p->entity = entity; - ++(*num_ibs); + p->entities[i] = entity; + p->gang_size = i + 1; + +found: + ++(num_ibs[i]); return 0; } @@ -161,11 +170,12 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { }; struct amdgpu_vm *vm = >vm; uint64_t *chunk_array_user; uint64_t *chunk_array; - unsigned size, num_ibs = 0; uint32_t uf_offset = 0; + unsigned int size; int ret; int i; @@ -228,7 +238,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (size < sizeof(struct drm_amdgpu_cs_chunk_ib)) goto free_partial_kdata; - ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, _ibs); + ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); if (ret) goto free_partial_kdata; break; @@ -265,21 +275,27 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } } - ret = amdgpu_job_alloc(p->adev, num_ibs, >job, vm); - if (ret) - goto free_all_kdata; + if (!p->gang_size) + return -EINVAL; - ret = drm_sched_job_init(>job->base, p->entity, >vm); - if (ret) - goto free_all_kdata; + for (i = 0; i < p->gang_size; ++i) { + ret = amdgpu_job_alloc(p->adev, num_ibs[i], >jobs[i], vm); + if (ret) + goto free_all_kdata; + + ret = drm_sched_job_init(>jobs[i]->base, p->entities[i], +>vm); + if (ret) + goto free_all_kdata; + } - if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + if (p->ctx->vram_lost_counter != p->jobs[0]->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; } if (p->uf_entry.tv.bo) - p->job->uf_addr = uf_offset; + p->jobs[p->gang_size - 1]->uf_addr = uf_offset; kvfree(chunk_array); /* Use this opportunity to fill in task info for the vm */ @@ -301,22 +317,18 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, return ret; } -static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk, - unsigned int *num_ibs, - unsigned int *ce_preempt, - unsigned int *de_preempt) +static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + struct amdgpu_ib *ib, struct amdgpu_cs_chunk *chunk, + unsigned int *ce_preempt, unsigned int *de_preempt) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->job->base.sched); + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_ib *ib = >job->ibs[*num_ibs];
[PATCH 07/10] drm/amdgpu: move setting the job resources
Move setting the job resources into amdgpu_job.c Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 17 + drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 ++ 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dd9e708fe97f..c6541f7b8f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -825,9 +825,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm = >vm; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - struct amdgpu_bo *gds; - struct amdgpu_bo *gws; - struct amdgpu_bo *oa; int r; INIT_LIST_HEAD(>validated); @@ -941,22 +938,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); - gds = p->bo_list->gds_obj; - gws = p->bo_list->gws_obj; - oa = p->bo_list->oa_obj; - - if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; - p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; - p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; - p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; - } + amdgpu_job_set_resources(p->job, p->bo_list->gds_obj, +p->bo_list->gws_obj, p->bo_list->oa_obj); if (!r && p->uf_entry.tv.bo) { struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e4ca62225996..e07ceae36a5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -118,6 +118,23 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, return r; } +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa) +{ + if (gds) { + job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; + job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; + } + if (gws) { + job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; + job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; + } + if (oa) { + job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; + job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; + } +} + void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46..0bab8fe0d419 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -77,6 +77,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, enum amdgpu_ib_pool_type pool, struct amdgpu_job **job); +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); void amdgpu_job_free(struct amdgpu_job *job); int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, -- 2.25.1
[PATCH 09/10] drm/amdgpu: add gang submit backend
Allows submitting jobs as gang which needs to run on multiple engines at the same time. Basic idea is that we have a global gang submit fence representing when the gang leader is finally pushed to run on the hardware last. Jobs submitted as gang are never re-submitted in case of a GPU reset since this won't work and will just deadlock the hardware immediately again. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 28 -- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h| 3 ++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7f447ed7a67f..a664d43d7502 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -852,6 +852,7 @@ struct amdgpu_device { u64 fence_context; unsignednum_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; + struct dma_fence __rcu *gang_submit; boolib_pool_ready; struct amdgpu_sa_managerib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; @@ -1233,6 +1234,8 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); void amdgpu_device_halt(struct amdgpu_device *adev); +struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, + struct dma_fence *gang); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d78141e2c509..a116b8c08827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3512,6 +3512,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->gmc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; + RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; @@ -3989,6 +3990,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; + dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); amdgpu_reset_fini(adev); @@ -5744,3 +5746,35 @@ void amdgpu_device_halt(struct amdgpu_device *adev) pci_disable_device(pdev); pci_wait_for_pending_transaction(pdev); } + +/** + * amdgpu_device_switch_gang - switch to a new gang + * @adev: amdgpu_device pointer + * @gang: the gang to switch to + * + * Try to switch to a new gang or return a reference to the current gang if that + * isn't possible. + * Returns: Either NULL if we switched correctly or a reference to the existing + * gang. + */ +struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, + struct dma_fence *gang) +{ + struct dma_fence *old = NULL; + + do { + dma_fence_put(old); + old = dma_fence_get_rcu_safe(>gang_submit); + + if (old == gang) + break; + + if (!dma_fence_is_signaled(old)) + return old; + + } while (cmpxchg((struct dma_fence __force **)>gang_submit, +old, gang) != old); + + dma_fence_put(old); + return NULL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e07ceae36a5c..059e11c7898c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -169,11 +169,29 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) kfree(job); } +void amdgpu_job_set_gang_leader(struct amdgpu_job *job, + struct amdgpu_job *leader) +{ + struct dma_fence *fence = >base.s_fence->scheduled; + + WARN_ON(job->gang_submit); + + /* +* Don't add a reference when we are the gang leader to avoid circle +* dependency. +*/ + if (job != leader) + dma_fence_get(fence); + job->gang_submit = fence; +} + void amdgpu_job_free(struct amdgpu_job *job) { amdgpu_job_free_resources(job); amdgpu_sync_free(>sync); amdgpu_sync_free(>sched_sync); + if (job->gang_submit != >base.s_fence->scheduled) + dma_fence_put(job->gang_submit); /* only put the hw fence if has embedded fence */ if (job->hw_fence.ops != NULL) @@
[PATCH 08/10] drm/amdgpu: initialize the vmid_wait with the stub fence
This way we don't need to check for NULL any more. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ddf46802b1ff..4ba4b54092f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -188,7 +188,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, unsigned i; int r; - if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) + if (!dma_fence_is_signaled(ring->vmid_wait)) return amdgpu_sync_fence(sync, ring->vmid_wait); fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 35bcb6dc1816..7f33ae87cb41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -193,6 +193,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, adev->rings[ring->idx] = ring; ring->num_hw_submission = sched_hw_submission; ring->sched_score = sched_score; + ring->vmid_wait = dma_fence_get_stub(); r = amdgpu_fence_driver_init_ring(ring); if (r) return r; -- 2.25.1
[PATCH 06/10] drm/amdgpu: properly imbed the IBs into the job
We now have standard macros for that. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 6 -- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 38c9fd7b7ad4..e4ca62225996 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -78,14 +78,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm) { - size_t size = sizeof(struct amdgpu_job); - if (num_ibs == 0) return -EINVAL; - size += sizeof(struct amdgpu_ib) * num_ibs; - - *job = kzalloc(size, GFP_KERNEL); + *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); if (!*job) return -ENOMEM; @@ -95,7 +91,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, */ (*job)->base.sched = >rings[0]->sched; (*job)->vm = vm; - (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; amdgpu_sync_create(&(*job)->sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 6d704772ff42..d599c0540b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -25,6 +25,7 @@ #include #include "amdgpu_sync.h" +#include "amdgpu_ring.h" /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) @@ -48,12 +49,10 @@ struct amdgpu_job { struct amdgpu_vm*vm; struct amdgpu_sync sync; struct amdgpu_sync sched_sync; - struct amdgpu_ib*ibs; struct dma_fencehw_fence; struct dma_fence*external_hw_fence; uint32_tpreamble_status; uint32_tpreemption_status; - uint32_tnum_ibs; boolvm_needs_flush; uint64_tvm_pd_addr; unsignedvmid; @@ -69,6 +68,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_tjob_run_counter; + + uint32_tnum_ibs; + struct amdgpu_ibibs[]; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, -- 2.25.1
[PATCH 05/10] drm/amdgpu: use job and ib structures directly in CS parsers
Instead of providing the ib index provide the job and ib pointers directly to the patch and parse functions for UVD and VCE. Also move the set/get functions for IB values to the IB declerations. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 13 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 23 - drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 36 --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 116 --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 7 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c| 13 +-- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c| 25 ++--- 9 files changed, 129 insertions(+), 114 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 20bf6134baca..dd9e708fe97f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1024,12 +1024,14 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p) memcpy(ib->ptr, kptr, job->ibs[i].length_dw * 4); amdgpu_bo_kunmap(aobj); - r = amdgpu_ring_parse_cs(ring, p, i); + r = amdgpu_ring_parse_cs(ring, p, p->job, +>job->ibs[i]); if (r) return r; } else { ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, i); + r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, + >job->ibs[i]); amdgpu_bo_kunmap(aobj); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index 30136eb50d2a..652b5593499f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -73,19 +73,6 @@ struct amdgpu_cs_parser { struct amdgpu_cs_post_dep *post_deps; }; -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo, struct amdgpu_bo_va_mapping **mapping); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 05e789fc7a9e..a8bed1b47899 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -163,8 +163,12 @@ struct amdgpu_ring_funcs { u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ - int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); - int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx); + int (*parse_cs)(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib); + int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, +struct amdgpu_job *job, +struct amdgpu_ib *ib); /* constants to calculate how many DW are needed for an emit */ unsigned emit_frame_size; unsigned emit_ib_size; @@ -264,8 +268,8 @@ struct amdgpu_ring { atomic_t*sched_score; }; -#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) -#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) +#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) +#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) @@ -364,6 +368,17 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); +static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx) +{ + return ib->ptr[idx]; +} + +static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx, + uint32_t value) +{ + ib->ptr[idx] = value; +} + int
[PATCH 04/10] drm/amdgpu: remove SRIOV and MCBP dependencies from the CS
We should not have any different CS constrains based on the execution environment. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 58ddc4241f04..20bf6134baca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -320,8 +320,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, return -EINVAL; if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && - chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT && - (amdgpu_mcbp || amdgpu_sriov_vf(p->adev))) { + chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) (*ce_preempt)++; else @@ -1062,7 +1061,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (fpriv->csa_va) { bo_va = fpriv->csa_va; r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) -- 2.25.1
[PATCH 03/10] drm/amdgpu: cleanup and reorder amdgpu_cs.c
Sort the functions in the order they are called and cleanup the coding style and function names to represent the data they process. Check the size of the IB chunk, initialize resulting entity and scheduler job much earlier as well. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1374 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |2 +- 2 files changed, 683 insertions(+), 693 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6b6a9d925994..58ddc4241f04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -39,9 +39,61 @@ #include "amdgpu_gem.h" #include "amdgpu_ras.h" -static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_cs_chunk_fence *data, - uint32_t *offset) +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, +struct amdgpu_device *adev, +struct drm_file *filp, +union drm_amdgpu_cs *cs) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + + if (cs->in.num_chunks == 0) + return -EINVAL; + + memset(p, 0, sizeof(*p)); + p->adev = adev; + p->filp = filp; + + p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); + if (!p->ctx) + return -EINVAL; + + if (atomic_read(>ctx->guilty)) { + amdgpu_ctx_put(p->ctx); + return -ECANCELED; + } + return 0; +} + +static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_ib *chunk_ib, + unsigned int *num_ibs) +{ + struct drm_sched_entity *entity; + int r; + + r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, + chunk_ib->ring, ); + if (r) + return r; + + /* Abort if there is no run queue associated with this entity. +* Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + + /* Currently we don't support submitting to multiple entities */ + if (p->entity && p->entity != entity) + return -EINVAL; + + p->entity = entity; + ++(*num_ibs); + return 0; +} + +static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_fence *data, + uint32_t *offset) { struct drm_gem_object *gobj; struct amdgpu_bo *bo; @@ -80,11 +132,11 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return r; } -static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_bo_list_in *data) +static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data) { + struct drm_amdgpu_bo_list_entry *info; int r; - struct drm_amdgpu_bo_list_entry *info = NULL; r = amdgpu_bo_create_list_entry_array(data, ); if (r) @@ -104,7 +156,9 @@ static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, return r; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) +/* Copy the data from userspace and go over it the first time */ +static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = >vm; @@ -112,28 +166,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs uint64_t *chunk_array; unsigned size, num_ibs = 0; uint32_t uf_offset = 0; - int i; int ret; + int i; - if (cs->in.num_chunks == 0) - return 0; - - chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); + chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), +GFP_KERNEL); if (!chunk_array) return -ENOMEM; - p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); - if (!p->ctx) { - ret = -EINVAL; - goto free_chunk; - } - - /* skip guilty context job */ - if (atomic_read(>ctx->guilty) == 1) { - ret = -ECANCELED; - goto free_chunk; - } - /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -168,7 +208,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs size =
[PATCH 02/10] drm/amdgpu: header cleanup
No function change, just move a bunch of definitions from amdgpu.h into separate header files. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 95 --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h| 93 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 35 ++- .../gpu/drm/amd/amdgpu/amdgpu_trace_points.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + 10 files changed, 132 insertions(+), 100 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b89406b01694..7f447ed7a67f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -60,7 +60,6 @@ #include #include #include -#include #include #include "dm_pp_interface.h" @@ -276,9 +275,6 @@ extern int amdgpu_num_kcq; #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) struct amdgpu_device; -struct amdgpu_ib; -struct amdgpu_cs_parser; -struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; @@ -465,20 +461,6 @@ struct amdgpu_flip_work { }; -/* - * CP & rings. - */ - -struct amdgpu_ib { - struct amdgpu_sa_bo *sa_bo; - uint32_tlength_dw; - uint64_tgpu_addr; - uint32_t*ptr; - uint32_tflags; -}; - -extern const struct drm_sched_backend_ops amdgpu_sched_ops; - /* * file private structure */ @@ -494,79 +476,6 @@ struct amdgpu_fpriv { int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); -int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool, - struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ibs, struct amdgpu_job *job, - struct dma_fence **f); -int amdgpu_ib_pool_init(struct amdgpu_device *adev); -void amdgpu_ib_pool_fini(struct amdgpu_device *adev); -int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - -/* - * CS. - */ -struct amdgpu_cs_chunk { - uint32_tchunk_id; - uint32_tlength_dw; - void*kdata; -}; - -struct amdgpu_cs_post_dep { - struct drm_syncobj *syncobj; - struct dma_fence_chain *chain; - u64 point; -}; - -struct amdgpu_cs_parser { - struct amdgpu_device*adev; - struct drm_file *filp; - struct amdgpu_ctx *ctx; - - /* chunks */ - unsignednchunks; - struct amdgpu_cs_chunk *chunks; - - /* scheduler job object */ - struct amdgpu_job *job; - struct drm_sched_entity *entity; - - /* buffer objects */ - struct ww_acquire_ctx ticket; - struct amdgpu_bo_list *bo_list; - struct amdgpu_mn*mn; - struct amdgpu_bo_list_entry vm_pd; - struct list_headvalidated; - struct dma_fence*fence; - uint64_tbytes_moved_threshold; - uint64_tbytes_moved_vis_threshold; - uint64_tbytes_moved; - uint64_tbytes_moved_vis; - - /* user fence */ - struct amdgpu_bo_list_entry uf_entry; - - unsignednum_post_deps; - struct amdgpu_cs_post_dep *post_deps; -}; - -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - /* * Writeback */ @@ -1425,10 +1334,6 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) { return 0; } #endif -int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, - uint64_t addr, struct amdgpu_bo **bo, - struct amdgpu_bo_va_mapping **mapping); - #if defined(CONFIG_DRM_AMD_DC) int amdgpu_dm_display_resume(struct amdgpu_device *adev ); #else diff --git
[PATCH 01/10] drm/amdgpu: install ctx entities with cmpxchg
Since we removed the context lock we need to make sure that not two threads are trying to install an entity at the same time. Signed-off-by: Christian König Fixes: e68efb27647f ("drm/amdgpu: remove ctx->lock") --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c1f8b0e37b93..72c5f1c53d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -204,9 +204,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, if (r) goto error_free_entity; - ctx->entities[hw_ip][ring] = entity; + /* It's not an error if we fail to install the new entity */ + if (cmpxchg(>entities[hw_ip][ring], NULL, entity)) + goto cleanup_entity; + return 0; +cleanup_entity: + drm_sched_entity_fini(>entity); + error_free_entity: kfree(entity); -- 2.25.1
Gang submit
Hi guys, this patch set implements the the requirement for so called gang submissions in the CS interface. A gang submission guarantees that multiple IBs can run on different engines at the same time. This is implemented by keeping a global per-device gang around represented by a dma_fence which signals as soon as all jobs in a gang are pushed to the hardware. The effect is that as long as members of a gang are waiting to be submitted no other gang can start pushing jobs to the hardware and so deadlocks are effectively prevented. The whole set is based on top of my dma_resv_usage work and a few patches merged over from amd-staging-drm-next, so it won't easily apply anywhere. Please review and comment, Christian.
[PATCH 2/2] drm/amdkfd: implement get_atc_vmid_pasid_mapping_info for gfx10.3
This patch implements get_atc_vmid_pasid_mapping_info for gfx10.3 Signed-off-by: Yifan Zhang --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index e9c80ce13f3e..ba21ec6b35e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -26,6 +26,8 @@ #include "gc/gc_10_3_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" #include "oss/osssys_5_0_0_sh_mask.h" +#include "athub/athub_2_1_0_offset.h" +#include "athub/athub_2_1_0_sh_mask.h" #include "soc15_common.h" #include "v10_structs.h" #include "nv.h" @@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev, return 0; } +static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) ++ vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .hqd_destroy = hqd_destroy_v10_3, .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, .wave_control_execute = wave_control_execute_v10_3, - .get_atc_vmid_pasid_mapping_info = NULL, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, #if 0 -- 2.25.1
[PATCH 1/2] drm/amdkfd: judge get_atc_vmid_pasid_mapping_info before call
Fix the NULL point issue: [ 3076.255609] BUG: kernel NULL pointer dereference, address: [ 3076.255624] #PF: supervisor instruction fetch in kernel mode [ 3076.255637] #PF: error_code(0x0010) - not-present page [ 3076.255649] PGD 0 P4D 0 [ 3076.255660] Oops: 0010 [#1] SMP NOPTI [ 3076.255669] CPU: 20 PID: 2415 Comm: kfdtest Tainted: GW OE 5.11.0-41-generic #45~20.04.1-Ubuntu [ 3076.255691] Hardware name: AMD Splinter/Splinter-RPL, BIOS VS2326337N.FD 02/07/2022 [ 3076.255706] RIP: 0010:0x0 [ 3076.255718] Code: Unable to access opcode bytes at RIP 0xffd6. [ 3076.255732] RSP: 0018:b64283e3fc10 EFLAGS: 00010297 [ 3076.255744] RAX: RBX: 0008 RCX: 0027 [ 3076.255759] RDX: b64283e3fc1e RSI: 0008 RDI: 8c7a87f6 [ 3076.255776] RBP: b64283e3fc78 R08: 8c7d88518ac0 R09: b64283e3fa60 [ 3076.255791] R10: 0001 R11: 0001 R12: 000f [ 3076.255805] R13: 8c7bdcea5800 R14: 8c7a9f3f3000 R15: 8c7a8696bc00 [ 3076.255820] FS: () GS:8c7d8850() knlGS: [ 3076.255839] CS: 0010 DS: ES: CR0: 80050033 [ 3076.255851] CR2: ffd6 CR3: 000109e3c000 CR4: 00750ee0 [ 3076.255866] PKRU: 5554 [ 3076.255873] Call Trace: [ 3076.255884] dbgdev_wave_reset_wavefronts+0x72/0x160 [amdgpu] [ 3076.256025] process_termination_cpsch.cold+0x26/0x2f [amdgpu] [ 3076.256182] ? ktime_get_mono_fast_ns+0x4e/0xa0 [ 3076.256196] kfd_process_dequeue_from_all_devices+0x49/0x70 [amdgpu] [ 3076.256328] kfd_process_notifier_release+0x187/0x2b0 [amdgpu] [ 3076.256451] ? mn_itree_inv_end+0xdc/0x110 [ 3076.256463] __mmu_notifier_release+0x74/0x1f0 [ 3076.256474] exit_mmap+0x170/0x200 [ 3076.256484] ? __handle_mm_fault+0x677/0x920 [ 3076.256496] ? _cond_resched+0x19/0x30 [ 3076.256507] mmput+0x5d/0x130 [ 3076.256518] do_exit+0x332/0xaf0 [ 3076.256526] ? handle_mm_fault+0xd7/0x2b0 [ 3076.256537] do_group_exit+0x43/0xa0 [ 3076.256548] __x64_sys_exit_group+0x18/0x20 [ 3076.256559] do_syscall_64+0x38/0x90 [ 3076.256569] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Yifan Zhang --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1cd2ea536bd0..77364afdc606 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -505,14 +505,16 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process * to check which VMID the current process is mapped to. */ - for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { - status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info - (dev->adev, vmid, _pasid); - - if (status && queried_pasid == p->pasid) { - pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", - vmid, p->pasid); - break; + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info + (dev->adev, vmid, _pasid); + + if (status && queried_pasid == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", + vmid, p->pasid); + break; + } } } -- 2.25.1