RE: [PATCH Review 2/2] drm/amdgpu: message smu to update bad channel info

2022-03-03 Thread Zhou1, Tao
[AMD Official Use Only]

The series is:

Reviewed-by: Tao Zhou 

> -----Original Message-----
> From: Stanley.Yang 
> Sent: Friday, March 4, 2022 2:51 PM
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> ; Zhou1, Tao ; Joo, Maria
> 
> Cc: Yang, Stanley 
> Subject: [PATCH Review 2/2] drm/amdgpu: message smu to update bad channel
> info
> 
> It should notify the SMU to update bad channel info when an uncorrectable
> error is detected in the UMC block
> 
> Change-Id: I2dc8848affdb53e52891013953ae9383fff5f20f
> Signed-off-by: Stanley.Yang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |  7 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h   |  3 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c| 25 +--
>  .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h|  4 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   |  5 
>  5 files changed, 42 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index d3875618ebf5..f9104f99eb9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2068,6 +2068,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
>   mutex_init(&con->recovery_lock);
>   INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
>   atomic_set(&con->in_recovery, 0);
> + con->eeprom_control.bad_channel_bitmap = 0;
> 
>   max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
>   amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
> @@ -2092,6 +2093,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
>   goto free;
> 
>   amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
> +
> + if (con->update_channel_flag == true) {
> + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
> + con->update_channel_flag = false;
> + }
>   }
> 
>  #ifdef CONFIG_X86_MCE_AMD
> @@ -2285,6 +2291,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>   goto release_con;
>   }
> 
> + con->update_channel_flag = false;
>   con->features = 0;
>   INIT_LIST_HEAD(>head);
>   /* Might need get this flag from vbios. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 7cddaad90d6d..9314fde81e68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -374,6 +374,9 @@ struct amdgpu_ras {
> 
>   /* record umc error info queried from smu */
>   struct umc_ecc_info umc_ecc;
> +
> + /* Indicates whether smu needs to update bad channel info */
> + bool update_channel_flag;
>  };
> 
>  struct ras_fs_data {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> index 2b844a5aafdb..ad5d8667756d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> @@ -265,6 +265,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
>  {
>   struct amdgpu_device *adev = to_amdgpu_device(control);
>   struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
> + struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>   u8 csum;
>   int res;
> 
> @@ -285,6 +286,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
> 
>   amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
> 
> + control->bad_channel_bitmap = 0;
> + amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
> + con->update_channel_flag = false;
> +
>   amdgpu_ras_debugfs_set_ret_size(control);
> 
>   mutex_unlock(&control->ras_tbl_mutex);
> @@ -418,6 +423,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
>  struct eeprom_table_record *record,
>  const u32 num)
>  {
> + struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
>   u32 a, b, i;
>   u8 *buf, *pp;
>   int res;
> @@ -429,9 +435,16 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
>   /* Encode all of them in one go.
>*/
>   pp = buf;
> - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE)
> + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
>   __encode_table_record_to_buf(control, &record[i], pp);
> 
> + /* update bad channel bitmap */
> + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
> + control->bad_channel_bitmap |= 1 << record[i].mem_channel;
> + con->update_channel_flag = true;
> + }
> + }
> +
>   /* a, first record index to write into.
> * b, last record index to write into.

[PATCH Review 2/2] drm/amdgpu: message smu to update bad channel info

2022-03-03 Thread Stanley . Yang
It should notify the SMU to update bad channel info when an uncorrectable
error is detected in the UMC block

Change-Id: I2dc8848affdb53e52891013953ae9383fff5f20f
Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   |  7 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h   |  3 +++
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c| 25 +--
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h|  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c   |  5 
 5 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d3875618ebf5..f9104f99eb9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2068,6 +2068,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
	mutex_init(&con->recovery_lock);
	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
	atomic_set(&con->in_recovery, 0);
+   con->eeprom_control.bad_channel_bitmap = 0;
 
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
@@ -2092,6 +2093,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
goto free;
 
	amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+
+   if (con->update_channel_flag == true) {
+		amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+   con->update_channel_flag = false;
+   }
}
 
 #ifdef CONFIG_X86_MCE_AMD
@@ -2285,6 +2291,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
 
+   con->update_channel_flag = false;
con->features = 0;
INIT_LIST_HEAD(>head);
/* Might need get this flag from vbios. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7cddaad90d6d..9314fde81e68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -374,6 +374,9 @@ struct amdgpu_ras {
 
/* record umc error info queried from smu */
struct umc_ecc_info umc_ecc;
+
+	/* Indicates whether smu needs to update bad channel info */
+   bool update_channel_flag;
 };
 
 struct ras_fs_data {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 2b844a5aafdb..ad5d8667756d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -265,6 +265,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 {
struct amdgpu_device *adev = to_amdgpu_device(control);
	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
u8 csum;
int res;
 
@@ -285,6 +286,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 
amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
 
+   control->bad_channel_bitmap = 0;
+   amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
+   con->update_channel_flag = false;
+
amdgpu_ras_debugfs_set_ret_size(control);
 
	mutex_unlock(&control->ras_tbl_mutex);
@@ -418,6 +423,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
   struct eeprom_table_record *record,
   const u32 num)
 {
+	struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
u32 a, b, i;
u8 *buf, *pp;
int res;
@@ -429,9 +435,16 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
/* Encode all of them in one go.
 */
pp = buf;
-   for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE)
+   for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
		__encode_table_record_to_buf(control, &record[i], pp);
 
+   /* update bad channel bitmap */
+		if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+			control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+   con->update_channel_flag = true;
+   }
+   }
+
/* a, first record index to write into.
 * b, last record index to write into.
 * a = first index to read (fri) + number of records in the table,
@@ -684,6 +697,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
   const u32 num)
 {
struct amdgpu_device *adev = to_amdgpu_device(control);
+   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int i, res;
u8 *buf, *pp;
u32 g0, g1;
@@ -751,8 +765,15 @@ int 

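The core of the series is easy to state: keep one bitmap of bad HBM channels,
set a bit for each retired page's channel, and only message the SMU when a new
bit appears. Below is a minimal, self-contained C sketch of that pattern;
every name in it is an illustrative stand-in, not one of the driver's real
symbols.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver state and the SMU message. */
struct eeprom_record { uint32_t mem_channel; };

static uint32_t bad_channel_bitmap;
static bool update_channel_flag;

static void send_hbm_bad_channel_flag(uint32_t bitmap)
{
	printf("message SMU: bad channel bitmap = 0x%08x\n", (unsigned)bitmap);
}

/* Mirrors the append path: only flag an SMU update when a record adds a
 * channel bit that is not yet in the bitmap. */
static void record_bad_channels(const struct eeprom_record *recs, unsigned n)
{
	for (unsigned i = 0; i < n; i++) {
		uint32_t bit = 1u << recs[i].mem_channel;

		if (!(bad_channel_bitmap & bit)) {
			bad_channel_bitmap |= bit;
			update_channel_flag = true;
		}
	}

	if (update_channel_flag) {
		send_hbm_bad_channel_flag(bad_channel_bitmap);
		update_channel_flag = false;
	}
}

int main(void)
{
	struct eeprom_record recs[] = { { 2 }, { 5 }, { 2 } };

	record_bad_channels(recs, 3);	/* sends 0x24 once */
	record_bad_channels(recs, 3);	/* no new bits, no message */
	return 0;
}
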
[PATCH Review 1/2] drm/amd/pm: add send bad channel info function

2022-03-03 Thread Stanley . Yang
Support messaging the SMU to update HBM bad channel info in the OOB
table

Change-Id: I1e50ed8118f4c1aaefb04c040e59ae4918cdc295
Signed-off-by: Stanley.Yang 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c   | 12 ++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 +
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 +++
 .../pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h|  3 +-
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c| 43 +++
 7 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 1d63f1e8884c..9a892d6d1d7a 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -507,6 +507,18 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size)
return ret;
 }
 
+int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size)
+{
+   struct smu_context *smu = adev->powerplay.pp_handle;
+   int ret = 0;
+
+	mutex_lock(&adev->pm.mutex);
+   ret = smu_send_hbm_bad_channel_flag(smu, size);
+	mutex_unlock(&adev->pm.mutex);
+
+   return ret;
+}
+
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
  enum pp_clock_type type,
  uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index ddfa55b59d02..3e78b3057277 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -412,6 +412,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
 int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
 int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
+int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
   enum pp_clock_type type,
   uint32_t *min,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7e79a67bb8ef..f1544755d8b4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3052,3 +3052,13 @@ int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size)
 
return ret;
 }
+
+int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)
+{
+   int ret = 0;
+
+   if (smu->ppt_funcs && smu->ppt_funcs->send_hbm_bad_channel_flag)
+   ret = smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size);
+
+   return ret;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index fbef3ab8d487..ef57b6089c69 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1292,6 +1292,12 @@ struct pptable_funcs {
 * @set_config_table: Apply the input DriverSmuConfig table settings.
 */
	int (*set_config_table)(struct smu_context *smu, struct config_table_setting *table);
+
+	/**
+	 * @send_hbm_bad_channel_flag: message SMU to update bad channel info
+	 * of SMUBUS table.
+	 */
+	int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size);
 };
 
 typedef enum {
@@ -1428,5 +1434,6 @@ int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
 int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
 void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
+int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
 #endif
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h
index ab66a4b9e438..0f498baf6838 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h
@@ -103,7 +103,8 @@
 #define PPSMC_MSG_GfxDriverResetRecovery   0x42
 #define PPSMC_MSG_BoardPowerCalibration0x43
 #define PPSMC_MSG_HeavySBR  0x45
-#define PPSMC_Message_Count0x46
+#define PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel 0x46
+#define PPSMC_Message_Count0x47
 
 
 //PPSMC Reset Types
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index d787c3b9fc52..9f6f306eeca0 100644
--- 

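The message added in patch 1/2 travels through three layers: the amdgpu_dpm
wrapper serializes on the pm mutex, the common smu code dispatches through the
per-ASIC ppt_funcs table, and only ASICs that populate the callback (here,
aldebaran) send the new PPSMC message. A compilable toy version of that
dispatch pattern, with illustrative names and a printf standing in for the
firmware message:

#include <stdint.h>
#include <stdio.h>

struct smu_context;

/* ASIC-specific function table; only ASICs that support the message
 * populate the callback, mirroring pptable_funcs. */
struct ppt_funcs {
	int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size);
};

struct smu_context {
	const struct ppt_funcs *ppt_funcs;
};

/* Common layer: silently succeeds when the ASIC has no handler. */
static int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)
{
	int ret = 0;

	if (smu->ppt_funcs && smu->ppt_funcs->send_hbm_bad_channel_flag)
		ret = smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size);

	return ret;
}

static int aldebaran_send(struct smu_context *smu, uint32_t size)
{
	printf("send PPSMC message, arg=0x%x\n", (unsigned)size);
	return 0;
}

static const struct ppt_funcs aldebaran_funcs = {
	.send_hbm_bad_channel_flag = aldebaran_send,
};

int main(void)
{
	struct smu_context with = { &aldebaran_funcs }, without = { 0 };

	smu_send_hbm_bad_channel_flag(&with, 0x24);	/* dispatches */
	smu_send_hbm_bad_channel_flag(&without, 0x24);	/* no-op, returns 0 */
	return 0;
}
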
Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready

2022-03-03 Thread Chen, JingWen
Thanks a lot

Best Regards,
JingWen Chen



> On Mar 4, 2022, at 00:36, Grodzovsky, Andrey  
> wrote:
> 
> I pushed all the changes including your patch.
> 
> Andrey
> 
> On 2022-03-02 22:16, Andrey Grodzovsky wrote:
>> OK, i will do quick smoke test tomorrow and push all of it then.
>> 
>> Andrey
>> 
>> On 2022-03-02 21:59, Chen, JingWen wrote:
>>> Hi Andrey,
>>> 
>>> I don't have the bare metal environment, I can only test the SRIOV cases.
>>> 
>>> Best Regards,
>>> JingWen Chen
>>> 
>>> 
>>> 
 On Mar 3, 2022, at 01:55, Grodzovsky, Andrey  
 wrote:
 
 The patch is acked-by: Andrey Grodzovsky 
 
 If you also smoke tested bare metal feel free to apply all the patches, 
 if not let me know.
 
 Andrey
 
 On 2022-03-02 04:51, JingWen Chen wrote:
> Hi Andrey,
> 
> Most part of the patches are OK, but the code will introduce a ib test 
> fail on the disabled vcn of sienna_cichlid.
> 
> In SRIOV use case we will disable one vcn on sienna_cichlid, I have 
> attached a patch to fix this issue, please check the attachment.
> 
> Best Regards,
> 
> Jingwen Chen
> 
> 
> On 2/26/22 5:22 AM, Andrey Grodzovsky wrote:
>> Hey, patches attached - i applied the patches and resolved merge 
>> conflicts but weren't able to test as my on board's network card doesn't 
>> work with 5.16 kernel (it does with 5.17, maybe it's Kconfig issue and i 
>> need to check more).
>> The patches are on top of 'cababde192b2 Yifan Zhang 2 days ago   
>>   drm/amd/pm: fix mode2 reset fail for smu 13.0.5 ' commit.
>> 
>> Please test and let me know. Maybe by Monday I will be able to resolve 
>> the connectivity issue on 5.16.
>> 
>> Andrey
>> 
>> On 2022-02-24 22:13, JingWen Chen wrote:
>>> Hi Andrey,
>>> 
>>> Sorry for the misleading, I mean the whole patch series. We are 
>>> depending on this patch series to fix the concurrency issue within 
>>> SRIOV TDR sequence.
>>> 
>>> 
>>> 
>>> On 2/25/22 1:26 AM, Andrey Grodzovsky wrote:
 No problem if so but before I do,
 
 
 JingWen - why you think this patch is needed as a standalone now ? It 
 has no use without the
 entire feature together with it. Is it some changes you want to do on 
 top of that code ?
 
 
 Andrey
 
 
 On 2022-02-24 12:12, Deucher, Alexander wrote:
> [Public]
> 
> 
> If it applies cleanly, feel free to drop it in. I'll drop those 
> patches for drm-next since they are already in drm-misc.
> 
> Alex
> 
> 
>  
> *From:* amd-gfx  on behalf of 
> Andrey Grodzovsky 
> *Sent:* Thursday, February 24, 2022 11:24 AM
> *To:* Chen, JingWen ; Christian König 
> ; dri-de...@lists.freedesktop.org 
> ; amd-gfx@lists.freedesktop.org 
> 
> *Cc:* Liu, Monk ; Chen, Horace 
> ; Lazar, Lijo ; Koenig, 
> Christian ; dan...@ffwll.ch 
> 
> *Subject:* Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to 
> after XGMI is ready
> No because all the patch-set including this patch was landed into
> drm-misc-next and will reach amd-staging-drm-next on the next upstream
> rebase i guess.
> 
> Andrey
> 
> On 2022-02-24 01:47, JingWen Chen wrote:
>> Hi Andrey,
>> 
>> Will you port this patch into amd-staging-drm-next?
>> 
>> on 2/10/22 2:06 AM, Andrey Grodzovsky wrote:
>>> All comments are fixed and code pushed. Thanks for everyone
>>> who helped reviewing.
>>> 
>>> Andrey
>>> 
>>> On 2022-02-09 02:53, Christian König wrote:
 On 09.02.22 at 01:23, Andrey Grodzovsky wrote:
> Before we initialize schedulers we must know which reset
> domain are we in - for single device there is a single
> domain per device and so single wq per device. For XGMI
> the reset domain spans the entire XGMI hive and so the
> reset wq is per hive.
> 
> Signed-off-by: Andrey Grodzovsky 
 One more comment below, with that fixed Reviewed-by: Christian 
 König .
 
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 
> ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +
>   3 files changed, 51 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> 

2022 X.Org Board of Directors Elections timeline extended, Request for nominations

2022-03-03 Thread Lyude Paul
We are seeking nominations for candidates for election to the X.org Foundation
Board of Directors. However, as we presently do not have enough nominations to
start the election - the decision has been made to extend the timeline by 2
weeks. Note this is a fairly regular part of the elections process.

The new deadline for nominations to the X.org Board of Directors is 23:59 UTC
on 20th March 2022.

The Board consists of directors elected from the membership.  Each year, an
election is held to bring the total number of directors to eight. The four
members receiving the highest vote totals will serve as directors for two year
terms.

The directors who received two year terms starting in 2021 were Lyude Paul,
Samuel Iglesias Gonsálvez, Manasi D Navare and Daniel Vetter. They will
continue to serve until their term ends in 2023. Current directors whose term
expires in 2022 are Emma Anholt, Keith Packard, Harry Wentland and Mark
Filion.

A director is expected to participate in the fortnightly IRC meeting to
discuss current business and to attend the annual meeting of the X.Org
Foundation, which will be held at a location determined in advance by the
Board of Directors.

A member may nominate themselves or any other member they feel is qualified.
Nominations should be sent to the Election Committee at elections at x.org.

Nominees shall be required to be current members of the X.Org Foundation, and
submit a personal statement of up to 200 words that will be provided to
prospective voters.  The collected statements, along with the statement of
contribution to the X.Org Foundation in the member's account page on
http://members.x.org, will be made available to all voters to help them make
their voting decisions.

Nominations, membership applications or renewals and completed personal
statements must be received no later than 23:59 UTC on 20th March 2022.

The slate of candidates will be published 28th March 2022 and candidate Q&A
will begin then. The deadline for Xorg membership applications and renewals is
31st March 2022.

Cheers,
   Lyude Paul, on behalf of the X.Org BoD



Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c

2022-03-03 Thread Gustavo A. R. Silva
On Thu, Mar 03, 2022 at 12:19:57PM -0600, Gustavo A. R. Silva wrote:
> On Thu, Mar 03, 2022 at 09:43:28AM -0800, Kees Cook wrote:
> > On Thu, Mar 03, 2022 at 11:25:03AM -0600, Gustavo A. R. Silva wrote:
> > > Fix the following Wstringop-overflow warnings when building with GCC-11:
> > > 
> > > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: 
> > > warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 
> > > 1 [-Wstringop-overflow=]
> > 
> > Can you "show your work" a little more here? I don't actually see the
> > what is getting fixed:
> > 
> > enum dc_lane_count {
> > ...
> > LANE_COUNT_FOUR = 4,
> > ...
> > LANE_COUNT_DP_MAX = LANE_COUNT_FOUR
> > };
> > 
> > struct link_training_settings {
> > ...
> > union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX];
> > };
> > 
> > void dp_hw_to_dpcd_lane_settings(
> > ...
> > union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
> > {
> > ...
> > }
> > 
> > static enum link_training_result dpia_training_cr_transparent(
> > ...
> > struct link_training_settings *lt_settings)
> > {
> > ...
> > dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
> > lt_settings->hw_lane_settings, 
> > lt_settings->dpcd_lane_settings);
> > ...
> > }
> > 
> > Everything looks to be the correct size?
> 
> Yep; this fix is similar to the one for intel_pm.c in this
> 
>   commit e7c6e405e171fb33990a12ecfd14e6500d9e5cf2
> 
> where the array size of 8 seems to be fine for all the
> struct members related (pri_latency, spr_latency, cur_latency
> and skl_latency):
> 
> drivers/gpu/drm/i915/i915_drv.h:465:struct drm_i915_private {
> ...
> 
> drivers/gpu/drm/i915/i915_drv.h-739-struct {
> 
> ...
> drivers/gpu/drm/i915/i915_drv.h-745-/* primary */
> drivers/gpu/drm/i915/i915_drv.h-746-u16 pri_latency[5];
> drivers/gpu/drm/i915/i915_drv.h-747-/* sprite */
> drivers/gpu/drm/i915/i915_drv.h-748-u16 spr_latency[5];
> drivers/gpu/drm/i915/i915_drv.h-749-/* cursor */
> drivers/gpu/drm/i915/i915_drv.h-750-u16 cur_latency[5];
> drivers/gpu/drm/i915/i915_drv.h-751-/*
> drivers/gpu/drm/i915/i915_drv.h-752- * Raw watermark memory 
> latency values
> drivers/gpu/drm/i915/i915_drv.h-753- * for SKL for all 8 levels
> drivers/gpu/drm/i915/i915_drv.h-754- * in 1us units.
> drivers/gpu/drm/i915/i915_drv.h-755- */
> drivers/gpu/drm/i915/i915_drv.h-756-u16 skl_latency[8];
> 
> ...
> drivers/gpu/drm/i915/i915_drv.h-773-} wm;
> ...
> }

and in this case the ilk_wm_max_level() returns the right maximum size for the
corresponding 'struct wm' member:

drivers/gpu/drm/i915/intel_pm.c:2993:int ilk_wm_max_level(const struct 
drm_i915_private *dev_priv)
drivers/gpu/drm/i915/intel_pm.c-2994-{
drivers/gpu/drm/i915/intel_pm.c-2995-   /* how many WM levels are we expecting 
*/
drivers/gpu/drm/i915/intel_pm.c-2996-   if (HAS_HW_SAGV_WM(dev_priv))
drivers/gpu/drm/i915/intel_pm.c-2997-   return 5;
drivers/gpu/drm/i915/intel_pm.c-2998-   else if (DISPLAY_VER(dev_priv) >= 9)
drivers/gpu/drm/i915/intel_pm.c-2999-   return 7;
drivers/gpu/drm/i915/intel_pm.c-3000-   else if (IS_HASWELL(dev_priv) || 
IS_BROADWELL(dev_priv))
drivers/gpu/drm/i915/intel_pm.c-3001-   return 4;
drivers/gpu/drm/i915/intel_pm.c-3002-   else if (DISPLAY_VER(dev_priv) >= 6)
drivers/gpu/drm/i915/intel_pm.c-3003-   return 3;
drivers/gpu/drm/i915/intel_pm.c-3004-   else
drivers/gpu/drm/i915/intel_pm.c-3005-   return 2;
drivers/gpu/drm/i915/intel_pm.c-3006-}

drivers/gpu/drm/i915/intel_pm.c:3009:static void intel_print_wm_latency(struct 
drm_i915_private *dev_priv,
drivers/gpu/drm/i915/intel_pm.c-3010-  const char 
*name,
drivers/gpu/drm/i915/intel_pm.c-3011-  const u16 
wm[])
drivers/gpu/drm/i915/intel_pm.c-3012-{
drivers/gpu/drm/i915/intel_pm.c-3013-   int level, max_level = 
ilk_wm_max_level(dev_priv);
drivers/gpu/drm/i915/intel_pm.c-3014-
drivers/gpu/drm/i915/intel_pm.c-3015-   for (level = 0; level <= max_level; 
level++) {
drivers/gpu/drm/i915/intel_pm.c-3016-   unsigned int latency = 
wm[level];
drivers/gpu/drm/i915/intel_pm.c-3017-
...
}

still GCC warns about this with Wstringop-overread, as it is explained
in commit e7c6e405e171.

--
Gustavo

> 
> however GCC warns about accessing bytes beyond the limits, and turning the
> argument declarations into pointers (removing the over-specified array
> size from the argument declaration) silence the warnings.
> 
> --
> Gustavo

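For readers new to this warning class: a sized array bound in a C parameter
declaration does not change the ABI, since the parameter still decays to a
plain pointer, but GCC 11's object-size tracking compares the bound against
the caller's object and can emit -Wstringop-overflow even when every access is
in range. A small standalone illustration of the before/after signature shape,
using simplified types rather than the actual DC definitions:

#include <stdio.h>

#define LANE_COUNT_DP_MAX 4

union dpcd_training_lane { unsigned char raw; };

/* Before the fix the parameter was declared as
 *   union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]
 * The bound is purely advisory (the parameter decays to a pointer), but
 * it gives the compiler's object-size analysis something to compare
 * against. Dropping the bound keeps the generated code identical and
 * silences the warning. */
void dp_decide_lane_settings(union dpcd_training_lane dpcd_lane_settings[])
{
	for (int lane = 0; lane < LANE_COUNT_DP_MAX; lane++)
		dpcd_lane_settings[lane].raw = 0;	/* all accesses in range */
}

int main(void)
{
	union dpcd_training_lane settings[LANE_COUNT_DP_MAX];

	dp_decide_lane_settings(settings);
	printf("lane0 raw = %u\n", settings[0].raw);
	return 0;
}
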

Re: [PATCH 08/10] drm/amdgpu: initialize the vmid_wait with the stub fence

2022-03-03 Thread Andrey Grodzovsky

Reviewed-by: Andrey Grodzovsky 

Andrey

On 2022-03-03 03:23, Christian König wrote:

This way we don't need to check for NULL any more.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c  | 2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 +
  2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ddf46802b1ff..4ba4b54092f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -188,7 +188,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
unsigned i;
int r;
  
-	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))

+   if (!dma_fence_is_signaled(ring->vmid_wait))
return amdgpu_sync_fence(sync, ring->vmid_wait);
  
  	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 35bcb6dc1816..7f33ae87cb41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -193,6 +193,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
adev->rings[ring->idx] = ring;
ring->num_hw_submission = sched_hw_submission;
ring->sched_score = sched_score;
+   ring->vmid_wait = dma_fence_get_stub();
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;

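The idea generalizes beyond vmid_wait: initialize a fence pointer with an
already-signaled sentinel instead of NULL, and every consumer can drop its
NULL check. A compilable toy version with illustrative types, not the kernel's
real dma_fence API:

#include <stdbool.h>
#include <stdio.h>

/* Toy fence; the shared stub is created already signaled, mirroring
 * what dma_fence_get_stub() returns. */
struct fence { bool signaled; };

static struct fence stub_fence = { .signaled = true };

static struct fence *fence_get_stub(void) { return &stub_fence; }

static bool fence_is_signaled(const struct fence *f) { return f->signaled; }

struct ring { struct fence *vmid_wait; };

static void ring_init(struct ring *ring)
{
	/* Initialize with the stub so consumers never see NULL... */
	ring->vmid_wait = fence_get_stub();
}

static bool vmid_must_wait(const struct ring *ring)
{
	/* ...and the NULL check disappears from the hot path. */
	return !fence_is_signaled(ring->vmid_wait);
}

int main(void)
{
	struct ring ring;

	ring_init(&ring);
	printf("must wait: %d\n", vmid_must_wait(&ring));	/* prints 0 */
	return 0;
}
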

Re: [PATCH 06/10] drm/amdgpu: properly imbed the IBs into the job

2022-03-03 Thread Andrey Grodzovsky

Reviewed-by: Andrey Grodzovsky 

Andrey

On 2022-03-03 03:23, Christian König wrote:

We now have standard macros for that.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 6 --
  2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 38c9fd7b7ad4..e4ca62225996 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -78,14 +78,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 struct amdgpu_job **job, struct amdgpu_vm *vm)
  {
-   size_t size = sizeof(struct amdgpu_job);
-
if (num_ibs == 0)
return -EINVAL;
  
-	size += sizeof(struct amdgpu_ib) * num_ibs;
-
-	*job = kzalloc(size, GFP_KERNEL);
+   *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
if (!*job)
return -ENOMEM;
  
@@ -95,7 +91,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
	 */
	(*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
-   (*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
  
  	amdgpu_sync_create(&(*job)->sync);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 6d704772ff42..d599c0540b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -25,6 +25,7 @@
  
  #include 

  #include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
  
  /* bit set means command submit involves a preamble IB */

  #define AMDGPU_PREAMBLE_IB_PRESENT  (1 << 0)
@@ -48,12 +49,10 @@ struct amdgpu_job {
struct amdgpu_vm*vm;
struct amdgpu_sync  sync;
struct amdgpu_sync  sched_sync;
-   struct amdgpu_ib*ibs;
struct dma_fencehw_fence;
struct dma_fence*external_hw_fence;
uint32_tpreamble_status;
uint32_tpreemption_status;
-   uint32_tnum_ibs;
boolvm_needs_flush;
uint64_tvm_pd_addr;
unsignedvmid;
@@ -69,6 +68,9 @@ struct amdgpu_job {
  
  	/* job_run_counter >= 1 means a resubmit job */

uint32_tjob_run_counter;
+
+   uint32_tnum_ibs;
+   struct amdgpu_ibibs[];
  };
  
  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,

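The rework replaces a hand-computed header-plus-array allocation and the
(*job)[1] pointer trick with a C99 flexible array member sized by
struct_size(). A userspace sketch of the same layout, where STRUCT_SIZE is a
simplified stand-in for the kernel helper (which additionally saturates on
overflow):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ib { uint32_t length_dw; };

/* Trailing flexible array member: the IBs live in the same allocation
 * as the job, as in the patched amdgpu_job. */
struct job {
	uint32_t num_ibs;
	struct ib ibs[];
};

/* Simplified stand-in for the kernel's struct_size(); sizeof() does
 * not evaluate its operand, so using the pointer in its own
 * initializer below is fine. */
#define STRUCT_SIZE(p, member, n) \
	(sizeof(*(p)) + sizeof((p)->member[0]) * (n))

static struct job *job_alloc(uint32_t num_ibs)
{
	struct job *job = calloc(1, STRUCT_SIZE(job, ibs, num_ibs));

	if (job)
		job->num_ibs = num_ibs;
	return job;
}

int main(void)
{
	struct job *job = job_alloc(3);

	if (!job)
		return 1;
	job->ibs[2].length_dw = 16;	/* direct member access, no casts */
	printf("ib2 = %u dw\n", (unsigned)job->ibs[2].length_dw);
	free(job);
	return 0;
}
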

[PATCH] drm/selftests: fix a shift-out-of-bounds bug

2022-03-03 Thread Arunpravin
pass the correct size value computed using the max_order.



[ 68.124177][ T1] UBSAN: shift-out-of-bounds in include/linux/log2.h:67:13
[ 68.125333][ T1] shift exponent 4294967295 is too large for 32-bit type 'long
unsigned int'
[ 68.126563][ T1] CPU: 0 PID: 1 Comm: swapper Not tainted
5.17.0-rc2-00311-g39ec47bbfd5d #2
[ 68.127758][ T1] Call Trace:
[ 68.128187][ T1] dump_stack_lvl (lib/dump_stack.c:108)
[ 68.128793][ T1] dump_stack (lib/dump_stack.c:114)
[ 68.129331][ T1] ubsan_epilogue (lib/ubsan.c:152)
[ 68.129958][ T1] __ubsan_handle_shift_out_of_bounds.cold 
(arch/x86/include/asm/smap.h:85)

[ 68.130791][ T1] ? drm_block_alloc+0x28/0x80
[ 68.131582][ T1] ? rcu_read_lock_sched_held (kernel/rcu/update.c:125)
[ 68.132215][ T1] ? kmem_cache_alloc (include/trace/events/kmem.h:54 
mm/slab.c:3501)
[ 68.132878][ T1] ? mark_free+0x2e/0x80
[ 68.133524][ T1] drm_buddy_init.cold (include/linux/log2.h:67
drivers/gpu/drm/drm_buddy.c:131)
[ 68.134145][ T1] ? test_drm_cmdline_init 
(drivers/gpu/drm/selftests/test-drm_buddy.c:87)

[ 68.134770][ T1] igt_buddy_alloc_limit 
(drivers/gpu/drm/selftests/test-drm_buddy.c:30)
[ 68.135472][ T1] ? vprintk_default (kernel/printk/printk.c:2257)
[ 68.136057][ T1] ? test_drm_cmdline_init 
(drivers/gpu/drm/selftests/test-drm_buddy.c:87)

[ 68.136812][ T1] test_drm_buddy_init 
(drivers/gpu/drm/selftests/drm_selftest.c:77
drivers/gpu/drm/selftests/test-drm_buddy.c:95)
[ 68.137475][ T1] do_one_initcall (init/main.c:1300)
[ 68.138111][ T1] ? parse_args (kernel/params.c:609 kernel/params.c:146
kernel/params.c:188)
[ 68.138717][ T1] do_basic_setup (init/main.c:1372 init/main.c:1389 
init/main.c:1408)
[ 68.139366][ T1] kernel_init_freeable (init/main.c:1617)
[ 68.140040][ T1] ? rest_init (init/main.c:1494)
[ 68.140634][ T1] kernel_init (init/main.c:1504)
[ 68.141155][ T1] ret_from_fork (arch/x86/entry/entry_32.S:772)
[ 68.141607][ T1]

[ 68.146730][ T1] [ cut here ]
[ 68.147460][ T1] kernel BUG at drivers/gpu/drm/drm_buddy.c:140!
[ 68.148280][ T1] invalid opcode:  [#1]
[ 68.148895][ T1] CPU: 0 PID: 1 Comm: swapper Not tainted
5.17.0-rc2-00311-g39ec47bbfd5d #2
[ 68.149896][ T1] EIP: drm_buddy_init (drivers/gpu/drm/drm_buddy.c:140 
(discriminator 1))

For more details: 
https://lists.01.org/hyperkitty/list/l...@lists.01.org/thread/FDIF3HCILZNN5UQAZMOR7E3MQSMHHKWU/

Signed-off-by: Arunpravin 
Reported-by: kernel test robot 
---
 drivers/gpu/drm/selftests/test-drm_buddy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/selftests/test-drm_buddy.c 
b/drivers/gpu/drm/selftests/test-drm_buddy.c
index fa997f89522b..913cbd7eae04 100644
--- a/drivers/gpu/drm/selftests/test-drm_buddy.c
+++ b/drivers/gpu/drm/selftests/test-drm_buddy.c
@@ -902,14 +902,13 @@ static int igt_buddy_alloc_range(void *arg)
 
 static int igt_buddy_alloc_limit(void *arg)
 {
-   u64 end, size = U64_MAX, start = 0;
+   u64 size = U64_MAX, start = 0;
struct drm_buddy_block *block;
unsigned long flags = 0;
LIST_HEAD(allocated);
struct drm_buddy mm;
int err;
 
-   size = end = round_down(size, 4096);
err = drm_buddy_init(, size, PAGE_SIZE);
if (err)
return err;
@@ -921,7 +920,8 @@ static int igt_buddy_alloc_limit(void *arg)
goto out_fini;
}
 
-   err = drm_buddy_alloc_blocks(, start, end, size,
+   size = mm.chunk_size << mm.max_order;
+   err = drm_buddy_alloc_blocks(, start, size, size,
 PAGE_SIZE, , flags);
 
if (unlikely(err))

base-commit: 6be340ee8f5beae574dae6f5e17a22e67beeff3e
-- 
2.25.1

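The underlying rule: shifting by an exponent at or beyond the type's width is
undefined behavior, so a test request must be derived from what the
initialized allocator can represent instead of from U64_MAX. A minimal sketch
of the fixed size calculation, with illustrative fields rather than
drm_buddy's real ones:

#include <stdint.h>
#include <stdio.h>

/* The selftest bug: asking for U64_MAX made the allocator's internal
 * ilog2()/shift arithmetic overflow. Deriving the request from the
 * initialized state keeps every shift well-defined. */
struct buddy_mm {
	uint64_t chunk_size;
	unsigned int max_order;
};

static uint64_t buddy_max_alloc(const struct buddy_mm *mm)
{
	/* A real allocator guarantees max_order < 64 by construction,
	 * so this shift cannot reach the undefined range. */
	return mm->chunk_size << mm->max_order;
}

int main(void)
{
	struct buddy_mm mm = { .chunk_size = 4096, .max_order = 20 };
	uint64_t size = buddy_max_alloc(&mm);	/* 4 GiB, well-defined */

	printf("largest valid request: %llu bytes\n",
	       (unsigned long long)size);
	return 0;
}
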


Re: [PATCH 05/10] drm/amdgpu: use job and ib structures directly in CS parsers

2022-03-03 Thread Andrey Grodzovsky

Acked-by: Andrey Grodzovsky 

Andrey

On 2022-03-03 03:23, Christian König wrote:

Instead of providing the ib index provide the job and ib pointers directly to
the patch and parse functions for UVD and VCE.

Also move the set/get functions for IB values to the IB declarations.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |   6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h   |  13 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  23 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c  |  36 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h  |   4 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  | 116 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |   7 +-
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  13 +--
  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c|  25 ++---
  9 files changed, 129 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20bf6134baca..dd9e708fe97f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1024,12 +1024,14 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p)
memcpy(ib->ptr, kptr, job->ibs[i].length_dw * 4);
amdgpu_bo_kunmap(aobj);
  
-			r = amdgpu_ring_parse_cs(ring, p, i);
+			r = amdgpu_ring_parse_cs(ring, p, p->job,
+						 &p->job->ibs[i]);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
-   r = amdgpu_ring_patch_cs_in_place(ring, p, i);
+			r = amdgpu_ring_patch_cs_in_place(ring, p, p->job,
+							  &p->job->ibs[i]);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index 30136eb50d2a..652b5593499f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -73,19 +73,6 @@ struct amdgpu_cs_parser {
struct amdgpu_cs_post_dep   *post_deps;
  };
  
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
-				      uint32_t ib_idx, int idx)
-{
-   return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
-  uint32_t ib_idx, int idx,
-  uint32_t value)
-{
-   p->job->ibs[ib_idx].ptr[idx] = value;
-}
-
  int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
   uint64_t addr, struct amdgpu_bo **bo,
   struct amdgpu_bo_va_mapping **mapping);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 05e789fc7a9e..a8bed1b47899 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -163,8 +163,12 @@ struct amdgpu_ring_funcs {
u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
-   int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-   int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+   int (*parse_cs)(struct amdgpu_cs_parser *p,
+   struct amdgpu_job *job,
+   struct amdgpu_ib *ib);
+   int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+struct amdgpu_job *job,
+struct amdgpu_ib *ib);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
@@ -264,8 +268,8 @@ struct amdgpu_ring {
atomic_t*sched_score;
  };
  
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
  #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
  #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
  #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
@@ -364,6 +368,17 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
  void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
  struct amdgpu_ring *ring);
  
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+   return ib->ptr[idx];
+}
+
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+

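The net effect on the UVD/VCE parsers is that a dword accessor now needs only
the IB itself rather than a parser pointer plus an index. A trimmed standalone
version of the new helper shape, with a simplified IB struct in place of
amdgpu_ib:

#include <stdint.h>
#include <stdio.h>

struct ib { uint32_t ptr[8]; };

/* After the rework the accessors take the ib directly, so callers no
 * longer thread a parser + index pair through every helper. */
static inline uint32_t ib_get_value(struct ib *ib, int idx)
{
	return ib->ptr[idx];
}

static inline void ib_set_value(struct ib *ib, int idx, uint32_t value)
{
	ib->ptr[idx] = value;
}

int main(void)
{
	struct ib ib = { { 0 } };

	ib_set_value(&ib, 3, 0xdeadbeef);
	printf("dw3 = 0x%08x\n", (unsigned)ib_get_value(&ib, 3));
	return 0;
}
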
Re: [PATCH 02/10] drm/amdgpu: header cleanup

2022-03-03 Thread Andrey Grodzovsky

Acked-by: Andrey Grodzovsky 

Andrey

On 2022-03-03 03:23, Christian König wrote:

No function change, just move a bunch of definitions from amdgpu.h into
separate header files.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 95 ---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h| 93 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h   |  3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 35 ++-
  .../gpu/drm/amd/amdgpu/amdgpu_trace_points.c  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   |  1 +
  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c |  1 +
  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c |  1 +
  10 files changed, 132 insertions(+), 100 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b89406b01694..7f447ed7a67f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -60,7 +60,6 @@
  #include 
  #include 
  #include 
-#include 
  
  #include 

  #include "dm_pp_interface.h"
@@ -276,9 +275,6 @@ extern int amdgpu_num_kcq;
  #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
  
  struct amdgpu_device;

-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
  struct amdgpu_irq_src;
  struct amdgpu_fpriv;
  struct amdgpu_bo_va_mapping;
@@ -465,20 +461,6 @@ struct amdgpu_flip_work {
  };
  
  
-/*

- * CP & rings.
- */
-
-struct amdgpu_ib {
-   struct amdgpu_sa_bo *sa_bo;
-   uint32_tlength_dw;
-   uint64_tgpu_addr;
-   uint32_t*ptr;
-   uint32_tflags;
-};
-
-extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-
  /*
   * file private structure
   */
@@ -494,79 +476,6 @@ struct amdgpu_fpriv {
  
  int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
  
-int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,

- unsigned size,
- enum amdgpu_ib_pool_type pool,
- struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-   struct dma_fence *f);
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-  struct amdgpu_ib *ibs, struct amdgpu_job *job,
-  struct dma_fence **f);
-int amdgpu_ib_pool_init(struct amdgpu_device *adev);
-void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
-int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
-/*
- * CS.
- */
-struct amdgpu_cs_chunk {
-   uint32_tchunk_id;
-   uint32_tlength_dw;
-   void*kdata;
-};
-
-struct amdgpu_cs_post_dep {
-   struct drm_syncobj *syncobj;
-   struct dma_fence_chain *chain;
-   u64 point;
-};
-
-struct amdgpu_cs_parser {
-   struct amdgpu_device*adev;
-   struct drm_file *filp;
-   struct amdgpu_ctx   *ctx;
-
-   /* chunks */
-   unsignednchunks;
-   struct amdgpu_cs_chunk  *chunks;
-
-   /* scheduler job object */
-   struct amdgpu_job   *job;
-   struct drm_sched_entity *entity;
-
-   /* buffer objects */
-   struct ww_acquire_ctx   ticket;
-   struct amdgpu_bo_list   *bo_list;
-   struct amdgpu_mn*mn;
-   struct amdgpu_bo_list_entry vm_pd;
-   struct list_headvalidated;
-   struct dma_fence*fence;
-   uint64_tbytes_moved_threshold;
-   uint64_tbytes_moved_vis_threshold;
-   uint64_tbytes_moved;
-   uint64_tbytes_moved_vis;
-
-   /* user fence */
-   struct amdgpu_bo_list_entry uf_entry;
-
-   unsignednum_post_deps;
-   struct amdgpu_cs_post_dep   *post_deps;
-};
-
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx)
-{
-   return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
-  uint32_t ib_idx, int idx,
-  uint32_t value)
-{
-   p->job->ibs[ib_idx].ptr[idx] = value;
-}
-
  /*
   * Writeback
   */
@@ -1425,10 +1334,6 @@ static inline int amdgpu_acpi_smart_shift_update(struct 
drm_device *dev,
 enum amdgpu_ss ss_state) { 
return 0; }
  #endif
  
-int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,

-  uint64_t addr, struct amdgpu_bo **bo,
-  struct 

Re: [PATCH 01/10] drm/amdgpu: install ctx entities with cmpxchg

2022-03-03 Thread Andrey Grodzovsky

Reviewed-by: Andrey Grodzovsky 

Andrey

On 2022-03-03 03:22, Christian König wrote:

Since we removed the context lock we need to make sure that no two threads
are trying to install an entity at the same time.

Signed-off-by: Christian König 
Fixes: e68efb27647f ("drm/amdgpu: remove ctx->lock")
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c1f8b0e37b93..72c5f1c53d6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -204,9 +204,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, 
u32 hw_ip,
if (r)
goto error_free_entity;
  
-	ctx->entities[hw_ip][ring] = entity;
+	/* It's not an error if we fail to install the new entity */
+	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+   goto cleanup_entity;
+
return 0;
  
+cleanup_entity:
+	drm_sched_entity_fini(&entity->entity);
+
  error_free_entity:
kfree(entity);
  

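The same publish-once idiom can be reproduced in userspace with C11 atomics:
allocate, compare-and-swap the slot from NULL, and treat losing the race as
success by freeing the local copy and adopting the winner's entity. An
illustrative sketch, not the driver code:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct entity { int id; };

static _Atomic(struct entity *) slot;

/* Publish a freshly allocated entity only if the slot is still empty;
 * losing the race is not an error, mirroring the cmpxchg() above. */
static struct entity *install_entity(int id)
{
	struct entity *entity = malloc(sizeof(*entity));
	struct entity *expected = NULL;

	if (!entity)
		return NULL;
	entity->id = id;

	if (!atomic_compare_exchange_strong(&slot, &expected, entity)) {
		free(entity);	 /* another thread won the race */
		return expected; /* CAS stored the current value here */
	}
	return entity;
}

int main(void)
{
	struct entity *a = install_entity(1);
	struct entity *b = install_entity(2);	/* loses, returns a */

	printf("a->id=%d b->id=%d same=%d\n", a->id, b->id, a == b);
	free(a);
	return 0;
}
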

Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c

2022-03-03 Thread Harry Wentland



On 2022-03-03 12:25, Gustavo A. R. Silva wrote:
> Fix the following Wstringop-overflow warnings when building with GCC-11:
> 
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:1491:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]
> 
> by removing the over-specified array size from the argument declarations.
> 
> This helps with the ongoing efforts to globally enable
> -Wstringop-overflow.
> 
> Link: https://github.com/KSPP/linux/issues/181
> Signed-off-by: Gustavo A. R. Silva 

Acked-by: Harry Wentland 

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 ++--
>  drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h  | 4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
> b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
> index f11a8d97fb60..81952e07acf6 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
> @@ -823,7 +823,7 @@ bool dp_is_interlane_aligned(union 
> lane_align_status_updated align_status)
>  void dp_hw_to_dpcd_lane_settings(
>   const struct link_training_settings *lt_settings,
>   const struct dc_lane_settings 
> hw_lane_settings[LANE_COUNT_DP_MAX],
> - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
> + union dpcd_training_lane dpcd_lane_settings[])
>  {
>   uint8_t lane = 0;
>  
> @@ -853,7 +853,7 @@ void dp_decide_lane_settings(
>   const struct link_training_settings *lt_settings,
>   const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
>   struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
> - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
> + union dpcd_training_lane dpcd_lane_settings[])
>  {
>   uint32_t lane;
>  
> diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h 
> b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
> index ab9939db8cea..0d00da1640a7 100644
> --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
> +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
> @@ -147,12 +147,12 @@ bool dp_is_max_vs_reached(
>  void dp_hw_to_dpcd_lane_settings(
>   const struct link_training_settings *lt_settings,
>   const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
> - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
> + union dpcd_training_lane dpcd_lane_settings[]);
>  void dp_decide_lane_settings(
>   const struct link_training_settings *lt_settings,
>   const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
>   struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
> - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
> + union dpcd_training_lane dpcd_lane_settings[]);
>  
>  uint32_t dp_translate_training_aux_read_interval(uint32_t 
> dpcd_aux_read_interval);
>  



Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c

2022-03-03 Thread Gustavo A. R. Silva
On Thu, Mar 03, 2022 at 09:43:28AM -0800, Kees Cook wrote:
> On Thu, Mar 03, 2022 at 11:25:03AM -0600, Gustavo A. R. Silva wrote:
> > Fix the following Wstringop-overflow warnings when building with GCC-11:
> > 
> > drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: 
> > warning: ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> > [-Wstringop-overflow=]
> 
> Can you "show your work" a little more here? I don't actually see the
> what is getting fixed:
> 
> enum dc_lane_count {
>   ...
> LANE_COUNT_FOUR = 4,
>   ...
> LANE_COUNT_DP_MAX = LANE_COUNT_FOUR
> };
> 
> struct link_training_settings {
>   ...
> union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX];
> };
> 
> void dp_hw_to_dpcd_lane_settings(
>   ...
>   union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
> {
>   ...
> }
> 
> static enum link_training_result dpia_training_cr_transparent(
>   ...
> struct link_training_settings *lt_settings)
> {
>   ...
> dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
> lt_settings->hw_lane_settings, 
> lt_settings->dpcd_lane_settings);
>   ...
> }
> 
> Everything looks to be the correct size?

Yep; this fix is similar to the one for intel_pm.c in this

commit e7c6e405e171fb33990a12ecfd14e6500d9e5cf2

where the array size of 8 seems to be fine for all the
struct members related (pri_latency, spr_latency, cur_latency
and skl_latency):

drivers/gpu/drm/i915/i915_drv.h:465:struct drm_i915_private {
...

drivers/gpu/drm/i915/i915_drv.h-739-struct {

...
drivers/gpu/drm/i915/i915_drv.h-745-/* primary */
drivers/gpu/drm/i915/i915_drv.h-746-u16 pri_latency[5];
drivers/gpu/drm/i915/i915_drv.h-747-/* sprite */
drivers/gpu/drm/i915/i915_drv.h-748-u16 spr_latency[5];
drivers/gpu/drm/i915/i915_drv.h-749-/* cursor */
drivers/gpu/drm/i915/i915_drv.h-750-u16 cur_latency[5];
drivers/gpu/drm/i915/i915_drv.h-751-/*
drivers/gpu/drm/i915/i915_drv.h-752- * Raw watermark memory latency 
values
drivers/gpu/drm/i915/i915_drv.h-753- * for SKL for all 8 levels
drivers/gpu/drm/i915/i915_drv.h-754- * in 1us units.
drivers/gpu/drm/i915/i915_drv.h-755- */
drivers/gpu/drm/i915/i915_drv.h-756-u16 skl_latency[8];

...
drivers/gpu/drm/i915/i915_drv.h-773-} wm;
...
}

however GCC warns about accessing bytes beyond the limits, and turning the
argument declarations into pointers (removing the over-specified array
size from the argument declaration) silence the warnings.

--
Gustavo


Re: [PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c

2022-03-03 Thread Kees Cook
On Thu, Mar 03, 2022 at 11:25:03AM -0600, Gustavo A. R. Silva wrote:
> Fix the following Wstringop-overflow warnings when building with GCC-11:
> 
> drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
> ‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
> [-Wstringop-overflow=]

Can you "show your work" a little more here? I don't actually see the
what is getting fixed:

enum dc_lane_count {
...
LANE_COUNT_FOUR = 4,
...
LANE_COUNT_DP_MAX = LANE_COUNT_FOUR
};

struct link_training_settings {
...
union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX];
};

void dp_hw_to_dpcd_lane_settings(
...
union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
{
...
}

static enum link_training_result dpia_training_cr_transparent(
...
struct link_training_settings *lt_settings)
{
...
dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
lt_settings->hw_lane_settings, 
lt_settings->dpcd_lane_settings);
...
}

Everything looks to be the correct size?

-- 
Kees Cook


[PATCH][next] drm/amd/display: Fix Wstringop-overflow warnings in dc_link_dp.c

2022-03-03 Thread Gustavo A. R. Silva
Fix the following Wstringop-overflow warnings when building with GCC-11:

drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:493:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dpia.c:388:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:1491:17: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]
drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link_dp.c:2613:25: warning: 
‘dp_decide_lane_settings’ accessing 4 bytes in a region of size 1 
[-Wstringop-overflow=]

by removing the over-specified array size from the argument declarations.

This helps with the ongoing efforts to globally enable
-Wstringop-overflow.

Link: https://github.com/KSPP/linux/issues/181
Signed-off-by: Gustavo A. R. Silva 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 ++--
 drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index f11a8d97fb60..81952e07acf6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -823,7 +823,7 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status)
 void dp_hw_to_dpcd_lane_settings(
const struct link_training_settings *lt_settings,
	const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-   union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
+   union dpcd_training_lane dpcd_lane_settings[])
 {
uint8_t lane = 0;
 
@@ -853,7 +853,7 @@ void dp_decide_lane_settings(
const struct link_training_settings *lt_settings,
const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-   union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
+   union dpcd_training_lane dpcd_lane_settings[])
 {
uint32_t lane;
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h 
b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index ab9939db8cea..0d00da1640a7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -147,12 +147,12 @@ bool dp_is_max_vs_reached(
 void dp_hw_to_dpcd_lane_settings(
const struct link_training_settings *lt_settings,
const struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-   union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
+   union dpcd_training_lane dpcd_lane_settings[]);
 void dp_decide_lane_settings(
const struct link_training_settings *lt_settings,
const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
-   union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
+   union dpcd_training_lane dpcd_lane_settings[]);
 
 uint32_t dp_translate_training_aux_read_interval(uint32_t dpcd_aux_read_interval);
 
-- 
2.27.0



Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready

2022-03-03 Thread Andrey Grodzovsky

I pushed all the changes including your patch.

Andrey

On 2022-03-02 22:16, Andrey Grodzovsky wrote:

OK, i will do quick smoke test tomorrow and push all of it then.

Andrey

On 2022-03-02 21:59, Chen, JingWen wrote:

Hi Andrey,

I don't have the bare metal environment, I can only test the SRIOV 
cases.


Best Regards,
JingWen Chen



On Mar 3, 2022, at 01:55, Grodzovsky, Andrey 
 wrote:


The patch is acked-by: Andrey Grodzovsky 

If you also smoke tested bare metal feel free to apply all the 
patches, if not let me know.


Andrey

On 2022-03-02 04:51, JingWen Chen wrote:

Hi Andrey,

Most part of the patches are OK, but the code will introduce a ib 
test fail on the disabled vcn of sienna_cichlid.


In SRIOV use case we will disable one vcn on sienna_cichlid, I have 
attached a patch to fix this issue, please check the attachment.


Best Regards,

Jingwen Chen


On 2/26/22 5:22 AM, Andrey Grodzovsky wrote:
Hey, patches attached - i applied the patches and resolved merge 
conflicts but weren't able to test as my on board's network card 
doesn't work with 5.16 kernel (it does with 5.17, maybe it's 
Kconfig issue and i need to check more).
The patches are on top of 'cababde192b2 Yifan Zhang 2 days 
ago drm/amd/pm: fix mode2 reset fail for smu 13.0.5 ' commit.


Please test and let me know. Maybe by Monday I will be able to 
resolve the connectivity issue on 5.16.


Andrey

On 2022-02-24 22:13, JingWen Chen wrote:

Hi Andrey,

Sorry for the misleading, I mean the whole patch series. We are 
depending on this patch series to fix the concurrency issue 
within SRIOV TDR sequence.




On 2/25/22 1:26 AM, Andrey Grodzovsky wrote:

No problem if so but before I do,


JingWen - why you think this patch is needed as a standalone now 
? It has no use without the
entire feature together with it. Is it some changes you want to 
do on top of that code ?



Andrey


On 2022-02-24 12:12, Deucher, Alexander wrote:

[Public]


If it applies cleanly, feel free to drop it in. I'll drop those 
patches for drm-next since they are already in drm-misc.


Alex

 

*From:* amd-gfx  on 
behalf of Andrey Grodzovsky 

*Sent:* Thursday, February 24, 2022 11:24 AM
*To:* Chen, JingWen ; Christian König 
; 
dri-de...@lists.freedesktop.org 
; 
amd-gfx@lists.freedesktop.org 
*Cc:* Liu, Monk ; Chen, Horace 
; Lazar, Lijo ; 
Koenig, Christian ; dan...@ffwll.ch 

*Subject:* Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init 
to after XGMI is ready

No because all the patch-set including this patch was landed into
drm-misc-next and will reach amd-staging-drm-next on the next 
upstream

rebase i guess.

Andrey

On 2022-02-24 01:47, JingWen Chen wrote:

Hi Andrey,

Will you port this patch into amd-staging-drm-next?

on 2/10/22 2:06 AM, Andrey Grodzovsky wrote:

All comments are fixed and code pushed. Thanks for everyone
who helped reviewing.

Andrey

On 2022-02-09 02:53, Christian König wrote:

On 09.02.22 at 01:23, Andrey Grodzovsky wrote:

Before we initialize schedulers we must know which reset
domain are we in - for single device there is a single
domain per device and so single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.

Signed-off-by: Andrey Grodzovsky 
One more comment below; with that fixed, Reviewed-by:
Christian König .



---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
  3 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9704b0e1fd82..00123b0013d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)

  return r;
  }
  +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)

+{
+    long timeout;
+    int r, i;
+
+    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+    struct amdgpu_ring *ring = adev->rings[i];
+
+    /* No need to setup the GPU scheduler for rings that don't need it */

+    if (!ring || ring->no_scheduler)
+    continue;
+
+    switch (ring->funcs->type) {
+    case AMDGPU_RING_TYPE_GFX:
+    timeout = adev->gfx_timeout;
+    break;
+    case AMDGPU_RING_TYPE_COMPUTE:
+    timeout = adev->compute_timeout;
+    break;
+    case AMDGPU_RING_TYPE_SDMA:
+    timeout = adev->sdma_timeout;
+    break;
+    default:
+    timeout = adev->video_timeout;
+    break;
+    }
+
+    r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+                       ring->num_hw_submission, amdgpu_job_hang_limit,
+                       timeout, adev->reset_domain.wq,
+                       ring->sched_score, ring->name);

+ 

RE: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran

2022-03-03 Thread Liu, Shaoyun
[AMD Official Use Only]

Reviewed-by: Shaoyun.liu 

-Original Message-
From: amd-gfx  On Behalf Of David Yu
Sent: Thursday, March 3, 2022 11:25 AM
To: amd-gfx@lists.freedesktop.org
Cc: Yu, David 
Subject: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran

Add DFC CAP support for aldebaran

Initialize CAP microcode in psp_init_sriov_microcode; the TA microcode will be
initialized in psp_vxx_init_microcode.

Signed-off-by: David Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-  
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 94bfe502b55e..3ce1d38a7822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
ret = psp_init_cap_microcode(psp, "sienna_cichlid");
break;
case IP_VERSION(13, 0, 2):
-   ret = psp_init_ta_microcode(psp, "aldebaran");
+   ret = psp_init_cap_microcode(psp, "aldebaran");
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 2c6070b90dcf..024f60631faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -31,6 +31,7 @@
 
 MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
--
2.25.1


[PATCH] drm/amdgpu: Add DFC CAP support for aldebaran

2022-03-03 Thread David Yu
Add DFC CAP support for aldebaran

Initialize CAP microcode in psp_init_sriov_microcode; the TA microcode will be
initialized in psp_vxx_init_microcode.

Signed-off-by: David Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 94bfe502b55e..3ce1d38a7822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
ret = psp_init_cap_microcode(psp, "sienna_cichlid");
break;
case IP_VERSION(13, 0, 2):
-   ret = psp_init_ta_microcode(psp, "aldebaran");
+   ret = psp_init_cap_microcode(psp, "aldebaran");
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 2c6070b90dcf..024f60631faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -31,6 +31,7 @@
 
 MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
-- 
2.25.1



Re: [PATCH 2/2] drm/amdkfd: implement get_atc_vmid_pasid_mapping_info for gfx10.3

2022-03-03 Thread Deucher, Alexander
[Public]

Series is:
Reviewed-by: Alex Deucher 

From: amd-gfx  on behalf of Yifan Zhang 

Sent: Thursday, March 3, 2022 3:05 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Zhang, Yifan ; Kuehling, Felix 

Subject: [PATCH 2/2] drm/amdkfd: implement get_atc_vmid_pasid_mapping_info for 
gfx10.3

This patch implements get_atc_vmid_pasid_mapping_info for gfx10.3

Signed-off-by: Yifan Zhang 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index e9c80ce13f3e..ba21ec6b35e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -26,6 +26,8 @@
 #include "gc/gc_10_3_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
 #include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
 #include "soc15_common.h"
 #include "v10_structs.h"
 #include "nv.h"
@@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device 
*adev,
 return 0;
 }

+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+   uint8_t vmid, uint16_t *p_pasid)
+{
+   uint32_t value;
+
+   value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+  + vmid);
+   *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+   return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
 uint32_t vmid, uint64_t page_table_base)
 {
@@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
 .hqd_destroy = hqd_destroy_v10_3,
 .hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
 .wave_control_execute = wave_control_execute_v10_3,
-   .get_atc_vmid_pasid_mapping_info = NULL,
+   .get_atc_vmid_pasid_mapping_info = 
get_atc_vmid_pasid_mapping_info_v10_3,
 .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
 .program_trap_handler_settings = program_trap_handler_settings_v10_3,
 #if 0
--
2.25.1



Re: [PATCH 1/1] drm/amdkfd: Improve concurrency of event handling

2022-03-03 Thread Felix Kuehling



On 2022-03-03 at 02:25, Christian König wrote:

On 02.03.22 at 21:06, Felix Kuehling wrote:

Use rcu_read_lock to read p->event_idr concurrently with other readers
and writers. Use p->event_mutex only for creating and destroying events
and in kfd_wait_on_events.


That might not necessarily work as you expect. rcu_read_lock() does
not provide barriers for the CPU cache.



Protect the contents of the kfd_event structure with a per-event
spinlock that can be taken inside the rcu_read_lock critical section.


That helps to prevent problems for the ev structure, but it doesn't 
protect the idr itself.


idr_find can be used safely under rcu_read_lock according to this 
comment in idr.h:


 * idr_find() is able to be called locklessly, using RCU. The caller must
 * ensure calls to this function are made within rcu_read_lock() regions.
 * Other readers (lock-free or otherwise) and modifications may be running
 * concurrently.
 *
 * It is still required that the caller manage the synchronization and
 * lifetimes of the items. So if RCU lock-free lookups are used, typically
 * this would mean that the items have their own locks, or are amenable to
 * lock-free access; and that the items are freed by RCU (or only freed after
 * having been deleted from the idr tree *and* a synchronize_rcu() grace
 * period).

It was introduced by f9c46d6ea5ce ("idr: make idr_find rcu-safe") in 2008.
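
For illustration, a minimal sketch of the lookup pattern being discussed
(simplified, not the exact patch code; it assumes the per-event spinlock
added by this patch, and that events are freed via idr_remove() +
synchronize_rcu() + kfree() as above):

static void set_event_locklessly(struct kfd_process *p, uint32_t id)
{
	struct kfd_event *ev;

	rcu_read_lock();
	ev = idr_find(&p->event_idr, id);	/* RCU-safe lockless lookup */
	if (ev) {
		spin_lock(&ev->lock);		/* protects ev's contents */
		set_event(ev);
		spin_unlock(&ev->lock);
	}
	rcu_read_unlock();
}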




BTW: Why are you using an idr in the first place? I don't see the 
lookup used anywhere.


lookup_event_by_id is just a wrapper around idr_find. It's used in 
kfd_event_destroy, kfd_set_event, kfd_reset_event and 
kfd_wait_on_events. lookup_signaled_event_by_partial_id also uses 
idr_find. It's used in kfd_signal_event_interrupt.


Regards,
  Felix




Regards,
Christian.



This eliminates contention of p->event_mutex in set_event, which tends
to be on the critical path for dispatch latency even when busy waiting
is used. It also eliminates lock contention in event interrupt handlers.
Since the p->event_mutex is now used much less, the impact of requiring
it in kfd_wait_on_events should also be much smaller.

This should improve event handling latency for processes using multiple
GPUs concurrently.

Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 119 +++-
  drivers/gpu/drm/amd/amdkfd/kfd_events.h |   1 +
  2 files changed, 78 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index deecccebe5b6..1726306715b2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,

  }
    /*
- * Assumes that p->event_mutex is held and of course that p is not going
- * away (current or locked).
+ * Assumes that p->event_mutex or rcu_readlock is held and of course that p is
+ * not going away.
   */
  static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)

  {
@@ -251,15 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)

  struct kfd_event_waiter *waiter;
    /* Wake up pending waiters. They will return failure */
+    spin_lock(&ev->lock);
  list_for_each_entry(waiter, &ev->wq.head, wait.entry)
-    waiter->event = NULL;
+    WRITE_ONCE(waiter->event, NULL);
  wake_up_all(&ev->wq);
+    spin_unlock(&ev->lock);
    if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
  ev->type == KFD_EVENT_TYPE_DEBUG)
  p->signal_event_count--;
    idr_remove(&p->event_idr, ev->event_id);
+    synchronize_rcu();
  kfree(ev);
  }
  @@ -392,6 +395,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,

  ev->auto_reset = auto_reset;
  ev->signaled = false;
  +    spin_lock_init(&ev->lock);
  init_waitqueue_head(&ev->wq);
    *event_page_offset = 0;
@@ -466,6 +470,7 @@ int kfd_criu_restore_event(struct file *devkfd,
  ev->auto_reset = ev_priv->auto_reset;
  ev->signaled = ev_priv->signaled;
+    spin_lock_init(&ev->lock);
  init_waitqueue_head(&ev->wq);
    mutex_lock(&p->event_mutex);
@@ -609,13 +614,13 @@ static void set_event(struct kfd_event *ev)
    /* Auto reset if the list is non-empty and we're waking
   * someone. waitqueue_active is safe here because we're
- * protected by the p->event_mutex, which is also held when
+ * protected by the ev->lock, which is also held when
   * updating the wait queues in kfd_wait_on_events.
   */
  ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
    list_for_each_entry(waiter, &ev->wq.head, wait.entry)
-    waiter->activated = true;
+    WRITE_ONCE(waiter->activated, true);
    wake_up_all(&ev->wq);
  }
@@ -626,16 +631,18 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)

  int ret = 0;
  struct kfd_event *ev;
  -    mutex_lock(&p->event_mutex);
+    rcu_read_lock();
    ev = 

RE: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran

2022-03-03 Thread Liu, Shaoyun
[AMD Official Use Only]

Probably just describe it as follows:
Initialize CAP microcode in psp_init_sriov_microcode; the TA microcode will be
initialized in psp_vxx_init_microcode.


-Original Message-
From: amd-gfx  On Behalf Of David Yu
Sent: Thursday, March 3, 2022 9:10 AM
To: amd-gfx@lists.freedesktop.org
Cc: Yu, David 
Subject: [PATCH] drm/amdgpu: Add DFC CAP support for aldebaran

Add DFC CAP support for aldebaran

Changed the incorrect call to psp_init_ta_microcode in psp_init_sriov_microcode to
psp_init_cap_microcode; the old call caused initialization to fail even with correct CAP firmware.

Signed-off-by: David Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-  
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 94bfe502b55e..3ce1d38a7822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
ret = psp_init_cap_microcode(psp, "sienna_cichlid");
break;
case IP_VERSION(13, 0, 2):
-   ret = psp_init_ta_microcode(psp, "aldebaran");
+   ret = psp_init_cap_microcode(psp, "aldebaran");
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 2c6070b90dcf..024f60631faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -31,6 +31,7 @@
 
 MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
--
2.25.1


RE: [PATCH 1/2] drm/amdgpu/vcn: Update fw shared data structure

2022-03-03 Thread Liu, Leo
[AMD Official Use Only]

The series is:
Reviewed-by: Leo Liu 

-Original Message-
From: Dong, Ruijing 
Sent: March 2, 2022 4:25 PM
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Leo ; Deucher, Alexander 
Subject: [PATCH 1/2] drm/amdgpu/vcn: Update fw shared data structure

Add fw log to the fw shared data structure.

Signed-off-by: Ruijing Dong 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 14 -  
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 26 +++--
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   | 18 -
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   | 18 -
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   | 20 +--
 5 files changed, 61 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 5e0dbf54d561..6f3f55e39ab1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -79,6 +79,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
+   unsigned int fw_shared_size;
int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
@@ -226,7 +227,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
@@ -226,7 +227,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += 
AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
-   bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+   fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+   bo_size += fw_shared_size;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
@@ -240,10 +242,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
			return r;
		}

-   adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
-   bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
-   adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
-   bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+   adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+   bo_size - fw_shared_size;
+   adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+   bo_size - fw_shared_size;
+
+   adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;

if (adev->vcn.indirect_sram) {
		r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 5d3728b027d3..f6569a7d6fdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -205,6 +205,13 @@ struct amdgpu_vcn_reg{
unsignedscratch9;
 };

+struct amdgpu_vcn_fw_shared {
+   void*cpu_addr;
+   uint64_tgpu_addr;
+   uint32_tmem_size;
+   uint32_tlog_offset;
+};
+
 struct amdgpu_vcn_inst {
struct amdgpu_bo*vcpu_bo;
void*cpu_addr;
@@ -221,8 +228,7 @@ struct amdgpu_vcn_inst {
uint64_tdpg_sram_gpu_addr;
uint32_t*dpg_sram_curr_addr;
atomic_tdpg_enc_submission_cnt;
-   void*fw_shared_cpu_addr;
-   uint64_tfw_shared_gpu_addr;
+   struct amdgpu_vcn_fw_shared fw_shared;
 };

 struct amdgpu_vcn {
@@ -265,6 +271,13 @@ struct amdgpu_fw_shared_sw_ring {
uint8_t padding[3];
 };

+struct amdgpu_fw_shared_fw_logging {
+   uint8_t is_enabled;
+   uint32_t addr_lo;
+   uint32_t addr_hi;
+   uint32_t size;
+};
+
 struct amdgpu_fw_shared {
uint32_t present_flag_0;
uint8_t pad[44];
@@ -272,6 +285,15 @@ struct amdgpu_fw_shared {
uint8_t pad1[1];
struct amdgpu_fw_shared_multi_queue multi_queue;
struct amdgpu_fw_shared_sw_ring sw_ring;
+	struct amdgpu_fw_shared_fw_logging fw_log;
+};
+
+struct amdgpu_vcn_fwlog {
+   uint32_t rptr;
+   uint32_t wptr;
+   uint32_t buffer_size;
+   uint32_t header_size;
+   uint8_t wrapped;
 };

 struct amdgpu_vcn_decode_buffer {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 313fc1b53999..36ec877a2a55 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -172,7 +172,7 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;

-   fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
+   fw_shared = 

[PATCH] drm/amdgpu: Add DFC CAP support for aldebaran

2022-03-03 Thread David Yu
Add DFC CAP support for aldebaran

Changed the incorrect call to psp_init_ta_microcode in psp_init_sriov_microcode to
psp_init_cap_microcode; the old call caused initialization to fail even with correct CAP firmware.

Signed-off-by: David Yu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 94bfe502b55e..3ce1d38a7822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -277,7 +277,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
ret = psp_init_cap_microcode(psp, "sienna_cichlid");
break;
case IP_VERSION(13, 0, 2):
-   ret = psp_init_ta_microcode(psp, "aldebaran");
+   ret = psp_init_cap_microcode(psp, "aldebaran");
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 2c6070b90dcf..024f60631faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -31,6 +31,7 @@
 
 MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin");
-- 
2.25.1



RE: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread David Laight
From: Xiaomeng Tong
> Sent: 03 March 2022 07:27
> 
> On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote:
> > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote:
> > > The problem is the misuse of the iterator outside the loop on exit, and
> > > the iterator will be the HEAD's container_of pointer which points
> > > to a type-confused struct. Sidenote: The *misuse* here refers to
> > > mistakenly accessing other members of the struct, instead of the
> > > list_head member which actually is the valid HEAD.
> >
> > The problem is that the HEAD's container_of pointer should never
> > be calculated at all.
> > This is what is fundamentally broken about the current definition.
> 
> Yes, the rule is "the HEAD's container_of pointer should never be
> calculated at all outside the loop", but how do you make sure everyone
> follows this rule?
> Everyone makes mistakes, but we can eliminate them all from the beginning
> with the help of the compiler, which can catch such use-after-loop things.
> 
> > > IOW, you would dereference a (NULL + offset_of_member) address here.
> >
> >Where?
> 
> In the case where a developer does not follow the above rule, and mistakenly
> accesses a non-list-head member of the HEAD's container_of pointer outside
> the loop. For example:
> struct req{
>   int a;
>   struct list_head h;
> }
> struct req *r;
> list_for_each_entry(r, HEAD, h) {
>   if (r->a == 0x10)
> break;
> }
> // the developer made a mistake: he didn't take this situation into
> // account where all entries in the list are *r->a != 0x10*, and now
> // the r is the HEAD's container_of pointer.
> r->a = 0x20;
> Thus the "r->a = 0x20" would dereference a (NULL + offset_of_member)
> address here.

That is just a bug.
No different from failing to check anything else that might 'return'
a NULL pointer.
Because it is a NULL dereference, you find out pretty quickly.
The existing loop leaves you with a valid pointer to something
that isn't a list item.

> > > Please remind me if i missed something, thanks.
> > >
> > > Can you share your "alternative definitions" details? thanks!
> >
> > The loop should probably use as extra variable that points
> > to the 'list node' in the next structure.
> > Something like:
> > for (xxx *iter = head->next;
> > iter ==  ? ((item = NULL),0) : ((item = 
> > list_item(iter),1));
> > iter = item->member->next) {
> >...
> > With a bit of casting you can use 'item' to hold 'iter'.
> 
> you still cannot make sure everyone follows this rule:
> "do not use the iterator outside the loop" without the help of the compiler,
> because item is declared outside the loop.

That one has 'iter' defined in the loop.

> BTW, to avoid ambiguity, the "alternative definitions" I asked about here is
> something from you in this context:
> "OTOH there may be alternative definitions that can be used to get
> the compiler (or other compiler-like tools) to detect broken code.
> Even if the definition can't possibly generate a working kernel."

I was thinking of something like:
if ((pos = list_first)), 1) pos = NULL else
so that unchecked dereferences after the loop will be detectable
as NULL pointer offsets - but that in itself isn't enough to avoid
other warnings.

> > > The "list_for_each_entry_inside(pos, type, head, member)" way makes
> > > the iterator invisible outside the loop, and would be caught by the
> > > compiler if use-after-loop things happened.
> 
> It is also a complete PITA for anything doing a search.

> Do you mean it would be a burden on searches? Can you show me some examples?

The whole business of having to save the pointer to the located item
before breaking the loop, remembering to have set it to NULL earlier, etc.

It is so much better if you can just do:
if (found)
break;
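
To make the contrast concrete, here is a hypothetical search written both
ways, reusing the struct req example from earlier in the thread (the second
form assumes NULL-on-exit iterator semantics, which mainline does not have):

	/* current style: a separate 'found' pointer must be carried around */
	struct req *found = NULL, *r;

	list_for_each_entry(r, HEAD, h) {
		if (r->a == 0x10) {
			found = r;
			break;
		}
	}
	if (!found)
		return -ENOENT;

	/* with NULL-on-exit semantics the extra variable disappears */
	list_for_each_entry(r, HEAD, h) {
		if (r->a == 0x10)
			break;
	}
	if (!r)
		return -ENOENT;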

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)


RE: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Xiaomeng Tong
> From: Xiaomeng Tong
> > Sent: 03 March 2022 07:27
> > 
> > On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote:
> > > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote:
> > > > The problem is the misuse of the iterator outside the loop on exit, and
> > > > the iterator will be the HEAD's container_of pointer which points
> > > > to a type-confused struct. Sidenote: The *misuse* here refers to
> > > > mistakenly accessing other members of the struct, instead of the
> > > > list_head member which actually is the valid HEAD.
> > >
> > > The problem is that the HEAD's container_of pointer should never
> > > be calculated at all.
> > > This is what is fundamentally broken about the current definition.
> > 
> > Yes, the rule is "the HEAD's container_of pointer should never be
> > calculated at all outside the loop", but how do you make sure everyone
> > follows this rule?
> > Everyone makes mistakes, but we can eliminate them all from the beginning
> > with the help of the compiler, which can catch such use-after-loop things.
> > 
> > > > IOW, you would dereference a (NULL + offset_of_member) address here.
> > >
> > >Where?
> > 
> > In the case where a developer does not follow the above rule, and mistakenly
> > accesses a non-list-head member of the HEAD's container_of pointer outside
> > the loop. For example:
> > struct req{
> >   int a;
> >   struct list_head h;
> > }
> > struct req *r;
> > list_for_each_entry(r, HEAD, h) {
> >   if (r->a == 0x10)
> > break;
> > }
> > // the developer made a mistake: he didn't take this situation into
> > // account where all entries in the list are *r->a != 0x10*, and now
> > // the r is the HEAD's container_of pointer.
> > r->a = 0x20;
> > Thus the "r->a = 0x20" would dereference a (NULL + offset_of_member)
> > address here.
> 
> That is just a bug.
> No different from failing to check anything else that might 'return'
> a NULL pointer.

Yes, but it's a mistake everyone has made and will make; we should avoid
this from the beginning with the help of the compiler.

> Because it is a NULL dereference you find out pretty quickly.

AFAIK, NULL dereference is undefined behavior; can the compiler quickly
catch it? Or can it only be applied to some simple/restricted cases?

> The existing loop leaves you with a valid pointer to something
> that isn't a list item.
> 
> > > > Please remind me if i missed something, thanks.
> > > >
> > > > Can you share your "alternative definitions" details? thanks!
> > >
> > > The loop should probably use as extra variable that points
> > > to the 'list node' in the next structure.
> > > Something like:
> > >   for (xxx *iter = head->next;
> > >   iter == &head ? ((item = NULL),0) : ((item = list_item(iter),1));
> > >   iter = item->member->next) {
> > >  ...
> > > With a bit of casting you can use 'item' to hold 'iter'.
> > 
> > you still cannot make sure everyone follows this rule:
> > "do not use the iterator outside the loop" without the help of the compiler,
> > because item is declared outside the loop.
> 
> That one has 'iter' defined in the loop.

Oh, sorry, I misunderstood. Then this is the same as my way of
list_for_each_entry_inside(pos, type, head, member), which declares
the iterator inside the loop.
You go further and make things like the "&pos->member == (head)" check go
away to avoid calculating the HEAD's container_of pointer, although the
calculation is harmless.

> 
> > BTW, to avoid ambiguity, the "alternative definitions" I asked about here is
> > something from you in this context:
> > "OTOH there may be alternative definitions that can be used to get
> > the compiler (or other compiler-like tools) to detect broken code.
> > Even if the definition can't possibly generate a working kernel."
> 
> I was thinking of something like:
>   if ((pos = list_first)), 1) pos = NULL else
> so that unchecked dereferences after the loop will be detectable
> as NULL pointer offsets - but that in itself isn't enough to avoid
> other warnings.
> 

Do you mean to put this right after the loop (I changed something that I
do not understand, please correct me if I am wrong, thanks):
   if (pos == list_first) pos = NULL; else
and the compiler can detect the following NULL dereference?
But if the NULL dereference is just right after the loop originally,
it will be masked by the *else* keyword.

> > > > The "list_for_each_entry_inside(pos, type, head, member)" way makes
> > > > the iterator invisible outside the loop, and would be caught by the
> > > > compiler if use-after-loop things happened.
> > 
> > > It is also a complete PITA for anything doing a search.
> > 
> > Do you mean it would be a burden on searches? Can you show me some examples?
> 
> The whole business of having to save the pointer to the located item
> before breaking the loop, remembering to have set it to NULL earlier, etc.

OK, I see. And then you need to pass an "item" to the list_for_each_entry macro
as a new argument.

> 
> It is so much better if you can 

Re: [Kgdb-bugreport] [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Daniel Thompson
On Thu, Mar 03, 2022 at 03:26:57PM +0800, Xiaomeng Tong wrote:
> On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote:
> > on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote:
> > > The problem is the misuse of the iterator outside the loop on exit, and
> > > the iterator will be the HEAD's container_of pointer which points
> > > to a type-confused struct. Sidenote: The *misuse* here refers to
> > > mistakenly accessing other members of the struct, instead of the
> > > list_head member which actually is the valid HEAD.
> >
> > The problem is that the HEAD's container_of pointer should never
> > be calculated at all.
> > This is what is fundamentally broken about the current definition.
> 
> Yes, the rule is "the HEAD's container_of pointer should never be
> calculated at all outside the loop", but how do you make sure everyone
> follows this rule?

Your formulation of the rule is correct: never run container_of() on HEAD
pointer.

However the rule that is introduced by list_for_each_entry_inside() is
*not* this rule. The rule it introduces is: never access the iterator
variable outside the loop.

Making the iterator NULL on loop exit does follow the rule you proposed
but using a different technique: do not allow HEAD to be stored in the
iterator variable after loop exit. This also makes it impossible to run
container_of() on the HEAD pointer.


> Everyone makes mistakes, but we can eliminate them all from the beginning
> with the help of the compiler, which can catch such use-after-loop things.

Indeed, but if we introduce new interfaces then we don't have to worry
about existing usages and silent regressions. Code will have been
written knowing the loop can exit with the iterator set to NULL.

Sure it is still possible for programmers to make mistakes and
dereference the NULL pointer, but C programmers are well trained w.r.t.
NULL pointer checking, so such mistakes are much less likely than with
the current list_for_each_entry() macro. This risk must be offset
against the way a NULLify approach can lead to more elegant code when we
are doing a list search.


Daniel.


Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Xiaomeng Tong
correction for a typo:

-for (struct list_head *list = head->next, cond = (struct list_head *)-1; cond == (struct list_head *)-1; cond = NULL) \
+for (struct list_head *list = head->next, *cond = (struct list_head *)-1; cond == (struct list_head *)-1; cond = NULL) \

--
Xiaomeng Tong


Re: [PATCH v9] drm/amdgpu: add drm buddy support to amdgpu

2022-03-03 Thread Christian König




On 01.03.22 at 21:38, Arunpravin wrote:

- Remove drm_mm references and replace with drm buddy functionalities
- Add res cursor support for drm buddy

v2(Matthew Auld):
   - replace spinlock with mutex as we call kmem_cache_zalloc
 (..., GFP_KERNEL) in drm_buddy_alloc() function

   - lock the drm_buddy_block_trim() function as the mark_free/mark_split
 it calls are all globally visible

v3(Matthew Auld):
   - remove trim method error handling as we address the failure case
 at drm_buddy_block_trim() function

v4:
   - fix warnings reported by kernel test robot 

v5:
   - fix merge conflict issue

v6:
   - fix warnings reported by kernel test robot 

v7:
   - remove DRM_BUDDY_RANGE_ALLOCATION flag usage

v8:
   - keep DRM_BUDDY_RANGE_ALLOCATION flag usage
   - resolve conflicts created by drm/amdgpu: remove VRAM accounting v2

v9(Christian):
   - rename label name as fallback
   - move struct amdgpu_vram_mgr to amdgpu_vram_mgr.h
   - remove unnecessary flags from struct amdgpu_vram_reservation
   - rewrite block NULL check condition
   - change else style as per coding standard
   - rewrite the node max size
   - add a helper function to fetch the first entry from the list

Signed-off-by: Arunpravin 
---
  drivers/gpu/drm/Kconfig   |   1 +
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h|  97 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  10 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 249 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  68 +
  5 files changed, 289 insertions(+), 136 deletions(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index f1422bee3dcc..5133c3f028ab 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -280,6 +280,7 @@ config DRM_AMDGPU
select HWMON
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
+   select DRM_BUDDY
help
  Choose this option if you have a recent AMD Radeon graphics card.
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index acfa207cf970..864c609ba00b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -30,12 +30,15 @@
  #include 
  #include 
  
+#include "amdgpu_vram_mgr.h"

+
  /* state back for walking over vram_mgr and gtt_mgr allocations */
  struct amdgpu_res_cursor {
uint64_tstart;
uint64_tsize;
uint64_tremaining;
-   struct drm_mm_node  *node;
+   void*node;
+   uint32_tmem_type;
  };
  
  /**

@@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource 
*res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
  {
+   struct drm_buddy_block *block;
+   struct list_head *head, *next;
struct drm_mm_node *node;
  
-	if (!res || res->mem_type == TTM_PL_SYSTEM) {

-   cur->start = start;
-   cur->size = size;
-   cur->remaining = size;
-   cur->node = NULL;
-   WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
-   return;
-   }
+   if (!res)
+   goto fallback;
  
  	BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
  
-	node = to_ttm_range_mgr_node(res)->mm_nodes;

-   while (start >= node->size << PAGE_SHIFT)
-   start -= node++->size << PAGE_SHIFT;
+   cur->mem_type = res->mem_type;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   head = &to_amdgpu_vram_mgr_node(res)->blocks;
+
+   block = list_first_entry_or_null(head,
+struct drm_buddy_block,
+link);
+   if (!block)
+   goto fallback;
+
+   while (start >= amdgpu_node_size(block)) {
+   start -= amdgpu_node_size(block);
+
+   next = block->link.next;
+   if (next != head)
+   block = list_entry(next, struct drm_buddy_block, link);
+   }
+
+   cur->start = amdgpu_node_start(block) + start;
+   cur->size = min(amdgpu_node_size(block) - start, size);
+   cur->remaining = size;
+   cur->node = block;
+   break;
+   case TTM_PL_TT:
+   node = to_ttm_range_mgr_node(res)->mm_nodes;
+   while (start >= node->size << PAGE_SHIFT)
+   start -= node++->size << PAGE_SHIFT;
+
+   cur->start = (node->start << PAGE_SHIFT) + start;
+   cur->size = min((node->size << PAGE_SHIFT) - start, size);
+   

Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Dan Carpenter
On Wed, Mar 02, 2022 at 10:29:31AM +0100, Rasmus Villemoes wrote:
> This won't help the current issue (because it doesn't exist and might
> never), but just in case some compiler people are listening, I'd like to
> have some sort of way to tell the compiler "treat this variable as
> uninitialized from here on". So one could do
> 
> #define kfree(p) do { __kfree(p); __magic_uninit(p); } while (0)
> 

I think this is a good idea.

Smatch can already find all the iterator used outside the loop bugs that
Jakob did with a manageably small number of false positives.  The
problems are that:
1) It would be better to find it in the compile stage instead of later.
2) I hadn't published that check.  Will do shortly.
3) A couple weeks back I noticed that the list_for_each_entry() check
   was no longer working.  Fixed now.
4) Smatch was only looking at cases which dereferenced the iterator and
   not checks for NULL.  I will test the fix for that tonight.
5) Smatch is broken on PowerPC.

Coccinelle also has checks for iterator used outside the loop.
Coccinelle had these checks before Smatch did.  I copied Julia's idea.

If your annotation was added to GCC it would solve all those problems.

But it's kind of awkward that we can't annotate kfree() directly
instead of creating the kfree() macro.  And there are lots of other
functions which free things so you'd have to create a ton of macros
like:

#define gr_free_dma_desc(a, b) do { __gr_free_dma_desc(a, b); 
__magic_uninit(b); } while (0)

And then there are functions which free a struct member:

void free_bar(struct foo *p) { kfree(p->bar); }

Or functions which free a container_of().

Smatch is more evolved than designed but what I do these days is use $0,
$1, $2 to represent the parameters.  So you can say a function frees
$0->bar.  For container_of() then is "(168<~$0)->bar" which means 168
bytes from $0.  Returns are parameter -1 so I guess it would be $(-1),
but as I said Smatch evolved so right now places that talk about
returned values use a different format.

What you could do is just make a parseable table next to the function
definition with all the information.  Then you would use a Perl script
to automatically generate a Coccinelle check to warn about use after
frees.

diff --git a/mm/slab.c b/mm/slab.c
index ddf5737c63d9..c9dffa5c40a2 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3771,6 +3771,9 @@ EXPORT_SYMBOL(kmem_cache_free_bulk);
  *
  * Don't free memory not originally allocated by kmalloc()
  * or you will run into trouble.
+ *
+ * CHECKER information
+ * frees: $0
  */
 void kfree(const void *objp)
 {

regards,
dan carpenter



Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Dan Carpenter
On Wed, Mar 02, 2022 at 12:07:04PM -0800, Kees Cook wrote:
> On Wed, Mar 02, 2022 at 10:29:31AM +0100, Rasmus Villemoes wrote:
> > This won't help the current issue (because it doesn't exist and might
> > never), but just in case some compiler people are listening, I'd like to
> > have some sort of way to tell the compiler "treat this variable as
> > uninitialized from here on". So one could do
> > 
> > #define kfree(p) do { __kfree(p); __magic_uninit(p); } while (0)
> > 
> > with __magic_uninit being a magic no-op that doesn't affect the
> > semantics of the code, but could be used by the compiler's "[is/may be]
> > used uninitialized" machinery to flag e.g. double frees on some odd
> > error path etc. It would probably only work for local automatic
> > variables, but it should be possible to just ignore the hint if p is
> > some expression like foo->bar or has side effects. If we had that, the
> > end-of-loop test could include that to "uninitialize" the iterator.
> 
> I've long wanted to change kfree() to explicitly set pointers to NULL on
> free. https://github.com/KSPP/linux/issues/87 

You also need to be a bit careful with existing code because there are
places which do things like:

drivers/usb/host/r8a66597-hcd.c
   424  kfree(dev);
  ^^^
   425  
   426  for (port = 0; port < r8a66597->max_root_hub; port++) {
   427  if (r8a66597->root_hub[port].dev == dev) {
^^^
   428  r8a66597->root_hub[port].dev = NULL;
   429  break;
   430  }
   431  }

Printing the freed pointer in debug code is another thing people do.
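
As a concrete sketch of why the conversion is not purely mechanical
(kfree_and_null() is a hypothetical name here, not an existing kernel API):

	/* hypothetical helper: free and poison the caller's pointer */
	#define kfree_and_null(p) do { kfree(p); (p) = NULL; } while (0)

	/* code like the r8a66597 example above compares against the old
	 * value afterwards, so it would have to save that value first: */
	const void *freed = dev;

	kfree_and_null(dev);
	for (port = 0; port < r8a66597->max_root_hub; port++)
		if (r8a66597->root_hub[port].dev == freed)
			r8a66597->root_hub[port].dev = NULL;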

regards,
dan carpenter



[PATCH] drm: Remove redundant code

2022-03-03 Thread Jiapeng Chong
Clean up the following smatch warning:

drivers/gpu/drm/radeon/atom.c:400 atom_skip_src_int() warn: ignoring
unreachable code.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/radeon/atom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index c1bbfbe28bda..13940f5e783a 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -397,7 +397,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, 
uint8_t attr, int *ptr)
(*ptr)++;
return;
}
-   return;
}
 }
 
-- 
2.20.1.7.g153144c



Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Xiaomeng Tong
> I think this would make sense, it would mean you only assign the containing
> element on valid elements.
>
> I was thinking something along the lines of:
>
> #define list_for_each_entry(pos, head, member)
> \
>   for (struct list_head *list = head->next, typeof(pos) pos;  \
>list == head ? 0 : (( pos = list_entry(pos, list, member), 1));
> \
>list = list->next)
>
> Although the initialization block of the for loop is not valid C, I'm
> not sure there is any way to declare two variables of a different type
> in the initialization part of the loop.

It can be done using a *nested loop*, like this:

#define list_for_each_entry(pos, head, member)				\
for (struct list_head *list = head->next, cond = (struct list_head *)-1; cond == (struct list_head *)-1; cond = NULL) \
  for (typeof(pos) pos;							\
       list == head ? 0 : (( pos = list_entry(pos, list, member), 1));	\
       list = list->next)

>
> I believe all this does is get rid of the &pos->member == (head) check
> to terminate the list.

Indeed, although the original way is harmless.

> It alone will not fix any of the other issues that using the iterator
> variable after the loop currently has.

Yes, but I stick with the list_for_each_entry_inside(pos, type, head, member)
way to make the iterator invisible outside the loop (before and after the
loop).
It is more maintainable longer-term than the "typeof(pos) pos" one, and perfect.
See my explanation:
https://lore.kernel.org/lkml/20220302093106.8402-1-xiam0nd.t...@gmail.com/
and list_for_each_entry_inside(pos, type, head, member) patch here:
https://lore.kernel.org/lkml/20220301075839.4156-3-xiam0nd.t...@gmail.com/
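
For readers following along, a minimal sketch of that approach (not
necessarily the exact macro from the linked patch; the explicit type
parameter is what allows pos to be declared inside the loop):

	/* any use of 'pos' after the loop is now a compile error,
	 * instead of a silently type-confused HEAD pointer */
	#define list_for_each_entry_inside(pos, type, head, member)	\
		for (type *pos = list_first_entry(head, type, member);	\
		     &pos->member != (head);				\
		     pos = list_next_entry(pos, member))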

--
Xiaomeng Tong


RE: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Xiaomeng Tong
On Thu, 3 Mar 2022 04:58:23 +, David Laight wrote:
> on 3 Mar 2022 10:27:29 +0800, Xiaomeng Tong wrote:
> > The problem is the misuse of the iterator outside the loop on exit, and
> > the iterator will be the HEAD's container_of pointer which points
> > to a type-confused struct. Sidenote: The *misuse* here refers to
> > mistakenly accessing other members of the struct, instead of the
> > list_head member which actually is the valid HEAD.
>
> The problem is that the HEAD's container_of pointer should never
> be calculated at all.
> This is what is fundamentally broken about the current definition.

Yes, the rule is "the HEAD's container_of pointer should never be
calculated at all outside the loop", but how do you make sure everyone
follows this rule?
Everyone makes mistakes, but we can eliminate them all from the beginning
with the help of the compiler, which can catch such use-after-loop things.

> > IOW, you would dereference a (NULL + offset_of_member) address here.
>
>Where?

In the case where a developer does not follow the above rule, and mistakenly
accesses a non-list-head member of the HEAD's container_of pointer outside
the loop. For example:
struct req{
  int a;
  struct list_head h;
}
struct req *r;
list_for_each_entry(r, HEAD, h) {
  if (r->a == 0x10)
break;
}
// the developer made a mistake: he didn't take this situation into
// account where all entries in the list are *r->a != 0x10*, and now
// the r is the HEAD's container_of pointer. 
r->a = 0x20;
Thus the "r->a = 0x20" would dereference a (NULL + offset_of_member)
address here.

> > Please remind me if i missed something, thanks.
> >
> > Can you share your "alternative definitions" details? thanks!
>
> The loop should probably use as extra variable that points
> to the 'list node' in the next structure.
> Something like:
>   for (xxx *iter = head->next;
>   iter == &head ? ((item = NULL),0) : ((item = list_item(iter),1));
>   iter = item->member->next) {
>  ...
> With a bit of casting you can use 'item' to hold 'iter'.

you still cannot make sure everyone follows this rule:
"do not use the iterator outside the loop" without the help of the compiler,
because item is declared outside the loop.

BTW, to avoid ambiguity, the "alternative definitions" I asked about here is
something from you in this context:
"OTOH there may be alternative definitions that can be used to get
the compiler (or other compiler-like tools) to detect broken code.
Even if the definition can't possibly generate a working kernel."

> > 
> > > OTOH there may be alternative definitions that can be used to get
> > > the compiler (or other compiler-like tools) to detect broken code.
> > > Even if the definition can't possibly generate a working kernel.
> > 
> > The "list_for_each_entry_inside(pos, type, head, member)" way makes
> > the iterator invisible outside the loop, and would be caught by the
> > compiler if use-after-loop things happened.

> It is also a complete PITA for anything doing a search.

Do you mean it would be a burden on searches? Can you show me some examples?

Or do you mean the list_for_each_entry_inside* string is too long to live
on one single line, and should be split into two lines? If that is the case,
there are two ways to mitigate it.
1. pass a shorter t as struct type to the macro
2. after all list_for_each_entry macros have been replaced with
list_for_each_entry_inside, remove the list_for_each_entry implementations
and rename all list_for_each_entry_inside uses back to the original
list_for_each_entry in a single patch.

For me, it is acceptable and not such a big problem.

--
Xiaomeng Tong


Re: [PATCH 2/6] treewide: remove using list iterator after loop body as a ptr

2022-03-03 Thread Jakob Koschel



> On 3. Mar 2022, at 05:58, David Laight  wrote:
> 
> From: Xiaomeng Tong
>> Sent: 03 March 2022 02:27
>> 
>> On Wed, 2 Mar 2022 14:04:06 +, David Laight
>>  wrote:
>>> I think that it would be better to make any alternate loop macro
>>> just set the variable to NULL on the loop exit.
>>> That is easier to code for and the compiler might be persuaded to
>>> not redo the test.
>> 
>> No, that would lead to a NULL dereference.
> 
> Why? It would make it the same as the 'easy to use':
>   for (item = head; item; item = item->next) {
>   ...
>   if (...)
>   break;
>   ...
>   }
>   if (!item)
>   return;
> 
>> The problem is the misuse of the iterator outside the loop on exit, and
>> the iterator will be the HEAD's container_of pointer which points
>> to a type-confused struct. Sidenote: The *misuse* here refers to
>> mistakenly accessing other members of the struct, instead of the
>> list_head member which actually is the valid HEAD.
> 
> The problem is that the HEAD's container_of pointer should never
> be calculated at all.
> This is what is fundamentally broken about the current definition.
> 
>> IOW, you would dereference a (NULL + offset_of_member) address here.
> 
> Where?
> 
>> Please remind me if i missed something, thanks.
>> 
>> Can you share your "alternative definitions" details? thanks!
> 
> The loop should probably use as extra variable that points
> to the 'list node' in the next structure.
> Something like:
>   for (xxx *iter = head->next;
>   iter == &head ? ((item = NULL),0) : ((item = list_item(iter),1));
>   iter = item->member->next) {
>  ...
> With a bit of casting you can use 'item' to hold 'iter'.

I think this would make sense; it would mean you only assign the containing
element on valid elements.

I was thinking something along the lines of:

#define list_for_each_entry(pos, head, member)  
\
for (struct list_head *list = head->next, typeof(pos) pos;  \
 list == head ? 0 : (( pos = list_entry(pos, list, member), 1));
\
 list = list->next)

Although the initialization block of the for loop is not valid C, I'm
not sure there is any way to declare two variables of a different type
in the initialization part of the loop.

I believe all this does is get rid of the &pos->member == (head) check
to terminate the list.
It alone will not fix any of the other issues that using the iterator
variable after the loop currently has.


AFAIK Adrián Moreno is working on doing something along those lines
for the list iterator in openvswitch (that was similar to the kernel
one before) [1].

I *think* they don't declare 'pos' within the loop, which we *do want*
to avoid any uses of it after the loop.
(If pos is not declared in the initialization block, shadowing the
*outer* pos, it would just point to the last element of the list or stay
uninitialized if the list is empty).


[1] https://www.mail-archive.com/ovs-dev@openvswitch.org/msg63497.html


> 
>> 
>>> OTOH there may be alternative definitions that can be used to get
>>> the compiler (or other compiler-like tools) to detect broken code.
>>> Even if the definition can't possibly generate a working kernel.
>> 
>> The "list_for_each_entry_inside(pos, type, head, member)" way makes
>> the iterator invisible outside the loop, and would be caught by the
>> compiler if use-after-loop things happened.
> 
> It is also a complete PITA for anything doing a search.
> 
>   David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 
> 1PT, UK
> Registration No: 1397386 (Wales)
> 

- Jakob

[PATCH 10/10] drm/amdgpu: add gang submit frontend

2022-03-03 Thread Christian König
Allows submitting jobs as a gang which needs to run on multiple engines at the
same time.

All members of the gang get the same implicit, explicit and VM dependencies. So
no gang member will start running until everything else is ready.

The last job is considered the gang leader (usually a submission to the GFX
ring) and used for signaling output dependencies.

Each job is remembered individually as a user of a buffer object, so there is no
joining of work at the end.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 244 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h|   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  12 +-
 3 files changed, 173 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c6541f7b8f54..7429e64919fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -69,6 +69,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
   unsigned int *num_ibs)
 {
struct drm_sched_entity *entity;
+   unsigned int i;
int r;
 
r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
@@ -83,11 +84,19 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
return -EINVAL;
 
/* Currently we don't support submitting to multiple entities */
-   if (p->entity && p->entity != entity)
+   for (i = 0; i < p->gang_size; ++i) {
+   if (p->entities[i] == entity)
+   goto found;
+   }
+
+   if (i == AMDGPU_CS_GANG_SIZE)
return -EINVAL;
 
-   p->entity = entity;
-   ++(*num_ibs);
+   p->entities[i] = entity;
+   p->gang_size = i + 1;
+
+found:
+   ++(num_ibs[i]);
return 0;
 }
 
@@ -161,11 +170,12 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
   union drm_amdgpu_cs *cs)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+   unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
-   unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+   unsigned int size;
int ret;
int i;
 
@@ -228,7 +238,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
goto free_partial_kdata;
 
-   ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs);
+   ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
if (ret)
goto free_partial_kdata;
break;
@@ -265,21 +275,27 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
}
 
-   ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
-   if (ret)
-   goto free_all_kdata;
+   if (!p->gang_size)
+   return -EINVAL;
 
-   ret = drm_sched_job_init(&p->job->base, p->entity, &fpriv->vm);
-   if (ret)
-   goto free_all_kdata;
+   for (i = 0; i < p->gang_size; ++i) {
+   ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
+   if (ret)
+   goto free_all_kdata;
+
+   ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
+&fpriv->vm);
+   if (ret)
+   goto free_all_kdata;
+   }
 
-   if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+   if (p->ctx->vram_lost_counter != p->jobs[0]->vram_lost_counter) {
ret = -ECANCELED;
goto free_all_kdata;
}
 
if (p->uf_entry.tv.bo)
-   p->job->uf_addr = uf_offset;
+   p->jobs[p->gang_size - 1]->uf_addr = uf_offset;
kvfree(chunk_array);
 
/* Use this opportunity to fill in task info for the vm */
@@ -301,22 +317,18 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
return ret;
 }
 
-static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
-  struct amdgpu_cs_chunk *chunk,
-  unsigned int *num_ibs,
-  unsigned int *ce_preempt,
-  unsigned int *de_preempt)
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+  struct amdgpu_ib *ib, struct amdgpu_cs_chunk *chunk,
+  unsigned int *ce_preempt, unsigned int *de_preempt)
 {
-   struct amdgpu_ring *ring = to_amdgpu_ring(p->job->base.sched);
+   struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-   struct amdgpu_ib *ib = &p->job->ibs[*num_ibs];

[PATCH 07/10] drm/amdgpu: move setting the job resources

2022-03-03 Thread Christian König
Move setting the job resources into amdgpu_job.c

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 21 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 17 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  2 ++
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dd9e708fe97f..c6541f7b8f54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -825,9 +825,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm = >vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
-   struct amdgpu_bo *gds;
-   struct amdgpu_bo *gws;
-   struct amdgpu_bo *oa;
int r;
 
	INIT_LIST_HEAD(&p->validated);
@@ -941,22 +938,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 p->bytes_moved_vis);
 
-   gds = p->bo_list->gds_obj;
-   gws = p->bo_list->gws_obj;
-   oa = p->bo_list->oa_obj;
-
-   if (gds) {
-   p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
-   p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
-   }
-   if (gws) {
-   p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
-   p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
-   }
-   if (oa) {
-   p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
-   p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
-   }
+   amdgpu_job_set_resources(p->job, p->bo_list->gds_obj,
+p->bo_list->gws_obj, p->bo_list->oa_obj);
 
if (!r && p->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e4ca62225996..e07ceae36a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -118,6 +118,23 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, 
unsigned size,
return r;
 }
 
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa)
+{
+   if (gds) {
+   job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+   job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+   }
+   if (gws) {
+   job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+   job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+   }
+   if (oa) {
+   job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+   job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+   }
+}
+
 void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index d599c0540b46..0bab8fe0d419 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -77,6 +77,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned 
num_ibs,
 struct amdgpu_job **job, struct amdgpu_vm *vm);
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa);
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
-- 
2.25.1
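
For illustration, a minimal sketch of how a caller hands the special
resource BOs to a job after this change; the helper comes from the patch
above, while the wrapper function is purely hypothetical:

/* Hypothetical wrapper (not from the patch): attach the GDS/GWS/OA
 * BOs of a bo_list to a freshly allocated job.  NULL entries are
 * fine, amdgpu_job_set_resources() skips missing BOs. */
static void example_attach_resources(struct amdgpu_job *job,
                                     struct amdgpu_bo_list *list)
{
        amdgpu_job_set_resources(job, list->gds_obj, list->gws_obj,
                                 list->oa_obj);
}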



[PATCH 09/10] drm/amdgpu: add gang submit backend

2022-03-03 Thread Christian König
Allow submitting jobs as a gang which needs to run on multiple
engines at the same time.

The basic idea is that we have a global gang submit fence representing when the
gang leader, which is pushed last, finally starts to run on the hardware.

Jobs submitted as a gang are never re-submitted in case of a GPU reset since
this won't work and would just deadlock the hardware immediately again.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 28 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h|  3 ++
 4 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7f447ed7a67f..a664d43d7502 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -852,6 +852,7 @@ struct amdgpu_device {
	u64				fence_context;
	unsigned			num_rings;
	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
+	struct dma_fence __rcu		*gang_submit;
	bool				ib_pool_ready;
	struct amdgpu_sa_manager	ib_pools[AMDGPU_IB_POOL_MAX];
	struct amdgpu_sched		gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
@@ -1233,6 +1234,8 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device 
*adev,
struct amdgpu_ring *ring);
 
 void amdgpu_device_halt(struct amdgpu_device *adev);
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+   struct dma_fence *gang);
 
 /* atpx handler */
 #if defined(CONFIG_VGA_SWITCHEROO)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d78141e2c509..a116b8c08827 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3512,6 +3512,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gmc.gart_size = 512 * 1024 * 1024;
adev->accel_working = false;
adev->num_rings = 0;
+   RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
@@ -3989,6 +3990,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
+   dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
 
amdgpu_reset_fini(adev);
 
@@ -5744,3 +5746,35 @@ void amdgpu_device_halt(struct amdgpu_device *adev)
pci_disable_device(pdev);
pci_wait_for_pending_transaction(pdev);
 }
+
+/**
+ * amdgpu_device_switch_gang - switch to a new gang
+ * @adev: amdgpu_device pointer
+ * @gang: the gang to switch to
+ *
+ * Try to switch to a new gang or return a reference to the current gang if
+ * that isn't possible.
+ * Returns: Either NULL if we switched correctly or a reference to the existing
+ * gang.
+ */
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+   struct dma_fence *gang)
+{
+   struct dma_fence *old = NULL;
+
+   do {
+   dma_fence_put(old);
+   old = dma_fence_get_rcu_safe(&adev->gang_submit);
+
+   if (old == gang)
+   break;
+
+   if (!dma_fence_is_signaled(old))
+   return old;
+
+   } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
+old, gang) != old);
+
+   dma_fence_put(old);
+   return NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e07ceae36a5c..059e11c7898c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -169,11 +169,29 @@ static void amdgpu_job_free_cb(struct drm_sched_job 
*s_job)
kfree(job);
 }
 
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+   struct amdgpu_job *leader)
+{
+   struct dma_fence *fence = &leader->base.s_fence->scheduled;
+
+   WARN_ON(job->gang_submit);
+
+   /*
+* Don't add a reference when we are the gang leader to avoid a
+* circular dependency.
+*/
+   if (job != leader)
+   dma_fence_get(fence);
+   job->gang_submit = fence;
+}
+
 void amdgpu_job_free(struct amdgpu_job *job)
 {
amdgpu_job_free_resources(job);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
+   if (job->gang_submit != &job->base.s_fence->scheduled)
+   dma_fence_put(job->gang_submit);
 
/* only put the hw fence if has embedded fence */
if (job->hw_fence.ops != NULL)
@@ 
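
To make the lockless loop in amdgpu_device_switch_gang() concrete, here
is a hedged sketch of how a submission path might use it; the
wait-and-retry wrapper is an assumption for illustration, not part of
the patch:

/* Illustrative only: publish a new gang fence, waiting for any
 * previous unsignaled gang to reach the hardware first. */
static int example_publish_gang(struct amdgpu_device *adev,
                                struct dma_fence *gang)
{
        struct dma_fence *old;
        long r;

        while ((old = amdgpu_device_switch_gang(adev, gang))) {
                /* The previous gang is still in flight, wait for it. */
                r = dma_fence_wait(old, true);
                dma_fence_put(old);
                if (r)
                        return r;
        }
        return 0;
}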

[PATCH 08/10] drm/amdgpu: initialize the vmid_wait with the stub fence

2022-03-03 Thread Christian König
This way we don't need to check for NULL any more.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c  | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ddf46802b1ff..4ba4b54092f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -188,7 +188,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
unsigned i;
int r;
 
-   if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
+   if (!dma_fence_is_signaled(ring->vmid_wait))
return amdgpu_sync_fence(sync, ring->vmid_wait);
 
fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 35bcb6dc1816..7f33ae87cb41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -193,6 +193,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
adev->rings[ring->idx] = ring;
ring->num_hw_submission = sched_hw_submission;
ring->sched_score = sched_score;
+   ring->vmid_wait = dma_fence_get_stub();
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
-- 
2.25.1
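
The property this relies on is that the stub fence is created already
signaled, so dma_fence_is_signaled() simply returns true where the old
code had to special-case NULL. A standalone demo of that contract
(illustrative, not driver code):

/* The stub fence is born signaled; a signal check can therefore
 * replace a NULL check on lazily initialized fence pointers. */
static void example_stub_fence(void)
{
        struct dma_fence *f = dma_fence_get_stub();

        WARN_ON(!dma_fence_is_signaled(f));
        dma_fence_put(f);
}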



[PATCH 06/10] drm/amdgpu: properly imbed the IBs into the job

2022-03-03 Thread Christian König
We now have standard macros for that.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 6 --
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 38c9fd7b7ad4..e4ca62225996 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -78,14 +78,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 struct amdgpu_job **job, struct amdgpu_vm *vm)
 {
-   size_t size = sizeof(struct amdgpu_job);
-
if (num_ibs == 0)
return -EINVAL;
 
-   size += sizeof(struct amdgpu_ib) * num_ibs;
-
-   *job = kzalloc(size, GFP_KERNEL);
+   *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
if (!*job)
return -ENOMEM;
 
@@ -95,7 +91,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned 
num_ibs,
 */
(*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
-   (*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
 
amdgpu_sync_create(&(*job)->sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 6d704772ff42..d599c0540b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -25,6 +25,7 @@
 
 #include 
 #include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
 
 /* bit set means command submit involves a preamble IB */
 #define AMDGPU_PREAMBLE_IB_PRESENT  (1 << 0)
@@ -48,12 +49,10 @@ struct amdgpu_job {
	struct amdgpu_vm	*vm;
	struct amdgpu_sync	sync;
	struct amdgpu_sync	sched_sync;
-	struct amdgpu_ib	*ibs;
	struct dma_fence	hw_fence;
	struct dma_fence	*external_hw_fence;
	uint32_t		preamble_status;
	uint32_t		preemption_status;
-	uint32_t		num_ibs;
	bool			vm_needs_flush;
	uint64_t		vm_pd_addr;
	unsigned		vmid;
@@ -69,6 +68,9 @@ struct amdgpu_job {
 
/* job_run_counter >= 1 means a resubmit job */
	uint32_t		job_run_counter;
+
+	uint32_t		num_ibs;
+	struct amdgpu_ib	ibs[];
 };
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-- 
2.25.1
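
For readers unfamiliar with the macro: struct_size() computes the
allocation size of a structure with a trailing flexible array member
and saturates instead of overflowing. A minimal sketch of the pattern,
with an illustrative struct rather than the real amdgpu_job:

#include <linux/overflow.h>
#include <linux/slab.h>

struct example_job {
        unsigned int            num_ibs;
        struct amdgpu_ib        ibs[];  /* flexible array member */
};

static struct example_job *example_alloc(unsigned int num_ibs)
{
        struct example_job *job;

        /* struct_size(job, ibs, num_ibs) == sizeof(*job) +
         * num_ibs * sizeof(job->ibs[0]), with overflow checking. */
        job = kzalloc(struct_size(job, ibs, num_ibs), GFP_KERNEL);
        if (job)
                job->num_ibs = num_ibs;
        return job;
}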



[PATCH 05/10] drm/amdgpu: use job and ib structures directly in CS parsers

2022-03-03 Thread Christian König
Instead of providing the ib index, provide the job and ib pointers directly to
the patch and parse functions for UVD and VCE.

Also move the set/get functions for IB values to the IB declarations.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h   |  13 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  23 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c  |  36 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h  |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  | 116 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h  |   7 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c|  13 +--
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c|  25 ++---
 9 files changed, 129 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20bf6134baca..dd9e708fe97f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1024,12 +1024,14 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser 
*p)
memcpy(ib->ptr, kptr, job->ibs[i].length_dw * 4);
amdgpu_bo_kunmap(aobj);
 
-   r = amdgpu_ring_parse_cs(ring, p, i);
+   r = amdgpu_ring_parse_cs(ring, p, p->job,
+&p->job->ibs[i]);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
-   r = amdgpu_ring_patch_cs_in_place(ring, p, i);
+   r = amdgpu_ring_patch_cs_in_place(ring, p, p->job,
+ &p->job->ibs[i]);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index 30136eb50d2a..652b5593499f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -73,19 +73,6 @@ struct amdgpu_cs_parser {
struct amdgpu_cs_post_dep   *post_deps;
 };
 
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx)
-{
-   return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
-  uint32_t ib_idx, int idx,
-  uint32_t value)
-{
-   p->job->ibs[ib_idx].ptr[idx] = value;
-}
-
 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
   uint64_t addr, struct amdgpu_bo **bo,
   struct amdgpu_bo_va_mapping **mapping);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 05e789fc7a9e..a8bed1b47899 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -163,8 +163,12 @@ struct amdgpu_ring_funcs {
u64 (*get_wptr)(struct amdgpu_ring *ring);
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
-   int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-   int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+   int (*parse_cs)(struct amdgpu_cs_parser *p,
+   struct amdgpu_job *job,
+   struct amdgpu_ib *ib);
+   int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+struct amdgpu_job *job,
+struct amdgpu_ib *ib);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
@@ -264,8 +268,8 @@ struct amdgpu_ring {
	atomic_t		*sched_score;
 };
 
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
@@ -364,6 +368,17 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
 void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
  struct amdgpu_ring *ring);
 
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+   return ib->ptr[idx];
+}
+
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+  uint32_t value)
+{
+   ib->ptr[idx] = value;
+}
+
 int 
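
A hedged sketch of what a ring parse_cs callback looks like with the
new prototype and accessors; the dword scan and the rejected value are
made up for illustration:

/* Hypothetical parser: job and ib now arrive as pointers instead
 * of an index into p->job->ibs[]. */
static int example_parse_cs(struct amdgpu_cs_parser *p,
                            struct amdgpu_job *job,
                            struct amdgpu_ib *ib)
{
        unsigned int i;

        for (i = 0; i < ib->length_dw; ++i) {
                u32 dw = amdgpu_ib_get_value(ib, i);

                if (dw == 0xdead0000)   /* made-up forbidden value */
                        return -EINVAL;
        }
        return 0;
}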

[PATCH 04/10] drm/amdgpu: remove SRIOV and MCBP dependencies from the CS

2022-03-03 Thread Christian König
We should not have any different CS constraints based
on the execution environment.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 58ddc4241f04..20bf6134baca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -320,8 +320,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
return -EINVAL;
 
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
-   chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT &&
-   (amdgpu_mcbp || amdgpu_sriov_vf(p->adev))) {
+   chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
(*ce_preempt)++;
else
@@ -1062,7 +1061,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser 
*p)
if (r)
return r;
 
-   if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+   if (fpriv->csa_va) {
bo_va = fpriv->csa_va;
r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
if (r)
-- 
2.25.1



[PATCH 03/10] drm/amdgpu: cleanup and reorder amdgpu_cs.c

2022-03-03 Thread Christian König
Sort the functions in the order they are called and clean up the coding
style and function names to represent the data they process.

Check the size of the IB chunk and initialize the resulting entity and
scheduler job much earlier as well.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1374 
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h |2 +-
 2 files changed, 683 insertions(+), 693 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6b6a9d925994..58ddc4241f04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -39,9 +39,61 @@
 #include "amdgpu_gem.h"
 #include "amdgpu_ras.h"
 
-static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_cs_chunk_fence *data,
- uint32_t *offset)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+struct amdgpu_device *adev,
+struct drm_file *filp,
+union drm_amdgpu_cs *cs)
+{
+   struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+   if (cs->in.num_chunks == 0)
+   return -EINVAL;
+
+   memset(p, 0, sizeof(*p));
+   p->adev = adev;
+   p->filp = filp;
+
+   p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+   if (!p->ctx)
+   return -EINVAL;
+
+   if (atomic_read(&p->ctx->guilty)) {
+   amdgpu_ctx_put(p->ctx);
+   return -ECANCELED;
+   }
+   return 0;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+  struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+  unsigned int *num_ibs)
+{
+   struct drm_sched_entity *entity;
+   int r;
+
+   r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+   if (r)
+   return r;
+
+   /* Abort if there is no run queue associated with this entity.
+* Possibly because of disabled HW IP */
+   if (entity->rq == NULL)
+   return -EINVAL;
+
+   /* Currently we don't support submitting to multiple entities */
+   if (p->entity && p->entity != entity)
+   return -EINVAL;
+
+   p->entity = entity;
+   ++(*num_ibs);
+   return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+  struct drm_amdgpu_cs_chunk_fence *data,
+  uint32_t *offset)
 {
struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
@@ -80,11 +132,11 @@ static int amdgpu_cs_user_fence_chunk(struct 
amdgpu_cs_parser *p,
return r;
 }
 
-static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_bo_list_in *data)
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+  struct drm_amdgpu_bo_list_in *data)
 {
+   struct drm_amdgpu_bo_list_entry *info;
int r;
-   struct drm_amdgpu_bo_list_entry *info = NULL;
 
r = amdgpu_bo_create_list_entry_array(data, &info);
if (r)
@@ -104,7 +156,9 @@ static int amdgpu_cs_bo_handles_chunk(struct 
amdgpu_cs_parser *p,
return r;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
drm_amdgpu_cs *cs)
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+  union drm_amdgpu_cs *cs)
 {
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = >vm;
@@ -112,28 +166,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
-   int i;
int ret;
+   int i;
 
-   if (cs->in.num_chunks == 0)
-   return 0;
-
-   chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+   chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
+GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
 
-   p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
-   if (!p->ctx) {
-   ret = -EINVAL;
-   goto free_chunk;
-   }
-
-   /* skip guilty context job */
-   if (atomic_read(&p->ctx->guilty) == 1) {
-   ret = -ECANCELED;
-   goto free_chunk;
-   }
-
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
@@ -168,7 +208,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser 
*p, union drm_amdgpu_cs
size = 
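
The first pass now copies the user-space chunk array before anything
else is validated. A condensed sketch of that pattern, with error
handling trimmed down for illustration:

/* Illustrative pass-1 copy: fetch the array of chunk pointers from
 * user space in one go before classifying the individual chunks. */
static int example_copy_chunks(union drm_amdgpu_cs *cs,
                               uint64_t **chunk_array)
{
        uint64_t __user *user = u64_to_user_ptr(cs->in.chunks);

        *chunk_array = kvmalloc_array(cs->in.num_chunks,
                                      sizeof(uint64_t), GFP_KERNEL);
        if (!*chunk_array)
                return -ENOMEM;

        if (copy_from_user(*chunk_array, user,
                           sizeof(uint64_t) * cs->in.num_chunks)) {
                kvfree(*chunk_array);
                return -EFAULT;
        }
        return 0;
}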

[PATCH 02/10] drm/amdgpu: header cleanup

2022-03-03 Thread Christian König
No functional change; just move a bunch of definitions from amdgpu.h into
separate header files.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 95 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h| 93 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h   |  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 35 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_trace_points.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c |  1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c |  1 +
 10 files changed, 132 insertions(+), 100 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b89406b01694..7f447ed7a67f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -60,7 +60,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include "dm_pp_interface.h"
@@ -276,9 +275,6 @@ extern int amdgpu_num_kcq;
 #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
 
 struct amdgpu_device;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
 struct amdgpu_irq_src;
 struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
@@ -465,20 +461,6 @@ struct amdgpu_flip_work {
 };
 
 
-/*
- * CP & rings.
- */
-
-struct amdgpu_ib {
-	struct amdgpu_sa_bo	*sa_bo;
-	uint32_t		length_dw;
-	uint64_t		gpu_addr;
-	uint32_t		*ptr;
-	uint32_t		flags;
-};
-
-extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-
 /*
  * file private structure
  */
@@ -494,79 +476,6 @@ struct amdgpu_fpriv {
 
 int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
 
-int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size,
- enum amdgpu_ib_pool_type pool,
- struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-   struct dma_fence *f);
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-  struct amdgpu_ib *ibs, struct amdgpu_job *job,
-  struct dma_fence **f);
-int amdgpu_ib_pool_init(struct amdgpu_device *adev);
-void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
-int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
-/*
- * CS.
- */
-struct amdgpu_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	void			*kdata;
-};
-
-struct amdgpu_cs_post_dep {
-	struct drm_syncobj *syncobj;
-	struct dma_fence_chain *chain;
-	u64 point;
-};
-
-struct amdgpu_cs_parser {
-	struct amdgpu_device	*adev;
-	struct drm_file		*filp;
-	struct amdgpu_ctx	*ctx;
-
-	/* chunks */
-	unsigned		nchunks;
-	struct amdgpu_cs_chunk	*chunks;
-
-	/* scheduler job object */
-	struct amdgpu_job	*job;
-	struct drm_sched_entity	*entity;
-
-	/* buffer objects */
-	struct ww_acquire_ctx	ticket;
-	struct amdgpu_bo_list	*bo_list;
-	struct amdgpu_mn	*mn;
-	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head	validated;
-	struct dma_fence	*fence;
-	uint64_t		bytes_moved_threshold;
-	uint64_t		bytes_moved_vis_threshold;
-	uint64_t		bytes_moved;
-	uint64_t		bytes_moved_vis;
-
-	/* user fence */
-	struct amdgpu_bo_list_entry uf_entry;
-
-	unsigned		num_post_deps;
-	struct amdgpu_cs_post_dep *post_deps;
-};
-
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
- uint32_t ib_idx, int idx)
-{
-   return p->job->ibs[ib_idx].ptr[idx];
-}
-
-static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
-  uint32_t ib_idx, int idx,
-  uint32_t value)
-{
-   p->job->ibs[ib_idx].ptr[idx] = value;
-}
-
 /*
  * Writeback
  */
@@ -1425,10 +1334,6 @@ static inline int amdgpu_acpi_smart_shift_update(struct 
drm_device *dev,
 enum amdgpu_ss ss_state) { 
return 0; }
 #endif
 
-int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
-  uint64_t addr, struct amdgpu_bo **bo,
-  struct amdgpu_bo_va_mapping **mapping);
-
 #if defined(CONFIG_DRM_AMD_DC)
 int amdgpu_dm_display_resume(struct amdgpu_device *adev );
 #else
diff --git 

[PATCH 01/10] drm/amdgpu: install ctx entities with cmpxchg

2022-03-03 Thread Christian König
Since we removed the context lock, we need to make sure that no two threads
try to install an entity at the same time.

Signed-off-by: Christian König 
Fixes: e68efb27647f ("drm/amdgpu: remove ctx->lock")
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c1f8b0e37b93..72c5f1c53d6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -204,9 +204,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, 
u32 hw_ip,
if (r)
goto error_free_entity;
 
-   ctx->entities[hw_ip][ring] = entity;
+   /* It's not an error if we fail to install the new entity */
+   if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+   goto cleanup_entity;
+
return 0;
 
+cleanup_entity:
+   drm_sched_entity_fini(&entity->entity);
+
 error_free_entity:
kfree(entity);
 
-- 
2.25.1
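
The fix uses the install-or-discard idiom: every racing thread builds
its own entity, but only the first cmpxchg() publishes one and the
losers tear theirs down. A generic sketch of the idiom with simplified,
illustrative types:

#include <linux/slab.h>

struct my_entity { int dummy; };        /* stand-in for amdgpu_ctx_entity */

/* The slot goes from NULL to a winner exactly once; a racing loser
 * frees its private copy and continues with the published one. */
static struct my_entity *example_install(struct my_entity **slot,
                                         struct my_entity *mine)
{
        if (cmpxchg(slot, NULL, mine) != NULL) {
                kfree(mine);            /* lost the race, not an error */
                return READ_ONCE(*slot);
        }
        return mine;
}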



Gang submit

2022-03-03 Thread Christian König
Hi guys,

this patch set implements the requirement for so-called gang submissions in
the CS interface.

A gang submission guarantees that multiple IBs can run on different engines at 
the same time.

This is implemented by keeping a global per-device gang around, represented by a
dma_fence which signals as soon as all jobs in a gang are pushed to the
hardware.

The effect is that, as long as members of a gang are waiting to be submitted,
no other gang can start pushing jobs to the hardware, and so deadlocks are
effectively prevented.

The whole set is based on top of my dma_resv_usage work and a few patches
merged over from amd-staging-drm-next, so it won't apply cleanly anywhere else.

Please review and comment,
Christian.




[PATCH 2/2] drm/amdkfd: implement get_atc_vmid_pasid_mapping_info for gfx10.3

2022-03-03 Thread Yifan Zhang
This patch implements get_atc_vmid_pasid_mapping_info for gfx10.3

Signed-off-by: Yifan Zhang 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index e9c80ce13f3e..ba21ec6b35e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -26,6 +26,8 @@
 #include "gc/gc_10_3_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
 #include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
 #include "soc15_common.h"
 #include "v10_structs.h"
 #include "nv.h"
@@ -606,6 +608,18 @@ static int wave_control_execute_v10_3(struct amdgpu_device 
*adev,
return 0;
 }
 
+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+   uint8_t vmid, uint16_t *p_pasid)
+{
+   uint32_t value;
+
+   value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
++ vmid);
+   *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+   return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
 static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base)
 {
@@ -788,7 +802,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.hqd_destroy = hqd_destroy_v10_3,
.hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
.wave_control_execute = wave_control_execute_v10_3,
-   .get_atc_vmid_pasid_mapping_info = NULL,
+   .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
 #if 0
-- 
2.25.1
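
A hedged sketch of how such a query might be consumed; the caller and
the VMID range below are illustrative assumptions, not part of the
patch:

/* Hypothetical lookup: find which VMID currently maps a PASID. */
static int example_find_vmid(struct amdgpu_device *adev, uint16_t pasid)
{
        uint16_t mapped;
        uint8_t vmid;

        for (vmid = 8; vmid < 16; vmid++) {     /* illustrative range */
                if (get_atc_vmid_pasid_mapping_info_v10_3(adev, vmid,
                                                          &mapped) &&
                    mapped == pasid)
                        return vmid;
        }
        return -EINVAL;
}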



[PATCH 1/2] drm/amdkfd: check get_atc_vmid_pasid_mapping_info before calling it

2022-03-03 Thread Yifan Zhang
Fix the following NULL pointer dereference:

[ 3076.255609] BUG: kernel NULL pointer dereference, address: 
[ 3076.255624] #PF: supervisor instruction fetch in kernel mode
[ 3076.255637] #PF: error_code(0x0010) - not-present page
[ 3076.255649] PGD 0 P4D 0
[ 3076.255660] Oops: 0010 [#1] SMP NOPTI
[ 3076.255669] CPU: 20 PID: 2415 Comm: kfdtest Tainted: GW  OE 
5.11.0-41-generic #45~20.04.1-Ubuntu
[ 3076.255691] Hardware name: AMD Splinter/Splinter-RPL, BIOS VS2326337N.FD 
02/07/2022
[ 3076.255706] RIP: 0010:0x0
[ 3076.255718] Code: Unable to access opcode bytes at RIP 0xffd6.
[ 3076.255732] RSP: 0018:b64283e3fc10 EFLAGS: 00010297
[ 3076.255744] RAX:  RBX: 0008 RCX: 0027
[ 3076.255759] RDX: b64283e3fc1e RSI: 0008 RDI: 8c7a87f6
[ 3076.255776] RBP: b64283e3fc78 R08: 8c7d88518ac0 R09: b64283e3fa60
[ 3076.255791] R10: 0001 R11: 0001 R12: 000f
[ 3076.255805] R13: 8c7bdcea5800 R14: 8c7a9f3f3000 R15: 8c7a8696bc00
[ 3076.255820] FS:  () GS:8c7d8850() 
knlGS:
[ 3076.255839] CS:  0010 DS:  ES:  CR0: 80050033
[ 3076.255851] CR2: ffd6 CR3: 000109e3c000 CR4: 00750ee0
[ 3076.255866] PKRU: 5554
[ 3076.255873] Call Trace:
[ 3076.255884]  dbgdev_wave_reset_wavefronts+0x72/0x160 [amdgpu]
[ 3076.256025]  process_termination_cpsch.cold+0x26/0x2f [amdgpu]
[ 3076.256182]  ? ktime_get_mono_fast_ns+0x4e/0xa0
[ 3076.256196]  kfd_process_dequeue_from_all_devices+0x49/0x70 [amdgpu]
[ 3076.256328]  kfd_process_notifier_release+0x187/0x2b0 [amdgpu]
[ 3076.256451]  ? mn_itree_inv_end+0xdc/0x110
[ 3076.256463]  __mmu_notifier_release+0x74/0x1f0
[ 3076.256474]  exit_mmap+0x170/0x200
[ 3076.256484]  ? __handle_mm_fault+0x677/0x920
[ 3076.256496]  ? _cond_resched+0x19/0x30
[ 3076.256507]  mmput+0x5d/0x130
[ 3076.256518]  do_exit+0x332/0xaf0
[ 3076.256526]  ? handle_mm_fault+0xd7/0x2b0
[ 3076.256537]  do_group_exit+0x43/0xa0
[ 3076.256548]  __x64_sys_exit_group+0x18/0x20
[ 3076.256559]  do_syscall_64+0x38/0x90
[ 3076.256569]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

Signed-off-by: Yifan Zhang 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c  | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1cd2ea536bd0..77364afdc606 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -505,14 +505,16 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_dev 
*dev, struct kfd_process
 * to check which VMID the current process is mapped to.
 */
 
-   for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
-   status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
-   (dev->adev, vmid, &queried_pasid);
-
-   if (status && queried_pasid == p->pasid) {
-   pr_debug("Killing wave fronts of vmid %d and pasid 
0x%x\n",
-   vmid, p->pasid);
-   break;
+   if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
+   for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+   status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+   (dev->adev, vmid, &queried_pasid);
+
+   if (status && queried_pasid == p->pasid) {
+   pr_debug("Killing wave fronts of vmid %d and 
pasid 0x%x\n",
+   vmid, p->pasid);
+   break;
+   }
}
}
 
-- 
2.25.1
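
The general rule the fix follows: a kfd2kgd hook may legitimately be
NULL on ASICs that have not implemented it yet, so every optional hook
needs a guard before the indirect call. A generic sketch (the wrapper
is illustrative, not from the patch):

/* Guarded call into an optional ops hook; an unguarded call through
 * a NULL pointer is exactly the oops quoted above. */
static bool example_query_mapping(struct kfd_dev *dev, uint8_t vmid,
                                  uint16_t *pasid)
{
        if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info)
                return false;   /* ASIC did not wire up the hook */

        return dev->kfd2kgd->get_atc_vmid_pasid_mapping_info(dev->adev,
                                                             vmid, pasid);
}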