[PATCH 2/2] drm/amd/pm: add the missing mapping for PPT feature on SMU13.0.0 and 13.0.7

2023-01-03 Thread Evan Quan
Then we are able to set a new ppt limit via the hwmon interface(power1_cap).

Signed-off-by: Evan Quan 
Change-Id: Ife80277f89065aa8405715e7ae21fb20be7eb706
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index aebdd9747c37..969e5f965540 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -213,6 +213,7 @@ static struct cmn2asic_mapping 
smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(SOC_PCC),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+   [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 5c6c6ad011ca..e87db7e02e8a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -192,6 +192,7 @@ static struct cmn2asic_mapping 
smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(SOC_PCC),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+   [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
-- 
2.34.1



[PATCH 1/2] drm/amd/pm: correct the reference clock for fan speed(rpm) calculation

2023-01-03 Thread Evan Quan
Correct the reference clock as 25Mhz for SMU13 fan speed calculation.

Signed-off-by: Evan Quan 
Change-Id: I9596635b14dd4f9a55ba11324e2e0a3c4d6d3108
--
v1->v2:
  - correct the way for reference clock updating(Lijo)
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 0ac9cac805f9..8bc70ed7ed00 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1261,7 +1261,8 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu,
uint32_t speed)
 {
struct amdgpu_device *adev = smu->adev;
-   uint32_t tach_period, crystal_clock_freq;
+   uint32_t crystal_clock_freq = 2500;
+   uint32_t tach_period;
int ret;
 
if (!speed)
@@ -1271,7 +1272,6 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu,
if (ret)
return ret;
 
-   crystal_clock_freq = amdgpu_asic_get_xclk(adev);
tach_period = 60 * crystal_clock_freq * 1 / (8 * speed);
WREG32_SOC15(THM, 0, regCG_TACH_CTRL,
 REG_SET_FIELD(RREG32_SOC15(THM, 0, regCG_TACH_CTRL),
-- 
2.34.1



Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue

2023-01-03 Thread Yadav, Arvind



On 1/4/2023 12:07 AM, Felix Kuehling wrote:

Am 2023-01-03 um 04:36 schrieb Shashank Sharma:

/*MQD struct for usermode Queue*/
+struct amdgpu_usermode_queue_mqd

This is specific to GC 11.  Every IP and version will have its own MQD
format.  That should live in the IP specific code, not the generic
code.  We already have the generic MQD parameters that we need from
the userq IOCTL.


Noted, we can separate out the generic parameters from gen specific 
parameter, and will try to wrap it around the generic structure.


- Shashank


Is there a reason why you can't use "struct v11_compute_mqd" from 
v11_structs.h?


Hi Felix,

Yes,  V11_compute_mqd does not have these below member which is needed 
for usermode queue.


    uint32_t shadow_base_lo; // offset: 0  (0x0)
    uint32_t shadow_base_hi; // offset: 1  (0x1)
    uint32_t gds_bkup_base_lo ; // offset: 2  (0x2)
    uint32_t gds_bkup_base_hi ; // offset: 3  (0x3)
    uint32_t fw_work_area_base_lo; // offset: 4  (0x4)
    uint32_t fw_work_area_base_hi; // offset: 5  (0x5)
    uint32_t shadow_initialized; // offset: 6  (0x6)
    uint32_t ib_vmid; // offset: 7  (0x7)

So we had to add new MQD structs.

thanks

~arvind



Regards,
  Felix



Re: [PATCH v4 07/27] drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Lazar, Lijo




On 1/4/2023 3:48 AM, Mario Limonciello wrote:

Simplifies the code so that all SDMA versions will get the firmware
name from `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
v3->v4:
  * Move out of IP discovery and instead simplify early_init
v2->v3:
  * Fix dGPU naming scheme
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c |  7 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 +-
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 47 +---
  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 30 +
  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 55 +---
  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 25 +--
  6 files changed, 13 insertions(+), 155 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 9e85a078d918..83e8f0dae647 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -200,15 +200,18 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
  }
  
  int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,

-  char *fw_name, u32 instance,
-  bool duplicate)
+  u32 instance, bool duplicate)
  {
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
int err = 0, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
+   char ucode_prefix[30];
+   char fw_name[40];
  
+	amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));

+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%s.bin", ucode_prefix, !instance ? 
"" : "1");


It is safer to keep the original logic with instance number as suffix 
rather than hardcoding to 1.


Thanks,
Lijo


	err = amdgpu_ucode_load(adev, &adev->sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 7d99205c2e01..2d16e6d36728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -124,8 +124,8 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device 
*adev,
  int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
  struct amdgpu_irq_src *source,
  struct amdgpu_iv_entry *entry);
-int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-char *fw_name, u32 instance, bool duplicate);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+  bool duplicate);
  void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
  bool duplicate);
  void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4d780e4430e7..017ae298558e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -575,60 +575,17 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device 
*adev)
  // vega10 real chip need to use PSP to load firmware
  static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
  {
-   const char *chip_name;
-   char fw_name[30];
int ret, i;
  
-	DRM_DEBUG("\n");

-
-   switch (adev->ip_versions[SDMA0_HWIP][0]) {
-   case IP_VERSION(4, 0, 0):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(4, 0, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(4, 2, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(4, 1, 0):
-   case IP_VERSION(4, 1, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(4, 2, 2):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(4, 1, 2):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(4, 4, 0):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (i == 0)
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma.bin", chip_name);
-   else
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma%d.bin", chip_name, i);
if (adev->ip_versions[SDMA0_HWIP][0] 

Re: [PATCH v4 05/27] drm/amd: Add a new helper for loading/validating microcode

2023-01-03 Thread Lazar, Lijo




On 1/4/2023 3:48 AM, Mario Limonciello wrote:

All microcode runs a basic validation after it's been loaded. Each
IP block as part of init will run both.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v3-v4:
  * New patch
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 24 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  1 +
  2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index eafcddce58d3..8c4a7b09e344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,27 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device 
*adev, int block_type,
  
  	snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);

  }
+
+/*
+ * amdgpu_ucode_load - Load and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name)


'load' also takes a different meaning of loading firmware to ASIC. Maybe 
keep it as 'get' and keep another corresponding common 'put' for 
release_firmware?


Thanks,
Lijo


+{
+   int err = request_firmware(fw, fw_name, adev->dev);
+
+   if (err)
+   return -ENODEV;
+   err = amdgpu_ucode_validate(*fw);
+   if (err)
+   dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+
+   return err;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 552e06929229..b9139fb44506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,7 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
  void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
  void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header 
*hdr);
  int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name);
  bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
  


RE: [PATCH] drm/amdgpu: allow query error counters for specific IP block

2023-01-03 Thread Zhou1, Tao
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou 

> -Original Message-
> From: Zhang, Hawking 
> Sent: Wednesday, January 4, 2023 12:25 AM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao ; Yang,
> Stanley ; Li, Candice ; Chai,
> Thomas 
> Cc: Zhang, Hawking 
> Subject: [PATCH] drm/amdgpu: allow query error counters for specific IP block
> 
> amdgpu_ras_block_late_init will be invoked in IP specific ras_late_init call 
> as a
> common helper for all the IP blocks.
> 
> However, when amdgpu_ras_block_late_init call
> amdgpu_ras_query_error_count to query ras error counters,
> amdgpu_ras_query_error_count queries all the IP blocks that support ras query
> interface.
> 
> This results to wrong error counters cached in software copies when there are
> ras errors detected at time zero or warm reset procedure. i.e., in
> sdma_ras_late_init phase, it counts on sdma/mmhub errors, while, in
> mmhub_ras_late_init phase, it still counts on sdma/mmhub errors.
> 
> The change updates amdgpu_ras_query_error_count interface to allow query
> specific ip error counter.
> It introduces a new input parameter: query_info. if query_info is NULL,  it 
> means
> query all the IP blocks, otherwise, only query the ip block specified by 
> query_info.
> 
> Signed-off-by: Hawking Zhang 
> Reviewed-by: Tao Zhou 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 89 +++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 +-
>  2 files changed, 71 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 35b9f2ed2838..7fed63dc09bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1130,11 +1130,54 @@ int amdgpu_ras_error_inject(struct amdgpu_device
> *adev,  }
> 
>  /**
> - * amdgpu_ras_query_error_count -- Get error counts of all IPs
> + * amdgpu_ras_query_error_count_helper -- Get error counter for
> +specific IP
> + * @adev: pointer to AMD GPU device
> + * @ce_count: pointer to an integer to be set to the count of correctible 
> errors.
> + * @ue_count: pointer to an integer to be set to the count of uncorrectible
> errors.
> + * @query_info: pointer to ras_query_if
> + *
> + * Return 0 for query success or do nothing, otherwise return an error
> + * on failures
> + */
> +static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
> +unsigned long *ce_count,
> +unsigned long *ue_count,
> +struct ras_query_if *query_info) 
> {
> + int ret;
> +
> + if (!query_info)
> + /* do nothing if query_info is not specified */
> + return 0;
> +
> + ret = amdgpu_ras_query_error_status(adev, query_info);
> + if (ret)
> + return ret;
> +
> + *ce_count += query_info->ce_count;
> + *ue_count += query_info->ue_count;
> +
> + /* some hardware/IP supports read to clear
> +  * no need to explicitly reset the err status after the query call */
> + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
> + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
> + if (amdgpu_ras_reset_error_status(adev, query_info-
> >head.block))
> + dev_warn(adev->dev,
> +  "Failed to reset error counter and error
> status\n");
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * amdgpu_ras_query_error_count -- Get error counts of all IPs or
> +specific IP
>   * @adev: pointer to AMD GPU device
>   * @ce_count: pointer to an integer to be set to the count of correctible 
> errors.
>   * @ue_count: pointer to an integer to be set to the count of uncorrectible
>   * errors.
> + * @query_info: pointer to ras_query_if if the query request is only
> + for
> + * specific ip block; if info is NULL, then the query request is for
> + * all the ip blocks that support query ras error counters/status
>   *
>   * If set, @ce_count or @ue_count, count and return the corresponding
>   * error counts in those integer pointers. Return 0 if the device @@ -1142,11
> +1185,13 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
>   */
>  int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
>unsigned long *ce_count,
> -  unsigned long *ue_count)
> +  unsigned long *ue_count,
> +  struct ras_query_if *query_info)
>  {
>   struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>   struct ras_manager *obj;
>   unsigned long ce, ue;
> + int ret;
> 
>   if (!adev->ras_enabled || !con)
>   return -EOPNOTSUPP;
> @@ -1158,26 +1203,23 @@ int amdgpu_ras_query_error_count(struct
> amdgpu_device *adev,
> 
>   ce = 0;
>   ue = 0;
> - list_for_each_entry(obj, &con->head, node) 

Re: [PATCH 2/2] drm/amdgpu: add AMDGPU_INFO_VM_STAT to return GPU VM

2023-01-03 Thread Marek Olšák
I see about the access now, but did you even look at the patch? Because
what the patch does isn't even exposed to common drm code, such as the
preferred domain and visible VRAM placement, so it can't be in fdinfo right
now.

Or do you even know what fdinfo contains? Because it contains nothing
useful. It only has VRAM and GTT usage, which we already have in the INFO
ioctl, so it has nothing that we need. We mainly need the eviction
information and visible VRAM information now. Everything else is a bonus.

Also, it's undesirable to open and parse a text file if we can just call an
ioctl.

So do you want me to move it into amdgpu_vm.c? Because you could have just
said: Let's move it into amdgpu_vm.c. :)

Thanks,
Marek

On Tue, Jan 3, 2023 at 3:33 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Take a look at /proc/self/fdinfo/$fd.
>
> The Intel guys made that vendor agnostic and are using it within their IGT
> gpu top tool.
>
> Christian.
>
> Am 02.01.23 um 18:57 schrieb Marek Olšák:
>
> What are you talking about? Is fdinfo in sysfs? Userspace drivers can't
> access sysfs.
>
> Marek
>
> On Mon, Jan 2, 2023, 10:56 Christian König <
> ckoenig.leichtzumer...@gmail.com> wrote:
>
>> Well first of all don't mess with the VM internals outside of the VM code.
>>
>> Then why would we want to expose this through the IOCTL interface? We
>> already have this in the fdinfo.
>>
>> Christian.
>>
>> Am 30.12.22 um 23:07 schrieb Marek Olšák:
>>
>> To give userspace a detailed view about its GPU memory usage and
>> evictions.
>> This will help performance investigations.
>>
>> Signed-off-by: Marek Olšák 
>>
>> The patch is attached.
>>
>> Marek
>>
>>
>>
>


Re: [PATCH 1/2] drm/amdgpu: return the PCIe gen and lanes from the INFO

2023-01-03 Thread Marek Olšák
I see. Well, those sysfs files are not usable, and I don't think it would
be important even if they were usable, but for completeness:

The ioctl returns:
pcie_gen = 1
pcie_num_lanes = 16

Theoretical bandwidth from those values: 4.0 GB/s
My DMA test shows this write bandwidth: 3.5 GB/s
It matches the expectation.

Let's see the devices (there is only 1 GPU Navi21 in the system):
$ lspci |egrep '(PCI|VGA).*Navi'
0a:00.0 PCI bridge: Advanced Micro Devices, Inc. [AMD/ATI] Navi 10 XL
Upstream Port of PCI Express Switch (rev c3)
0b:00.0 PCI bridge: Advanced Micro Devices, Inc. [AMD/ATI] Navi 10 XL
Downstream Port of PCI Express Switch
0c:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI]
Navi 21 [Radeon RX 6800/6800 XT / 6900 XT] (rev c3)

Let's read sysfs:

$ cat /sys/bus/pci/devices/:0a:00.0/current_link_width
16
$ cat /sys/bus/pci/devices/:0b:00.0/current_link_width
16
$ cat /sys/bus/pci/devices/:0c:00.0/current_link_width
16
$ cat /sys/bus/pci/devices/:0a:00.0/current_link_speed
2.5 GT/s PCIe
$ cat /sys/bus/pci/devices/:0b:00.0/current_link_speed
16.0 GT/s PCIe
$ cat /sys/bus/pci/devices/:0c:00.0/current_link_speed
16.0 GT/s PCIe

Problem 1: None of the speed numbers match 4 GB/s.
Problem 2: Userspace doesn't know the bus index of the bridges, and it's
not clear which bridge should be used.
Problem 3: The PCIe gen number is missing.

That's all irrelevant because all information should be queryable via the
INFO ioctl. It doesn't matter what sysfs contains because UMDs shouldn't
have to open and parse extra files just to read a couple of integers.

Marek


On Tue, Jan 3, 2023 at 3:31 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Sure they can, those files are accessible to everyone.
>
> The massive advantage is that this is standard for all PCIe devices, so it
> should work vendor independent.
>
> Christian.
>
> Am 02.01.23 um 18:55 schrieb Marek Olšák:
>
> Userspace drivers can't access sysfs.
>
> Marek
>
> On Mon, Jan 2, 2023, 10:54 Christian König <
> ckoenig.leichtzumer...@gmail.com> wrote:
>
>> That stuff is already available as current_link_speed and
>> current_link_width in sysfs.
>>
>> I'm a bit reluctant duplicating this information in the IOCTL interface.
>>
>> Christian.
>>
>> Am 30.12.22 um 23:07 schrieb Marek Olšák:
>>
>> For computing PCIe bandwidth in userspace and troubleshooting PCIe
>> bandwidth issues.
>>
>> For example, my Navi21 has been limited to PCIe gen 1 and this is
>> the first time I noticed it after 2 years.
>>
>> Note that this intentionally fills a hole and padding
>> in drm_amdgpu_info_device.
>>
>> Signed-off-by: Marek Olšák 
>>
>> The patch is attached.
>>
>> Marek
>>
>>
>>
>


[PATCH v4 25/27] drm/amd: Use `amdgpu_ucode_load` helper for SMU

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 5 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 5 +
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index d4756bd30830..1d693cda5818 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -109,10 +109,7 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
-   err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->pm.fw);
+   err = amdgpu_ucode_load(adev, &adev->pm.fw, fw_name);
if (err)
goto out;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 506a49a4b425..845a7fc83ba8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -103,10 +103,7 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
-   err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->pm.fw);
+   err = amdgpu_ucode_load(adev, &adev->pm.fw, fw_name);
if (err)
goto out;
 
-- 
2.34.1



[PATCH v4 27/27] drm/amd: Optimize SRIOV switch/case for PSP microcode load

2023-01-03 Thread Mario Limonciello
Now that IP version decoding is used, a number of case statements
can be combined.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch

 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index f45362dd8228..83e253b5d928 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -132,14 +132,8 @@ static int psp_init_sriov_microcode(struct psp_context 
*psp)
 
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(9, 0, 0):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, ucode_prefix);
-   break;
-   case IP_VERSION(11, 0, 9):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, ucode_prefix);
-   break;
case IP_VERSION(11, 0, 7):
+   case IP_VERSION(11, 0, 9):
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
ret = psp_init_cap_microcode(psp, ucode_prefix);
break;
-- 
2.34.1



[PATCH v4 26/27] drm/amd: Load SMU microcode during early_init

2023-01-03 Thread Mario Limonciello
This will ensure that the microcode is available before the firmware
framebuffer has been destroyed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * new patch
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 2fa79f892a92..ec52830dde24 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -623,6 +623,7 @@ static int smu_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu;
+   int r;
 
smu = kzalloc(sizeof(struct smu_context), GFP_KERNEL);
if (!smu)
@@ -640,7 +641,10 @@ static int smu_early_init(void *handle)
adev->powerplay.pp_handle = smu;
	adev->powerplay.pp_funcs = &swsmu_pm_funcs;
 
-   return smu_set_funcs(adev);
+   r = smu_set_funcs(adev);
+   if (r)
+   return r;
+   return smu_init_microcode(smu);
 }
 
 static int smu_set_default_dpm_table(struct smu_context *smu)
@@ -1067,12 +1071,6 @@ static int smu_sw_init(void *handle)
smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
 
-   ret = smu_init_microcode(smu);
-   if (ret) {
-   dev_err(adev->dev, "Failed to load smu firmware!\n");
-   return ret;
-   }
-
ret = smu_smc_table_sw_init(smu);
if (ret) {
dev_err(adev->dev, "Failed to sw init smc table!\n");
-- 
2.34.1



[PATCH v4 23/27] drm/amd: Use `amdgpu_ucode_load` helper for PSP

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 42 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  3 +-
 2 files changed, 11 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3b0644600a1f..f45362dd8228 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2912,11 +2912,7 @@ int psp_init_asd_microcode(struct psp_context *psp, char 
*ucode_prefix)
int err = 0;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.asd_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.asd_fw, fw_name);
if (err)
goto out;
 
@@ -2928,7 +2924,6 @@ int psp_init_asd_microcode(struct psp_context *psp, char 
*ucode_prefix)

le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
return 0;
 out:
-   dev_err(adev->dev, "fail to initialize asd microcode\n");
release_firmware(adev->psp.asd_fw);
adev->psp.asd_fw = NULL;
return err;
@@ -2942,11 +2937,7 @@ int psp_init_toc_microcode(struct psp_context *psp, char 
*ucode_prefix)
int err = 0;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.toc_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.toc_fw, fw_name);
if (err)
goto out;
 
@@ -2958,7 +2949,6 @@ int psp_init_toc_microcode(struct psp_context *psp, char 
*ucode_prefix)

le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
return 0;
 out:
-   dev_err(adev->dev, "fail to request/validate toc microcode\n");
release_firmware(adev->psp.toc_fw);
adev->psp.toc_fw = NULL;
return err;
@@ -3105,11 +3095,7 @@ int psp_init_sos_microcode(struct psp_context *psp, char 
*ucode_prefix)
int fw_index = 0;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.sos_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.sos_fw, fw_name);
if (err)
goto out;
 
@@ -3181,8 +3167,6 @@ int psp_init_sos_microcode(struct psp_context *psp, char 
*ucode_prefix)
 
return 0;
 out:
-   dev_err(adev->dev,
-   "failed to init sos firmware\n");
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
 
@@ -3340,10 +3324,7 @@ int psp_init_ta_microcode(struct psp_context *psp, char 
*ucode_prefix)
int err;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   return err;
-   err = amdgpu_ucode_validate(adev->psp.ta_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.ta_fw, fw_name);
if (err)
return err;
 
@@ -3383,17 +3364,14 @@ int psp_init_cap_microcode(struct psp_context *psp, 
char *ucode_prefix)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_cap.bin", ucode_prefix);
-   err = request_firmware(&adev->psp.cap_fw, fw_name, adev->dev);
-   if (err) {
-   dev_warn(adev->dev, "cap microcode does not exist, skip\n");
-   err = 0;
-   goto out;
-   }
-
-   err = amdgpu_ucode_validate(adev->psp.cap_fw);
+   err = amdgpu_ucode_load(adev, &adev->psp.cap_fw, fw_name);
if (err) {
+   if (err == -ENODEV) {
+   dev_warn(adev->dev, "cap microcode does not exist, 
skip\n");
+   err = 0;
+   goto out;
+   }
dev_err(adev->dev, "fail to initialize cap microcode\n");
-   goto out;
}
 
info = >firmware.ucode[AMDGPU_UCODE_ID_CAP];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 47b88233bf94..415d32306b9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -506,8 +506,7 @@ int psp_init_asd_microcode(struct psp_context *psp, char 
*ucode_prefix);
 int psp_init_toc_microcode(struct psp_context *psp, char *ucode_prefix);
 int psp_init_sos_microcode(struct psp_context *psp, char *ucode_prefix);
 int psp_init_ta_microcode(struct psp_context *psp, char *ucode_prefix);
-int psp_init_cap_microcode(struct psp_context 

[PATCH v4 24/27] drm/amd/display: Load DMUB microcode during early_init

2023-01-03 Thread Mario Limonciello
If DMUB is required for an ASIC, ensure that the microcode is available
and validates during early_init.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 89 ---
 1 file changed, 58 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 4829b5431e4c..eeccc8af0320 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1945,7 +1945,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
-   const char *fw_name_dmub;
enum dmub_asic dmub_asic;
enum dmub_status status;
int r;
@@ -1953,73 +1952,46 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
switch (adev->ip_versions[DCE_HWIP][0]) {
case IP_VERSION(2, 1, 0):
dmub_asic = DMUB_ASIC_DCN21;
-   fw_name_dmub = FIRMWARE_RENOIR_DMUB;
-   if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
-   fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
break;
case IP_VERSION(3, 0, 0):
-   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) {
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
dmub_asic = DMUB_ASIC_DCN30;
-   fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
-   } else {
+   else
dmub_asic = DMUB_ASIC_DCN30;
-   fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
-   }
break;
case IP_VERSION(3, 0, 1):
dmub_asic = DMUB_ASIC_DCN301;
-   fw_name_dmub = FIRMWARE_VANGOGH_DMUB;
break;
case IP_VERSION(3, 0, 2):
dmub_asic = DMUB_ASIC_DCN302;
-   fw_name_dmub = FIRMWARE_DIMGREY_CAVEFISH_DMUB;
break;
case IP_VERSION(3, 0, 3):
dmub_asic = DMUB_ASIC_DCN303;
-   fw_name_dmub = FIRMWARE_BEIGE_GOBY_DMUB;
break;
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
dmub_asic = (adev->external_rev_id == YELLOW_CARP_B0) ? 
DMUB_ASIC_DCN31B : DMUB_ASIC_DCN31;
-   fw_name_dmub = FIRMWARE_YELLOW_CARP_DMUB;
break;
case IP_VERSION(3, 1, 4):
dmub_asic = DMUB_ASIC_DCN314;
-   fw_name_dmub = FIRMWARE_DCN_314_DMUB;
break;
case IP_VERSION(3, 1, 5):
dmub_asic = DMUB_ASIC_DCN315;
-   fw_name_dmub = FIRMWARE_DCN_315_DMUB;
break;
case IP_VERSION(3, 1, 6):
dmub_asic = DMUB_ASIC_DCN316;
-   fw_name_dmub = FIRMWARE_DCN316_DMUB;
break;
case IP_VERSION(3, 2, 0):
dmub_asic = DMUB_ASIC_DCN32;
-   fw_name_dmub = FIRMWARE_DCN_V3_2_0_DMCUB;
break;
case IP_VERSION(3, 2, 1):
dmub_asic = DMUB_ASIC_DCN321;
-   fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
default:
/* ASIC doesn't support DMUB. */
return 0;
}
 
-   r = request_firmware_direct(&adev->dm.dmub_fw, fw_name_dmub, adev->dev);
-   if (r) {
-   DRM_ERROR("DMUB firmware loading failed: %d\n", r);
-   return 0;
-   }
-
-   r = amdgpu_ucode_validate(adev->dm.dmub_fw);
-   if (r) {
-   DRM_ERROR("Couldn't validate DMUB firmware: %d\n", r);
-   return 0;
-   }
-
hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data;
adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
 
@@ -4513,6 +4485,61 @@ DEVICE_ATTR_WO(s3_debug);
 
 #endif
 
+static int dm_init_microcode(struct amdgpu_device *adev)
+{
+   char *fw_name_dmub;
+   int r;
+
+   switch (adev->ip_versions[DCE_HWIP][0]) {
+   case IP_VERSION(2, 1, 0):
+   fw_name_dmub = FIRMWARE_RENOIR_DMUB;
+   if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
+   fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
+   break;
+   case IP_VERSION(3, 0, 0):
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+   fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
+   else
+   fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
+   break;
+   case IP_VERSION(3, 0, 1):
+   fw_name_dmub = FIRMWARE_VANGOGH_DMUB;
+   break;
+   case IP_VERSION(3, 0, 2):
+  

[PATCH v4 20/27] drm/amd: Parse both v1 and v2 TA microcode headers using same function

2023-01-03 Thread Mario Limonciello
Several IP versions duplicate the TA header parsing code and so can't
use the common helpers. Move this code into a single function so that
those IP versions can use the common helpers instead.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 120 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |   9 +-
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c  |  60 +---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  |  74 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c  |  62 +---
 5 files changed, 107 insertions(+), 218 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7a2fc920739b..ac4d675abcb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3272,41 +3272,75 @@ static int parse_ta_bin_descriptor(struct psp_context 
*psp,
return 0;
 }
 
-int psp_init_ta_microcode(struct psp_context *psp,
- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
 {
+   const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
-   char fw_name[PSP_FW_NAME_LEN];
-   const struct ta_firmware_header_v2_0 *ta_hdr;
-   int err = 0;
-   int ta_index = 0;
 
-   if (!chip_name) {
-   dev_err(adev->dev, "invalid chip name for ta microcode\n");
+   ta_hdr = (const struct ta_firmware_header_v1_0 *)
+adev->psp.ta_fw->data;
+
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
+
+   adev->psp.xgmi_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->xgmi.fw_version);
+   adev->psp.xgmi_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->xgmi.size_bytes);
+   adev->psp.xgmi_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.ras_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->ras.fw_version);
+   adev->psp.ras_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->ras.size_bytes);
+   adev->psp.ras_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->ras.offset_bytes);
+   adev->psp.hdcp_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->hdcp.fw_version);
+   adev->psp.hdcp_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->hdcp.size_bytes);
+   adev->psp.hdcp_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.dtm_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->dtm.fw_version);
+   adev->psp.dtm_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->dtm.size_bytes);
+   adev->psp.dtm_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->dtm.offset_bytes);
+   if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+   adev->psp.securedisplay_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->securedisplay.fw_version);
+   adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+   adev->psp.securedisplay_context.context.bin_desc.start_addr =
+   (uint8_t 
*)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
}
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
-   err = request_firmware(>psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   return 0;
+}
 
-   err = amdgpu_ucode_validate(adev->psp.ta_fw);
-   if (err)
-   goto out;
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+   const struct ta_firmware_header_v2_0 *ta_hdr;
+   struct amdgpu_device *adev = psp->adev;
+   int err = 0;
+   int ta_index = 0;
 
ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
 
-   if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {
-   dev_err(adev->dev, "unsupported TA header version\n");
-   err = -EINVAL;
-   goto out;
-   }
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
+   return -EINVAL;
 
if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
  

[PATCH v4 22/27] drm/amd: Load PSP microcode during early_init

2023-01-03 Thread Mario Limonciello
Simplifies the code so that all PSP versions will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * new patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  | 128 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/psp_v10_0.c   |  16 +--
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c   |  55 ++
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c   |  13 +--
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c   |  27 ++---
 drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c |  14 +--
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c|  16 +--
 8 files changed, 79 insertions(+), 193 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index d51fe3431e2b..3b0644600a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -122,6 +122,44 @@ static void 
psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
}
 }
 
+static int psp_init_sriov_microcode(struct psp_context *psp)
+{
+   struct amdgpu_device *adev = psp->adev;
+   char ucode_prefix[30];
+   int ret = 0;
+
+   amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(11, 0, 9):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(11, 0, 7):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(13, 0, 2):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+   ret = psp_init_cap_microcode(psp, ucode_prefix);
+   ret &= psp_init_ta_microcode(psp, ucode_prefix);
+   break;
+   case IP_VERSION(13, 0, 0):
+   adev->virt.autoload_ucode_id = 0;
+   break;
+   case IP_VERSION(13, 0, 10):
+   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return ret;
+}
+
 static int psp_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -192,7 +230,10 @@ static int psp_early_init(void *handle)
 
psp_check_pmfw_centralized_cstate_management(psp);
 
-   return 0;
+   if (amdgpu_sriov_vf(adev))
+   return psp_init_sriov_microcode(psp);
+   else
+   return psp_init_microcode(psp);
 }
 
 void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
@@ -350,42 +391,6 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device 
*adev,
return ret;
 }
 
-static int psp_init_sriov_microcode(struct psp_context *psp)
-{
-   struct amdgpu_device *adev = psp->adev;
-   int ret = 0;
-
-   switch (adev->ip_versions[MP0_HWIP][0]) {
-   case IP_VERSION(9, 0, 0):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "vega10");
-   break;
-   case IP_VERSION(11, 0, 9):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "navi12");
-   break;
-   case IP_VERSION(11, 0, 7):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "sienna_cichlid");
-   break;
-   case IP_VERSION(13, 0, 2):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
-   ret = psp_init_cap_microcode(psp, "aldebaran");
-   ret &= psp_init_ta_microcode(psp, "aldebaran");
-   break;
-   case IP_VERSION(13, 0, 0):
-   adev->virt.autoload_ucode_id = 0;
-   break;
-   case IP_VERSION(13, 0, 10):
-   adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
-   break;
-   default:
-   ret = -EINVAL;
-   break;
-   }
-   return ret;
-}
-
 static int psp_sw_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -401,15 +406,6 @@ static int psp_sw_init(void *handle)
ret = -ENOMEM;
}
 
-   if (amdgpu_sriov_vf(adev))
-   ret = psp_init_sriov_microcode(psp);
-   else
-   ret = 

[PATCH v4 21/27] drm/amd: Avoid BUG() for case of SRIOV missing IP version

2023-01-03 Thread Mario Limonciello
No need to crash the kernel.  AMDGPU will now fail to probe.

Signed-off-by: Mario Limonciello 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ac4d675abcb5..d51fe3431e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -380,7 +380,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
break;
default:
-   BUG();
+   ret = -EINVAL;
break;
}
return ret;
-- 
2.34.1



[PATCH v4 16/27] drm/amd: Use `amdgpu_ucode_load` helper for GFX10

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 35 ++
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 49d34c7bbf20..5f6b59e23313 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4030,41 +4030,31 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, 
wks);
-   err = request_firmware(>gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_load(adev, >gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, 
wks);
-   err = request_firmware(>gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_load(adev, >gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, 
wks);
-   err = request_firmware(>gfx.ce_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+   err = amdgpu_ucode_load(adev, >gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
-   err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   err = amdgpu_ucode_load(adev, >gfx.rlc_fw, fw_name);
/* don't check this.  There are apparently firmwares in the 
wild with
 * incorrect size in the header
 */
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   if (err == -ENODEV)
+   goto out;
if (err)
dev_dbg(adev->dev,
"gfx10: amdgpu_ucode_validate() failed 
\"%s\"\n",
@@ -4078,21 +4068,15 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, 
wks);
-   err = request_firmware(>gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_load(adev, >gfx.mec_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, 
wks);
-   err = request_firmware(>gfx.mec2_fw, fw_name, adev->dev);
+   err = amdgpu_ucode_load(adev, >gfx.mec2_fw, fw_name);
if (!err) {
-   err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
-   if (err)
-   goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
@@ -4103,9 +4087,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device 
*adev)
gfx_v10_0_check_fw_write_wait(adev);
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx10: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
-- 
2.34.1



[PATCH v4 18/27] drm/amd: Use `amdgpu_ucode_load` helper for GFX11

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 23 ---
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a56c6e106d00..0c77d165caf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -457,10 +457,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
-   err = request_firmware(>gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_load(adev, >gfx.pfp_fw, fw_name);
if (err)
goto out;
/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
@@ -477,10 +474,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
-   err = request_firmware(>gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_load(adev, >gfx.me_fw, fw_name);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -493,10 +487,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
 
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
ucode_prefix);
-   err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   err = amdgpu_ucode_load(adev, >gfx.rlc_fw, fw_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
@@ -508,10 +499,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
}
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
-   err = request_firmware(>gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_load(adev, >gfx.mec_fw, fw_name);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -530,9 +518,6 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
 
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx11: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
-- 
2.34.1



[PATCH v4 19/27] drm/amd: Load GFX11 microcode during early_init

2023-01-03 Thread Mario Limonciello
If GFX11 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for GFX11 microcode into the early_init phase
so that if it's not available, driver init will fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move to early_init phase
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 78 ++
 1 file changed, 30 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 0c77d165caf7..5c7bc286618a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -443,6 +443,30 @@ static void gfx_v11_0_free_microcode(struct amdgpu_device 
*adev)
kfree(adev->gfx.rlc.register_list_format);
 }
 
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, char 
*ucode_prefix)
+{
+   const struct psp_firmware_header_v1_0 *toc_hdr;
+   int err = 0;
+   char fw_name[40];
+
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
+   err = amdgpu_ucode_load(adev, >psp.toc_fw, fw_name);
+   if (err)
+   goto out;
+
+   toc_hdr = (const struct psp_firmware_header_v1_0 
*)adev->psp.toc_fw->data;
+   adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+   adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+   adev->psp.toc.size_bytes = 
le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+   adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+   
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+   return 0;
+out:
+   release_firmware(adev->psp.toc_fw);
+   adev->psp.toc_fw = NULL;
+   return err;
+}
+
 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 {
char fw_name[40];
@@ -513,6 +537,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
}
 
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+   err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
 
@@ -531,38 +558,6 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
return err;
 }
 
-static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
-{
-   const struct psp_firmware_header_v1_0 *toc_hdr;
-   int err = 0;
-   char fw_name[40];
-   char ucode_prefix[30];
-
-   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
-   err = request_firmware(>psp.toc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-
-   err = amdgpu_ucode_validate(adev->psp.toc_fw);
-   if (err)
-   goto out;
-
-   toc_hdr = (const struct psp_firmware_header_v1_0 
*)adev->psp.toc_fw->data;
-   adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
-   adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
-   adev->psp.toc.size_bytes = 
le32_to_cpu(toc_hdr->header.ucode_size_bytes);
-   adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
-   
le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
-   return 0;
-out:
-   dev_err(adev->dev, "Failed to load TOC microcode\n");
-   release_firmware(adev->psp.toc_fw);
-   adev->psp.toc_fw = NULL;
-   return err;
-}
-
 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
 {
u32 count = 0;
@@ -699,19 +694,11 @@ static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
amdgpu_bo_free_kernel(>gfx.mec.mec_fw_data_obj, NULL, NULL);
 }
 
-static int gfx_v11_0_me_init(struct amdgpu_device *adev)
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
 {
-   int r;
-
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
 
amdgpu_gfx_graphics_queue_acquire(adev);
-
-   r = gfx_v11_0_init_microcode(adev);
-   if (r)
-   DRM_ERROR("Failed to load gfx firmware!\n");
-
-   return r;
 }
 
 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
@@ -1324,9 +1311,7 @@ static int gfx_v11_0_sw_init(void *handle)
}
}
 
-   r = gfx_v11_0_me_init(adev);
-   if (r)
-   return r;
+   gfx_v11_0_me_init(adev);
 
r = gfx_v11_0_rlc_init(adev);
if (r) {
@@ -1394,9 +1379,6 @@ static int gfx_v11_0_sw_init(void *handle)
 
/* allocate visible FB for rlc auto-loading fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
-   r = gfx_v11_0_init_toc_microcode(adev);
-   if (r)
-   dev_err(adev->dev, "Failed to load toc 

[PATCH v4 17/27] drm/amd: Load GFX10 microcode during early_init

2023-01-03 Thread Mario Limonciello
Simplifies the code so that GFX10 will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Any failures will cause the driver to fail to probe before the firmware
framebuffer has been removed.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move out of discovery into early_init
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 82 ++
 1 file changed, 17 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 5f6b59e23313..75781722c7e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3974,9 +3974,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct 
amdgpu_device *adev)
 
 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
char fw_name[40];
-   char *wks = "";
+   char ucode_prefix[30];
+   const char *wks = "";
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -3984,71 +3984,31 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
 
DRM_DEBUG("\n");
 
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(10, 1, 10):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(10, 1, 1):
-   chip_name = "navi14";
-   if (!(adev->pdev->device == 0x7340 &&
- adev->pdev->revision != 0x00))
-   wks = "_wks";
-   break;
-   case IP_VERSION(10, 1, 2):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(10, 3, 0):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(10, 3, 2):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(10, 3, 1):
-   chip_name = "vangogh";
-   break;
-   case IP_VERSION(10, 3, 4):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(10, 3, 5):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(10, 3, 3):
-   chip_name = "yellow_carp";
-   break;
-   case IP_VERSION(10, 3, 6):
-   chip_name = "gc_10_3_6";
-   break;
-   case IP_VERSION(10, 1, 3):
-   case IP_VERSION(10, 1, 4):
-   chip_name = "cyan_skillfish2";
-   break;
-   case IP_VERSION(10, 3, 7):
-   chip_name = "gc_10_3_7";
-   break;
-   default:
-   BUG();
-   }
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) &&
+  (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+   wks = "_wks";
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, >gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, >gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, >gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
if (!amdgpu_sriov_vf(adev)) {
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
ucode_prefix);
err = amdgpu_ucode_load(adev, >gfx.rlc_fw, fw_name);
/* don't check this.  There are apparently firmwares in the 
wild with
 * incorrect size in the header
@@ -4067,14 +4027,14 @@ static int gfx_v10_0_init_microcode(struct 
amdgpu_device *adev)
goto out;
}
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, 
wks);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, 
wks);
err = amdgpu_ucode_load(adev, >gfx.mec_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
 
-  

[PATCH v4 13/27] drm/amd: Remove superfluous assignment for `adev->mes.adev`

2023-01-03 Thread Mario Limonciello
`amdgpu_mes_init` already sets `adev->mes.adev`, so there is no need
to also set it in the IP specific versions.

Signed-off-by: Mario Limonciello 
---
v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 1 -
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 9c5ff8b7c202..f58debf2783c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -931,7 +931,6 @@ static int mes_v10_1_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
 
-   adev->mes.adev = adev;
adev->mes.funcs = _v10_1_funcs;
adev->mes.kiq_hw_init = _v10_1_kiq_hw_init;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 3af77a32baac..c8bdee9a66c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1020,7 +1020,6 @@ static int mes_v11_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int pipe, r;
 
-   adev->mes.adev = adev;
adev->mes.funcs = _v11_0_funcs;
adev->mes.kiq_hw_init = _v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = _v11_0_kiq_hw_fini;
-- 
2.34.1



[PATCH v4 11/27] drm/amd: Load MES microcode during early_init

2023-01-03 Thread Mario Limonciello
Add an early_init phase to MES for fetching and validating microcode
from the filesystem.

If MES microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Move the request for MES microcode into the early_init phase
so that if it's not available, early_init will fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Introduce new early_init phase for MES
v2->v3:
 * Add a missing newline
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 65 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  1 +
 drivers/gpu/drm/amd/amdgpu/mes_v10_1.c  | 97 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 88 +-
 4 files changed, 100 insertions(+), 151 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 0c546245793b..dd8f35234507 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -21,6 +21,8 @@
  *
  */
 
+#include 
+
 #include "amdgpu_mes.h"
 #include "amdgpu.h"
 #include "soc15_common.h"
@@ -1423,3 +1425,66 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
kfree(vm);
return 0;
 }
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
+{
+   const struct mes_firmware_header_v1_0 *mes_hdr;
+   struct amdgpu_firmware_info *info;
+   char ucode_prefix[30];
+   char fw_name[40];
+   int r;
+
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+   ucode_prefix,
+   pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
+   r = request_firmware(>mes.fw[pipe], fw_name, adev->dev);
+   if (r)
+   goto out;
+
+   r = amdgpu_ucode_validate(adev->mes.fw[pipe]);
+   if (r)
+   goto out;
+
+   mes_hdr = (const struct mes_firmware_header_v1_0 *)
+   adev->mes.fw[pipe]->data;
+   adev->mes.uc_start_addr[pipe] =
+   le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+   ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+   adev->mes.data_start_addr[pipe] =
+   le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+   ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 
32);
+
+   if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+   int ucode, ucode_data;
+
+   if (pipe == AMDGPU_MES_SCHED_PIPE) {
+   ucode = AMDGPU_UCODE_ID_CP_MES;
+   ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
+   } else {
+   ucode = AMDGPU_UCODE_ID_CP_MES1;
+   ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
+   }
+
+   info = >firmware.ucode[ucode];
+   info->ucode_id = ucode;
+   info->fw = adev->mes.fw[pipe];
+   adev->firmware.fw_size +=
+   ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+ PAGE_SIZE);
+
+   info = >firmware.ucode[ucode_data];
+   info->ucode_id = ucode_data;
+   info->fw = adev->mes.fw[pipe];
+   adev->firmware.fw_size +=
+   ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+ PAGE_SIZE);
+   }
+
+   return 0;
+
+out:
+   release_firmware(adev->mes.fw[pipe]);
+   adev->mes.fw[pipe] = NULL;
+   return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 97c05d08a551..547ec35691fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -306,6 +306,7 @@ struct amdgpu_mes_funcs {
 
 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
 
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
 int amdgpu_mes_init(struct amdgpu_device *adev);
 void amdgpu_mes_fini(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 614394118a53..9c5ff8b7c202 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -379,82 +379,6 @@ static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.resume_gang = mes_v10_1_resume_gang,
 };
 
-static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
-   enum admgpu_mes_pipe pipe)
-{
-   const char *chip_name;
-   char fw_name[30];
-   int err;
-   const struct mes_firmware_header_v1_0 *mes_hdr;
-   struct amdgpu_firmware_info *info;
-
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(10, 1, 10):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(10, 3, 0):
-   

[PATCH v4 15/27] drm/amd: Load GFX9 microcode during early_init

2023-01-03 Thread Mario Limonciello
If GFX9 microcode is required but not available during early init, the
firmware framebuffer will have already been released and the screen will
freeze.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move microcode load phase to early init
v2->v3:
 * Fix issues found on real hardware where num_gfx_rings is not set
   during discovery
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 58 +--
 1 file changed, 9 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 27040821d764..4e9c230e42ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1251,7 +1251,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct 
amdgpu_device *adev)
 }
 
 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
 {
char fw_name[30];
int err;
@@ -1287,7 +1287,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct 
amdgpu_device *adev,
 }
 
 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+  char *chip_name)
 {
char fw_name[30];
int err;
@@ -1344,7 +1344,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct 
amdgpu_device *adev)
 }
 
 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
- const char *chip_name)
+ char *chip_name)
 {
char fw_name[30];
int err;
@@ -1392,58 +1392,24 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
 
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
+   char ucode_prefix[30];
int r;
 
DRM_DEBUG("\n");
-
-   switch (adev->ip_versions[GC_HWIP][0]) {
-   case IP_VERSION(9, 0, 1):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(9, 2, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(9, 4, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(9, 2, 2):
-   case IP_VERSION(9, 1, 0):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(9, 4, 1):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(9, 3, 0):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(9, 4, 2):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
+   amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
/* No CPG in Arcturus */
if (adev->gfx.num_gfx_rings) {
-   r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
+   r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
if (r)
return r;
}
 
-   r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
+   r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
 
-   r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
+   r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
 
@@ -2131,12 +2097,6 @@ static int gfx_v9_0_sw_init(void *handle)
 
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
 
-   r = gfx_v9_0_init_microcode(adev);
-   if (r) {
-   DRM_ERROR("Failed to load gfx firmware!\n");
-   return r;
-   }
-
if (adev->gfx.rlc.funcs) {
if (adev->gfx.rlc.funcs->init) {
r = adev->gfx.rlc.funcs->init(adev);
@@ -4578,7 +4538,7 @@ static int gfx_v9_0_early_init(void *handle)
/* init rlcg reg access ctrl */
gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
 
-   return 0;
+   return gfx_v9_0_init_microcode(adev);
 }
 
 static int gfx_v9_0_ecc_late_init(void *handle)
-- 
2.34.1



[PATCH v4 14/27] drm/amd: Use `amdgpu_ucode_load` helper for GFX9

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * new patch
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 43 +--
 1 file changed, 8 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f202b45c413c..27040821d764 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1257,37 +1257,25 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct 
amdgpu_device *adev,
int err;
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
-   err = request_firmware(>gfx.pfp_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+   err = amdgpu_ucode_load(adev, >gfx.pfp_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
-   err = request_firmware(>gfx.me_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.me_fw);
+   err = amdgpu_ucode_load(adev, >gfx.me_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
-   err = request_firmware(>gfx.ce_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+   err = amdgpu_ucode_load(adev, >gfx.ce_fw, fw_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
 
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx9: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
@@ -1328,10 +1316,7 @@ static int gfx_v9_0_init_rlc_microcode(struct 
amdgpu_device *adev,
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", 
chip_name);
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", 
chip_name);
-   err = request_firmware(>gfx.rlc_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+   err = amdgpu_ucode_load(adev, >gfx.rlc_fw, fw_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 
*)adev->gfx.rlc_fw->data;
@@ -1341,12 +1326,10 @@ static int gfx_v9_0_init_rlc_microcode(struct 
amdgpu_device *adev,
err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
 out:
if (err) {
-   dev_err(adev->dev,
-   "gfx9: Failed to init firmware \"%s\"\n",
-   fw_name);
release_firmware(adev->gfx.rlc_fw);
adev->gfx.rlc_fw = NULL;
}
+
return err;
 }
 
@@ -1371,12 +1354,9 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", 
chip_name);
 
-   err = request_firmware(>gfx.mec_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
-   err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+   err = amdgpu_ucode_load(adev, >gfx.mec_fw, fw_name);
if (err)
-   goto out;
+   return err;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
 
@@ -1386,11 +1366,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
else
snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_mec2.bin", chip_name);
 
-   err = request_firmware(>gfx.mec2_fw, fw_name, adev->dev);
+   err = amdgpu_ucode_load(adev, >gfx.mec2_fw, fw_name);
if (!err) {
-   err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
-   if (err)
-   goto out;
amdgpu_gfx_cp_init_microcode(adev, 
AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, 
AMDGPU_UCODE_ID_CP_MEC2_JT);
} else {
@@ -1402,13 +1379,9 @@ static int gfx_v9_0_init_cp_compute_microcode(struct 
amdgpu_device *adev,
adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
}
 
-out:
gfx_v9_0_check_if_need_gfxoff(adev);
gfx_v9_0_check_fw_write_wait(adev);
if (err) {
-   dev_err(adev->dev,
-   

[PATCH v4 12/27] drm/amd: Use `amdgpu_ucode_load` helper for MES

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index dd8f35234507..df9efbca0f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1438,11 +1438,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
ucode_prefix,
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
-   r = request_firmware(>mes.fw[pipe], fw_name, adev->dev);
-   if (r)
-   goto out;
-
-   r = amdgpu_ucode_validate(adev->mes.fw[pipe]);
+   r = amdgpu_ucode_load(adev, >mes.fw[pipe], fw_name);
if (r)
goto out;
 
@@ -1482,7 +1478,6 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, 
int pipe)
}
 
return 0;
-
 out:
release_firmware(adev->mes.fw[pipe]);
adev->mes.fw[pipe] = NULL;
-- 
2.34.1



[PATCH v4 07/27] drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Mario Limonciello
Simplifies the code so that all SDMA versions will get the firmware
name from `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move out of IP discovery and instead simplify early_init
v2->v3:
 * Fix dGPU naming scheme
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c |  7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 47 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 30 +
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 55 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 25 +--
 6 files changed, 13 insertions(+), 155 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 9e85a078d918..83e8f0dae647 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -200,15 +200,18 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
 }
 
 int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-  char *fw_name, u32 instance,
-  bool duplicate)
+  u32 instance, bool duplicate)
 {
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
int err = 0, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
+   char ucode_prefix[30];
+   char fw_name[40];
 
+   amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%s.bin", ucode_prefix, 
!instance ? "" : "1");
err = amdgpu_ucode_load(adev, >sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 7d99205c2e01..2d16e6d36728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -124,8 +124,8 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device 
*adev,
 int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
  struct amdgpu_irq_src *source,
  struct amdgpu_iv_entry *entry);
-int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-char *fw_name, u32 instance, bool duplicate);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+  bool duplicate);
 void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
 bool duplicate);
 void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4d780e4430e7..017ae298558e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -575,60 +575,17 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device 
*adev)
 // vega10 real chip need to use PSP to load firmware
 static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 {
-   const char *chip_name;
-   char fw_name[30];
int ret, i;
 
-   DRM_DEBUG("\n");
-
-   switch (adev->ip_versions[SDMA0_HWIP][0]) {
-   case IP_VERSION(4, 0, 0):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(4, 0, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(4, 2, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(4, 1, 0):
-   case IP_VERSION(4, 1, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(4, 2, 2):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(4, 1, 2):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(4, 4, 0):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (i == 0)
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma.bin", chip_name);
-   else
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma%d.bin", chip_name, i);
if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
 adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) {
/* Acturus & Aldebaran will leverage the same FW 

[PATCH v4 09/27] drm/amd: Use `amdgpu_ucode_load` helper for VCN

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a23e26b272b4..6d9cb7fb67cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -206,19 +206,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
 
-   r = request_firmware(>vcn.fw, fw_name, adev->dev);
+   r = amdgpu_ucode_load(adev, >vcn.fw, fw_name);
if (r) {
-   dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
-   fw_name);
-   return r;
-   }
-
-   r = amdgpu_ucode_validate(adev->vcn.fw);
-   if (r) {
-   dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware 
\"%s\"\n",
-   fw_name);
-   release_firmware(adev->vcn.fw);
-   adev->vcn.fw = NULL;
+   if (adev->vcn.fw)
+   release_firmware(adev->vcn.fw);
return r;
}
 
-- 
2.34.1



[PATCH v4 10/27] drm/amd: Load VCN microcode during early_init

2023-01-03 Thread Mario Limonciello
Simplifies the code so that all VCN versions will get the firmware
name from `amdgpu_ucode_ip_version_decode` and then use this filename
to load microcode as part of the early_init process.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Move out of IP discovery and introduce early_init phase for VCN
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 94 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  1 +
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c   |  5 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c   |  5 +-
 7 files changed, 52 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 6d9cb7fb67cf..48fc9059c386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -36,25 +36,25 @@
 #include "soc15d.h"
 
 /* Firmware Names */
-#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
-#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
-#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
-#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
-#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
-#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
-#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
-#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
-#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
-#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
-#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
-#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
-#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
-#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
-#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
-#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
-#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.bin"
-#define FIRMWARE_VCN4_0_2  "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO   "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2"amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS  "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR"amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10"amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14"amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12"amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID"amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH   "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_DIMGREY_CAVEFISH  "amdgpu/dimgrey_cavefish_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY"amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP   "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0  "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2  "amdgpu/vcn_4_0_2.bin"
 #define FIRMWARE_VCN4_0_4  "amdgpu/vcn_4_0_4.bin"
 
 MODULE_FIRMWARE(FIRMWARE_RAVEN);
@@ -80,10 +80,26 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
 
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 
+int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+{
+   char ucode_prefix[30];
+   char fw_name[40];
+   int r;
+
+   amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
+   r = amdgpu_ucode_load(adev, >vcn.fw, fw_name);
+   if (r) {
+   release_firmware(adev->vcn.fw);
+   adev->vcn.fw = NULL;
+   }
+
+   return r;
+}
+
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 {
unsigned long bo_size;
-   const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
@@ -99,46 +115,27 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   fw_name = FIRMWARE_RAVEN2;
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   fw_name = FIRMWARE_PICASSO;
-   else
-   fw_name = FIRMWARE_RAVEN;
-   break;
case IP_VERSION(2, 5, 0):
-   fw_name = FIRMWARE_ARCTURUS;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags 

[PATCH v4 08/27] drm/amd: Make SDMA firmware load failures less noisy.

2023-01-03 Thread Mario Limonciello
When firmware is missing we get failures at every step.
```
[3.855086] amdgpu :04:00.0: Direct firmware load for 
amdgpu/green_sardine_sdma.bin failed with error -2
[3.855087] [drm:amdgpu_sdma_init_microcode [amdgpu]] *ERROR* SDMA: Failed 
to init firmware "amdgpu/green_sardine_sdma.bin"
[3.855398] [drm:sdma_v4_0_early_init [amdgpu]] *ERROR* Failed to load sdma 
firmware!
```
Realistically we don't need all of these; a user can tell what happened
from the first message, which request_firmware itself emitted. Drop the others.

Signed-off-by: Mario Limonciello 
---
v3->v4:
* New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 83e8f0dae647..f052173ef1e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -277,10 +277,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
}
 
 out:
-   if (err) {
-   DRM_ERROR("SDMA: Failed to init firmware \"%s\"\n", fw_name);
+   if (err)
amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
-   }
return err;
 }
 
-- 
2.34.1



[PATCH v4 03/27] drm/amd: Convert SMUv11 microcode to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Mario Limonciello
Remove the special casing from SMU v11 code. No intended functional
changes.

Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
v3->v4:
 * No changes
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 35 ++-
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index ad66d57aa102..d4756bd30830 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -93,7 +93,7 @@ static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
 int smu_v11_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
+   char ucode_prefix[30];
char fw_name[SMU_FW_NAME_LEN];
int err = 0;
const struct smc_firmware_header_v1_0 *hdr;
@@ -105,38 +105,9 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
 (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(11, 0, 0):
-   chip_name = "navi10";
-   break;
-   case IP_VERSION(11, 0, 5):
-   chip_name = "navi14";
-   break;
-   case IP_VERSION(11, 0, 9):
-   chip_name = "navi12";
-   break;
-   case IP_VERSION(11, 0, 7):
-   chip_name = "sienna_cichlid";
-   break;
-   case IP_VERSION(11, 0, 11):
-   chip_name = "navy_flounder";
-   break;
-   case IP_VERSION(11, 0, 12):
-   chip_name = "dimgrey_cavefish";
-   break;
-   case IP_VERSION(11, 0, 13):
-   chip_name = "beige_goby";
-   break;
-   case IP_VERSION(11, 0, 2):
-   chip_name = "arcturus";
-   break;
-   default:
-   dev_err(adev->dev, "Unsupported IP version 0x%x\n",
-   adev->ip_versions[MP1_HWIP][0]);
-   return -EINVAL;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(>pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v4 06/27] drm/amd: Use `amdgpu_ucode_load` helper for SDMA

2023-01-03 Thread Mario Limonciello
The `amdgpu_ucode_load` helper will ensure that the return code for
missing firmware is -ENODEV so that early_init can fail.

Signed-off-by: Mario Limonciello 
---
v3->v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index ea5278f094c0..9e85a078d918 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -154,16 +154,11 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device 
*adev,
 
 static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
 {
-   int err = 0;
uint16_t version_major;
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
const struct sdma_firmware_header_v2_0 *hdr_v2;
 
-   err = amdgpu_ucode_validate(sdma_inst->fw);
-   if (err)
-   return err;
-
header = (const struct common_firmware_header *)
sdma_inst->fw->data;
version_major = le16_to_cpu(header->header_version_major);
@@ -214,7 +209,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
 
-   err = request_firmware(>sdma.instance[instance].fw, fw_name, 
adev->dev);
+   err = amdgpu_ucode_load(adev, >sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
 
-- 
2.34.1



[PATCH v4 02/27] drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"

2023-01-03 Thread Mario Limonciello
This will allow other parts of the driver that currently special-case
firmware file names (from before the IP-version style naming) to just
make a single call to `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
v3->v4:
 * No changes
v2->v3:
 * Fixes for GFX9 SDMA
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 221 ++
 1 file changed, 221 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5cb62e6249c2..eafcddce58d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1059,12 +1059,233 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
return 0;
 }
 
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int 
block_type)
+{
+   if (block_type == MP0_HWIP) {
+   switch (adev->ip_versions[MP0_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   switch (adev->asic_type) {
+   case CHIP_VEGA10:
+   return "vega10";
+   case CHIP_VEGA12:
+   return "vega12";
+   default:
+   return NULL;
+   }
+   break;
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   if (adev->asic_type == CHIP_RAVEN) {
+   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+   return "raven2";
+   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+   return "picasso";
+   return "raven";
+   }
+   break;
+   case IP_VERSION(11, 0, 0):
+   return "navi10";
+   case IP_VERSION(11, 0, 2):
+   return "vega20";
+   case IP_VERSION(11, 0, 4):
+   return "arcturus";
+   case IP_VERSION(11, 0, 5):
+   return "navi14";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid";
+   case IP_VERSION(11, 0, 9):
+   return "navi12";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby";
+   case IP_VERSION(11, 5, 0):
+   return "vangogh";
+   case IP_VERSION(12, 0, 1):
+   if (adev->asic_type == CHIP_RENOIR) {
+   if (adev->apu_flags & AMD_APU_IS_RENOIR)
+   return "renoir";
+   return "green_sardine";
+   }
+   break;
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran";
+   case IP_VERSION(13, 0, 1):
+   case IP_VERSION(13, 0, 3):
+   return "yellow_carp";
+   }
+   } else if (block_type == MP1_HWIP) {
+   switch (adev->ip_versions[MP1_HWIP][0]) {
+   case IP_VERSION(9, 0, 0):
+   case IP_VERSION(10, 0, 0):
+   case IP_VERSION(10, 0, 1):
+   case IP_VERSION(11, 0, 2):
+   if (adev->asic_type == CHIP_ARCTURUS)
+   return "arcturus_smc";
+   return NULL;
+   case IP_VERSION(11, 0, 0):
+   return "navi10_smc";
+   case IP_VERSION(11, 0, 5):
+   return "navi14_smc";
+   case IP_VERSION(11, 0, 9):
+   return "navi12_smc";
+   case IP_VERSION(11, 0, 7):
+   return "sienna_cichlid_smc";
+   case IP_VERSION(11, 0, 11):
+   return "navy_flounder_smc";
+   case IP_VERSION(11, 0, 12):
+   return "dimgrey_cavefish_smc";
+   case IP_VERSION(11, 0, 13):
+   return "beige_goby_smc";
+   case IP_VERSION(13, 0, 2):
+   return "aldebaran_smc";
+   }
+   } else if (block_type == SDMA0_HWIP) {
+   switch (adev->ip_versions[SDMA0_HWIP][0]) {
+   case IP_VERSION(4, 0, 0):
+   return "vega10_sdma";
+   case IP_VERSION(4, 0, 1):
+   return "vega12_sdma";
+   case IP_VERSION(4, 1, 0):
+   case IP_VERSION(4, 1, 1):
+   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+   return 

[PATCH v4 05/27] drm/amd: Add a new helper for loading/validating microcode

2023-01-03 Thread Mario Limonciello
All microcode is fetched with request_firmware and then run through a basic
validation (amdgpu_ucode_validate); each IP block performs both steps as
part of its init.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v3-v4:
 * New patch
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 24 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index eafcddce58d3..8c4a7b09e344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,27 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device 
*adev, int block_type,
 
snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
 }
+
+/*
+ * amdgpu_ucode_load - Load and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name)
+{
+   int err = request_firmware(fw, fw_name, adev->dev);
+
+   if (err)
+   return -ENODEV;
+   err = amdgpu_ucode_validate(*fw);
+   if (err)
+   dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+
+   return err;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 552e06929229..b9139fb44506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,7 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
 void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
 int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name);
 bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
 
-- 
2.34.1



[PATCH v4 04/27] drm/amd: Convert SMUv13 microcode to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Mario Limonciello
The special case for the one dGPU has been moved into
`amdgpu_ucode_ip_version_decode`, so simplify this code.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
Acked-by: Christian König 
---
v3->v4:
 * No changes
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 0ac9cac805f9..506a49a4b425 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -88,7 +88,6 @@ static const int link_speed[] = {25, 50, 80, 160};
 int smu_v13_0_init_microcode(struct smu_context *smu)
 {
struct amdgpu_device *adev = smu->adev;
-   const char *chip_name;
char fw_name[30];
char ucode_prefix[30];
int err = 0;
@@ -100,16 +99,9 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
if (amdgpu_sriov_vf(adev))
return 0;
 
-   switch (adev->ip_versions[MP1_HWIP][0]) {
-   case IP_VERSION(13, 0, 2):
-   chip_name = "aldebaran_smc";
-   break;
-   default:
-   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
-   chip_name = ucode_prefix;
-   }
+   amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, 
sizeof(ucode_prefix));
 
-   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
 
err = request_firmware(>pm.fw, fw_name, adev->dev);
if (err)
-- 
2.34.1



[PATCH v4 01/27] drm/amd: Delay removal of the firmware framebuffer

2023-01-03 Thread Mario Limonciello
Removing the firmware framebuffer from the driver means that even
if the driver doesn't support the IP blocks in a GPU it will no
longer be functional after the driver fails to initialize.

This change will ensure that, when the driver encounters unsupported IP
blocks, the user is at least left with a working EFI framebuffer.

Cc: sta...@vger.kernel.org
Suggested-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
v3->v4:
 * Drop all R-b/A-b tags.
 * Move to after early IP init instead
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c| 6 --
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a1a5c2864a0..cdb681398a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -37,6 +37,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT 2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) 
== -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
@@ -3685,6 +3688,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
 
+   /* Get rid of things like offb */
+   r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, 
_kms_driver);
+   if (r)
+   return r;
+
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index db7e34eacc35..b9f14ec9edb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,6 @@
  */
 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2096,11 +2095,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
 #endif
 
-   /* Get rid of things like offb */
-   ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, 
_kms_driver);
-   if (ret)
-   return ret;
-
adev = devm_drm_dev_alloc(>dev, _kms_driver, 
typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
-- 
2.34.1



[PATCH v4 00/27] Recover from failure to probe GPU

2023-01-03 Thread Mario Limonciello
One of the first thing that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`

This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after early_init
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU. IP discovery will have failed.
2) Request loading all PSP, VCN, SDMA, SMU, DMCUB, MES and GC microcode
into memory during early_init. This will help the situation of new enough
kernel for the IP discovery phase to otherwise pass but missing microcode
from linux-firmware.git.

v3->v4:
 * Rework to delay framebuffer release until early_init is done
 * Make individual IPs load microcode during early init phase
 * Add SMU and DMCUB cases for early_init loading
 * Add some new helper code for wrapping request_firmware calls (needed for
   early_init to return something besides -ENOENT)
v2->v3:
 * Pick up tags for patches 1-10
 * Rework patch 11 to not validate during discovery
 * Fix bugs with GFX9 due to gfx.num_gfx_rings not being set during discovery
 * Fix naming scheme for SDMA on dGPUs
v1->v2:
 * Take the suggestion from v1 thread to delay the framebuffer release until
   ip discovery is done. This patch is CC to stable to that older stable
   kernels with IP discovery won't try to probe unknown IP.
 * Drop changes to drm aperature.
 * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.
Mario Limonciello (27):
  drm/amd: Delay removal of the firmware framebuffer
  drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
  drm/amd: Convert SMUv11 microcode to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Convert SMUv13 microcode to use
`amdgpu_ucode_ip_version_decode`
  drm/amd: Add a new helper for loading/validating microcode
  drm/amd: Use `amdgpu_ucode_load` helper for SDMA
  drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`
  drm/amd: Make SDMA firmware load failures less noisy.
  drm/amd: Use `amdgpu_ucode_load` helper for VCN
  drm/amd: Load VCN microcode during early_init
  drm/amd: Load MES microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for MES
  drm/amd: Remove superfluous assignment for `adev->mes.adev`
  drm/amd: Use `amdgpu_ucode_load` helper for GFX9
  drm/amd: Load GFX9 microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for GFX10
  drm/amd: Load GFX10 microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for GFX11
  drm/amd: Load GFX11 microcode during early_init
  drm/amd: Parse both v1 and v2 TA microcode headers using same function
  drm/amd: Avoid BUG() for case of SRIOV missing IP version
  drm/amd: Load PSP microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for PSP
  drm/amd/display: Load DMUB microcode during early_init
  drm/amd: Use `amdgpu_ucode_load` helper for SMU
  drm/amd: Load SMU microcode during early_init
  drm/amd: Optimize SRIOV switch/case for PSP microcode load

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  60 
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 276 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h   |  15 +-
 

Re: [PATCH v2] drm/amdgpu: Retry DDC probing on DVI on failure if we got an HPD interrupt

2023-01-03 Thread Alex Deucher
On Fri, Dec 23, 2022 at 9:23 AM xurui  wrote:
>
> HPD signals on DVI ports can be fired off before the pins required for
> DDC probing actually make contact, due to the pins for HPD making
> contact first. This results in a HPD signal being asserted but DDC
> probing failing, resulting in hotplugging occasionally failing.
>
> Rescheduling the hotplug work for a second when we run into an HPD
> signal with a failing DDC probe usually gives enough time for the rest
> of the connector's pins to make contact, and fixes this issue.
>
> Signed-off-by: xurui 
> Reported-by: kernel test robot

Applied.  Thanks!

Alex


> ---
> V1 -> V2: Fixed a compilation error
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_connectors.c| 22 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h  |  1 +
>  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c|  6 ++---
>  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c|  6 ++---
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c |  6 ++---
>  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c |  6 ++---
>  8 files changed, 36 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6b74df446694..b1d901fe578e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -870,7 +870,7 @@ struct amdgpu_device {
> struct amdgpu_vkms_output   *amdgpu_vkms_output;
> struct amdgpu_mode_info mode_info;
> /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
> -   struct work_struct  hotplug_work;
> +   struct delayed_work hotplug_work;
> struct amdgpu_irq_src   crtc_irq;
> struct amdgpu_irq_src   vline0_irq;
> struct amdgpu_irq_src   vupdate_irq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
> index 2ebbc6382a06..d2abd334b1b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
> @@ -996,13 +996,33 @@ amdgpu_connector_dvi_detect(struct drm_connector 
> *connector, bool force)
> }
> }
>
> +   if (amdgpu_connector->detected_hpd_without_ddc) {
> +   force = true;
> +   amdgpu_connector->detected_hpd_without_ddc = false;
> +   }
> +
> if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) 
> {
> ret = connector->status;
> goto exit;
> }
>
> -   if (amdgpu_connector->ddc_bus)
> +   if (amdgpu_connector->ddc_bus) {
> dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
> +
> +   /* Sometimes the pins required for the DDC probe on DVI
> +* connectors don't make contact at the same time that the 
> ones
> +* for HPD do. If the DDC probe fails even though we had an 
> HPD
> +* signal, try again later
> +*/
> +   if (!dret && !force &&
> +   amdgpu_display_hpd_sense(adev, 
> amdgpu_connector->hpd.hpd)) {
> +   DRM_DEBUG_KMS("hpd detected without ddc, retrying in 
> 1 second\n");
> +   amdgpu_connector->detected_hpd_without_ddc = true;
> +   schedule_delayed_work(>hotplug_work,
> + msecs_to_jiffies(1000));
> +   goto exit;
> +   }
> +   }
> if (dret) {
> amdgpu_connector->detected_by_load = false;
> amdgpu_connector_free_edid(connector);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index b22471b3bd63..a876648e3d7a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -63,7 +63,7 @@
>  void amdgpu_display_hotplug_work_func(struct work_struct *work)
>  {
> struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
> - hotplug_work);
> + hotplug_work.work);
> struct drm_device *dev = adev_to_drm(adev);
> struct drm_mode_config *mode_config = >mode_config;
> struct drm_connector *connector;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> index 8a39300b1a84..93c73faa5714 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
> @@ -534,6 +534,7 @@ struct amdgpu_connector {
> void *con_priv;
> bool dac_load_detect;
> bool detected_by_load; /* if the connection status was determined by 
> load */
> +   bool 

Re: [syzbot] WARNING: locking bug in inet_autobind

2023-01-03 Thread Waiman Long

On 1/3/23 10:39, Felix Kuehling wrote:
The regression point doesn't make sense. The kernel config doesn't 
enable CONFIG_DRM_AMDGPU, so there is no way that a change in AMDGPU 
could have caused this regression.


I agree. It is likely a pre-existing problem or caused by another commit 
that got triggered because of the change in cacheline alignment caused 
by commit c0d9271ecbd ("drm/amdgpu: Delete user queue doorbell variable").


Cheers,
Longman



Regards,
  Felix


Am 2022-12-29 um 01:26 schrieb syzbot:

syzbot has found a reproducer for the following issue on:

HEAD commit:    1b929c02afd3 Linux 6.2-rc1
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=145c6a6848
kernel config: 
https://syzkaller.appspot.com/x/.config?x=2651619a26b4d687
dashboard link: 
https://syzkaller.appspot.com/bug?extid=94cc2a66fc228b23f360
compiler:   gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU 
Binutils for Debian) 2.35.2

syz repro: https://syzkaller.appspot.com/x/repro.syz?x=13e13e3248
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=13790f0848

Downloadable assets:
disk image: 
https://storage.googleapis.com/syzbot-assets/d1849f1ca322/disk-1b929c02.raw.xz
vmlinux: 
https://storage.googleapis.com/syzbot-assets/924cb8aa4ada/vmlinux-1b929c02.xz
kernel image: 
https://storage.googleapis.com/syzbot-assets/8c7330dae0a0/bzImage-1b929c02.xz


The issue was bisected to:

commit c0d9271ecbd891cdeb0fad1edcdd99ee717a655f
Author: Yong Zhao 
Date:   Fri Feb 1 23:36:21 2019 +

 drm/amdgpu: Delete user queue doorbell variables

bisection log: 
https://syzkaller.appspot.com/x/bisect.txt?x=1433ece4a0

final oops: https://syzkaller.appspot.com/x/report.txt?x=1633ece4a0
console output: https://syzkaller.appspot.com/x/log.txt?x=1233ece4a0

IMPORTANT: if you fix the issue, please add the following tag to the 
commit:

Reported-by: syzbot+94cc2a66fc228b23f...@syzkaller.appspotmail.com
Fixes: c0d9271ecbd8 ("drm/amdgpu: Delete user queue doorbell variables")

[ cut here ]
Looking for class "l2tp_sock" with key l2tp_socket_class, but found a 
different class "slock-AF_INET6" with the same key
WARNING: CPU: 0 PID: 7280 at kernel/locking/lockdep.c:937 
look_up_lock_class+0x97/0x110 kernel/locking/lockdep.c:937

Modules linked in:
CPU: 0 PID: 7280 Comm: syz-executor835 Not tainted 
6.2.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, 
BIOS Google 10/26/2022

RIP: 0010:look_up_lock_class+0x97/0x110 kernel/locking/lockdep.c:937
Code: 17 48 81 fa e0 e5 f6 8f 74 59 80 3d 5d bc 57 04 00 75 50 48 c7 
c7 00 4d 4c 8a 48 89 04 24 c6 05 49 bc 57 04 01 e8 a9 42 b9 ff <0f> 
0b 48 8b 04 24 eb 31 9c 5a 80 e6 02 74 95 e8 45 38 02 fa 85 c0

RSP: 0018:c9000b5378b8 EFLAGS: 00010082
RAX:  RBX: 91c06a00 RCX: 
RDX: 8880292d RSI: 8166721c RDI: f520016a6f09
RBP:  R08: 0005 R09: 
R10: 8201 R11: 20676e696b6f6f4c R12: 
R13: 88802a5820b0 R14:  R15: 
FS:  7f1fd7a97700() GS:8880b980() 
knlGS:

CS:  0010 DS:  ES:  CR0: 80050033
CR2: 2100 CR3: 78ab4000 CR4: 003506f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  
  register_lock_class+0xbe/0x1120 kernel/locking/lockdep.c:1289
  __lock_acquire+0x109/0x56d0 kernel/locking/lockdep.c:4934
  lock_acquire kernel/locking/lockdep.c:5668 [inline]
  lock_acquire+0x1e3/0x630 kernel/locking/lockdep.c:5633
  __raw_spin_lock_bh include/linux/spinlock_api_smp.h:126 [inline]
  _raw_spin_lock_bh+0x33/0x40 kernel/locking/spinlock.c:178
  spin_lock_bh include/linux/spinlock.h:355 [inline]
  lock_sock_nested+0x5f/0xf0 net/core/sock.c:3473
  lock_sock include/net/sock.h:1725 [inline]
  inet_autobind+0x1a/0x190 net/ipv4/af_inet.c:177
  inet_send_prepare net/ipv4/af_inet.c:813 [inline]
  inet_send_prepare+0x325/0x4e0 net/ipv4/af_inet.c:807
  inet6_sendmsg+0x43/0xe0 net/ipv6/af_inet6.c:655
  sock_sendmsg_nosec net/socket.c:714 [inline]
  sock_sendmsg+0xd3/0x120 net/socket.c:734
  __sys_sendto+0x23a/0x340 net/socket.c:2117
  __do_sys_sendto net/socket.c:2129 [inline]
  __se_sys_sendto net/socket.c:2125 [inline]
  __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125
  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
  do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
  entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f1fd78538b9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 15 00 00 90 48 89 f8 48 
89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 
3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48

RSP: 002b:7f1fd7a971f8 EFLAGS: 0212 ORIG_RAX: 002c
RAX: ffda RBX: 7f1fd78f0038 RCX: 7f1fd78538b9
RDX: 

[PATCH 4.19 0/1] drm/amdkfd: Check for null pointer after calling kmemdup

2023-01-03 Thread Dragos-Marian Panait
The following commit is needed to fix CVE-2022-3108:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=abfaf0eee97925905e742aa3b0b72e04a918fa9e

Jiasheng Jiang (1):
  drm/amdkfd: Check for null pointer after calling kmemdup

 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +++
 1 file changed, 3 insertions(+)


base-commit: c652c812211c7a427d16be1d3f904eb02eb4265f
-- 
2.38.1



[PATCH 4.19 1/1] drm/amdkfd: Check for null pointer after calling kmemdup

2023-01-03 Thread Dragos-Marian Panait
From: Jiasheng Jiang 

[ Upstream commit abfaf0eee97925905e742aa3b0b72e04a918fa9e ]

As the possible failure of the allocation, kmemdup() may return NULL
pointer.
Therefore, it should be better to check the 'props2' in order to prevent
the dereference of NULL pointer.

Fixes: 3a87177eb141 ("drm/amdkfd: Add topology support for dGPUs")
Signed-off-by: Jiasheng Jiang 
Reviewed-by: Felix Kuehling 
Signed-off-by: Felix Kuehling 
Signed-off-by: Alex Deucher 
Signed-off-by: Dragos-Marian Panait 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index e2780643f4c3..b05ca3e639b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -397,6 +397,9 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
return -ENODEV;
/* same everything but the other direction */
props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+   if (!props2)
+   return -ENOMEM;
+
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
-- 
2.38.1



Re: [PATCH v3 2/3] drm/amd/display: Report to ACPI video if no panels were found

2023-01-03 Thread Harry Wentland
On 12/8/22 11:42, Mario Limonciello wrote:
> On desktop APUs amdgpu doesn't create a native backlight device
> as no eDP panels are found.  However if the BIOS has reported
> backlight control methods in the ACPI tables then an acpi_video0
> backlight device will be made 8 seconds after boot.
> 
> This has manifested in a power slider on a number of desktop APUs
> ranging from Ryzen 5000 through Ryzen 7000 on various motherboard
> manufacturers. To avoid this, report to the acpi video detection
> that the system does not have any panel connected in the native
> driver.
> 
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786
> Reported-by: Hans de Goede 
> Signed-off-by: Mario Limonciello 
> Reviewed-by: Hans de Goede 

Reviewed-by: Harry Wentland 

Harry

> ---
> v2->v3:
>  * Add Hans' R-b
> v1->v2:
>  * No changes
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 512c32327eb1..b73f61ac5dd5 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -4371,6 +4371,10 @@ static int amdgpu_dm_initialize_drm_device(struct 
> amdgpu_device *adev)
>   amdgpu_set_panel_orientation(>base);
>   }
>  
> + /* If we didn't find a panel, notify the acpi video detection */
> + if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0)
> + acpi_video_report_nolcd();
> +
>   /* Software is initialized. Now we can register interrupt handlers. */
>   switch (adev->asic_type) {
>  #if defined(CONFIG_DRM_AMD_DC_SI)



Re: [PATCH] drm/amdkfd: simplify cases

2023-01-03 Thread Felix Kuehling

Am 2022-12-27 um 12:12 schrieb Alex Deucher:

On Tue, Dec 27, 2022 at 12:10 PM Alex Deucher  wrote:

A number of of the gfx8 cases where the same.  Clean them
up.

typos here fixed up locally.

Alex


Signed-off-by: Alex Deucher 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ---
  1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 091fc2bb8ce5..521dfa88aad8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -263,23 +263,12 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, 
bool vf)
 f2g = _v8_kfd2kgd;
 break;
 case CHIP_FIJI:
-   gfx_target_version = 80003;
-   f2g = _v8_kfd2kgd;
-   break;
 case CHIP_POLARIS10:
 gfx_target_version = 80003;
 f2g = _v8_kfd2kgd;
 break;
 case CHIP_POLARIS11:
-   gfx_target_version = 80003;
-   if (!vf)
-   f2g = _v8_kfd2kgd;
-   break;
 case CHIP_POLARIS12:
-   gfx_target_version = 80003;
-   if (!vf)
-   f2g = _v8_kfd2kgd;
-   break;
 case CHIP_VEGAM:
 gfx_target_version = 80003;
 if (!vf)
--
2.38.1



Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Felix Kuehling
I think at some point ROCr could start using libdrm APIs for memory 
management and user mode queues on kernels and GPUs that support this. I 
think more work is required on the memory management side, though. ROCr 
would fall back to libhsakmt on older kernels and older GPUs (pre-GFX11).


Regards,
  Felix


Am 2023-01-03 um 14:52 schrieb Alex Deucher:

On Tue, Jan 3, 2023 at 2:25 PM Liu, Shaoyun  wrote:

[AMD Official Use Only - General]

What about the existing rocm apps that already use the  hsakmt APIs for user 
queue ?

We'd have to keep both APIs around for existing chips for backwards
compatibility.

Alex


Shaoyun.liu

-Original Message-
From: Alex Deucher 
Sent: Tuesday, January 3, 2023 2:22 PM
To: Liu, Shaoyun 
Cc: Kuehling, Felix ; Sharma, Shashank ; 
amd-gfx@lists.freedesktop.org; Deucher, Alexander ; Koenig, Christian 
; Yadav, Arvind ; Paneer Selvam, Arunpravin 

Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun  wrote:

[AMD Official Use Only - General]

Hsakmt  has  the  interfaces for compute user queue. Do we want a unified API for 
both  graphic and compute  ?

Yeah, that is the eventual goal, hence the flag for AQL vs PM4.

Alex


Regards
Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of
Felix Kuehling
Sent: Tuesday, January 3, 2023 1:30 PM
To: Sharma, Shashank ;
amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian
; Yadav, Arvind ;
Paneer Selvam, Arunpravin 
Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

Am 2022-12-23 um 14:36 schrieb Shashank Sharma:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics queue.
The userspace app will fill this structure and request the graphics
driver to add a graphics work queue for it. The output of this UAPI
is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
   include/uapi/drm/amdgpu_drm.h | 52 +++
   1 file changed, 52 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h
b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
   #define DRM_AMDGPU_VM   0x13
   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
   #define DRM_AMDGPU_SCHED0x15
+#define DRM_AMDGPU_USERQ 0x16

   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

   /**
* DOC: memory domains
@@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
   union drm_amdgpu_ctx_out out;
   };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE   1
+#define AMDGPU_USERQ_OP_FREE 2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)

What does "secure" mean here? I don't see this flag referenced anywhere in the 
rest of the patch series.

Regards,
Felix



+#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+ /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+ __u32   flags;
+ /** IP type: AMDGPU_HW_IP_* */
+ __u32   ip_type;
+ /** GEM object handle */
+ __u32   doorbell_handle;
+ /** Doorbell offset in dwords */
+ __u32   doorbell_offset;
+ /** GPU virtual address of the queue */
+ __u64   queue_va;
+ /** Size of the queue in bytes */
+ __u64   queue_size;
+ /** GPU virtual address of the rptr */
+ __u64   rptr_va;
+ /** GPU virtual address of the wptr */
+ __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+ /** AMDGPU_USERQ_OP_* */
+ __u32   op;
+ /** Flags */
+ __u32   flags;
+ /** Context handle to associate the queue with */
+ __u32   ctx_id;
+ __u32   pad;
+ /** Queue descriptor */
+ struct drm_amdgpu_userq_mqd mqd; };
+
+struct drm_amdgpu_userq_out {
+ /** Queue handle */
+ __u32   q_id;
+ /** Flags */
+ __u32   flags;
+};
+
+union drm_amdgpu_userq {
+ struct drm_amdgpu_userq_in in;
+ struct drm_amdgpu_userq_out out; };
+
   /* vm ioctl */
   #define AMDGPU_VM_OP_RESERVE_VMID   1
   #define AMDGPU_VM_OP_UNRESERVE_VMID 2


Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 2:25 PM Liu, Shaoyun  wrote:
>
> [AMD Official Use Only - General]
>
> What about the existing rocm apps that already use the  hsakmt APIs for user 
> queue ?

We'd have to keep both APIs around for existing chips for backwards
compatibility.

Alex

>
> Shaoyun.liu
>
> -Original Message-
> From: Alex Deucher 
> Sent: Tuesday, January 3, 2023 2:22 PM
> To: Liu, Shaoyun 
> Cc: Kuehling, Felix ; Sharma, Shashank 
> ; amd-gfx@lists.freedesktop.org; Deucher, Alexander 
> ; Koenig, Christian ; 
> Yadav, Arvind ; Paneer Selvam, Arunpravin 
> 
> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>
> On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun  wrote:
> >
> > [AMD Official Use Only - General]
> >
> > Hsakmt  has  the  interfaces for compute user queue. Do we want a unified API 
> > for both  graphic and compute  ?
>
> Yeah, that is the eventual goal, hence the flag for AQL vs PM4.
>
> Alex
>
> >
> > Regards
> > Shaoyun.liu
> >
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Felix Kuehling
> > Sent: Tuesday, January 3, 2023 1:30 PM
> > To: Sharma, Shashank ;
> > amd-gfx@lists.freedesktop.org
> > Cc: Deucher, Alexander ; Koenig, Christian
> > ; Yadav, Arvind ;
> > Paneer Selvam, Arunpravin 
> > Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
> >
> > Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > > From: Alex Deucher 
> > >
> > > This patch introduces new UAPI/IOCTL for usermode graphics queue.
> > > The userspace app will fill this structure and request the graphics
> > > driver to add a graphics work queue for it. The output of this UAPI
> > > is a queue id.
> > >
> > > This UAPI maps the queue into GPU, so the graphics app can start
> > > submitting work to the queue as soon as the call returns.
> > >
> > > Cc: Alex Deucher 
> > > Cc: Christian Koenig 
> > > Signed-off-by: Alex Deucher 
> > > Signed-off-by: Shashank Sharma 
> > > ---
> > >   include/uapi/drm/amdgpu_drm.h | 52 +++
> > >   1 file changed, 52 insertions(+)
> > >
> > > diff --git a/include/uapi/drm/amdgpu_drm.h
> > > b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> > > 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -54,6 +54,7 @@ extern "C" {
> > >   #define DRM_AMDGPU_VM   0x13
> > >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> > >   #define DRM_AMDGPU_SCHED0x15
> > > +#define DRM_AMDGPU_USERQ 0x16
> > >
> > >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
> > > DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> > >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
> > > DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > > @@ -71,6 +72,7 @@ extern "C" {
> > >   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
> > > DRM_AMDGPU_VM, union drm_amdgpu_vm)
> > >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> > > DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> > >   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> > > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > > +#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
> > > DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> > >
> > >   /**
> > >* DOC: memory domains
> > > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> > >   union drm_amdgpu_ctx_out out;
> > >   };
> > >
> > > +/* user queue IOCTL */
> > > +#define AMDGPU_USERQ_OP_CREATE   1
> > > +#define AMDGPU_USERQ_OP_FREE 2
> > > +
> > > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)
> >
> > What does "secure" mean here? I don't see this flag referenced anywhere in 
> > the rest of the patch series.
> >
> > Regards,
> >Felix
> >
> >
> > > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > > +
> > > +struct drm_amdgpu_userq_mqd {
> > > + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > > + __u32   flags;
> > > + /** IP type: AMDGPU_HW_IP_* */
> > > + __u32   ip_type;
> > > + /** GEM object handle */
> > > + __u32   doorbell_handle;
> > > + /** Doorbell offset in dwords */
> > > + __u32   doorbell_offset;
> > > + /** GPU virtual address of the queue */
> > > + __u64   queue_va;
> > > + /** Size of the queue in bytes */
> > > + __u64   queue_size;
> > > + /** GPU virtual address of the rptr */
> > > + __u64   rptr_va;
> > > + /** GPU virtual address of the wptr */
> > > + __u64   wptr_va;
> > > +};
> > > +
> > > +struct drm_amdgpu_userq_in {
> > > + /** AMDGPU_USERQ_OP_* */
> > > + __u32   op;
> > > + /** Flags */
> > > + __u32   flags;
> > > + /** Context handle to associate the queue with */
> > > + __u32   ctx_id;
> > > + __u32   pad;
> > > + /** Queue descriptor */
> > > + struct drm_amdgpu_userq_mqd mqd; };
> > > +
> > > +struct drm_amdgpu_userq_out {
> > > + /** Queue 

Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Alex Deucher
On Mon, Jan 2, 2023 at 6:27 AM Christian König
 wrote:
>
> Am 27.12.22 um 17:58 schrieb Alex Deucher:
> > On Sat, Dec 24, 2022 at 3:21 PM Bas Nieuwenhuizen
> >  wrote:
> >> On Fri, Dec 23, 2022 at 8:37 PM Shashank Sharma  
> >> wrote:
> >>> From: Alex Deucher 
> >>>
> >>> This patch introduces new UAPI/IOCTL for usermode graphics
> >>> queue. The userspace app will fill this structure and request
> >>> the graphics driver to add a graphics work queue for it. The
> >>> output of this UAPI is a queue id.
> >>>
> >>> This UAPI maps the queue into GPU, so the graphics app can start
> >>> submitting work to the queue as soon as the call returns.
> >>>
> >>> Cc: Alex Deucher 
> >>> Cc: Christian Koenig 
> >>> Signed-off-by: Alex Deucher 
> >>> Signed-off-by: Shashank Sharma 
> >>> ---
> >>>   include/uapi/drm/amdgpu_drm.h | 52 +++
> >>>   1 file changed, 52 insertions(+)
> >>>
> >>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >>> index 0d93ec132ebb..a3d0dd6f62c5 100644
> >>> --- a/include/uapi/drm/amdgpu_drm.h
> >>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>> @@ -54,6 +54,7 @@ extern "C" {
> >>>   #define DRM_AMDGPU_VM  0x13
> >>>   #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
> >>>   #define DRM_AMDGPU_SCHED   0x15
> >>> +#define DRM_AMDGPU_USERQ   0x16
> >>>
> >>>   #define DRM_IOCTL_AMDGPU_GEM_CREATEDRM_IOWR(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>>   #define DRM_IOCTL_AMDGPU_GEM_MMAP  DRM_IOWR(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >>> @@ -71,6 +72,7 @@ extern "C" {
> >>>   #define DRM_IOCTL_AMDGPU_VMDRM_IOWR(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >>>   #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >>> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>>
> >>>   /**
> >>>* DOC: memory domains
> >>> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >>>  union drm_amdgpu_ctx_out out;
> >>>   };
> >>>
> >>> +/* user queue IOCTL */
> >>> +#define AMDGPU_USERQ_OP_CREATE 1
> >>> +#define AMDGPU_USERQ_OP_FREE   2
> >>> +
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
> >> Can we document what AQL means here?
> > AQL is the packet format used by KFD/ROCm.  The idea is to be able to
> > create queues that support either format (AQL or PM4).
>
> Could we make that a separate queue type? E.g. like SDMA, GFX, Compute?
>
> It's not really a flag which can be applied independent of the queue.

I guess so, but the IP types we already expose don't different queue types:
#define AMDGPU_HW_IP_GFX  0
#define AMDGPU_HW_IP_COMPUTE  1
#define AMDGPU_HW_IP_DMA  2
#define AMDGPU_HW_IP_UVD  3
#define AMDGPU_HW_IP_VCE  4
#define AMDGPU_HW_IP_UVD_ENC  5
#define AMDGPU_HW_IP_VCN_DEC  6
/*
 * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
 * both encoding and decoding jobs.
 */
#define AMDGPU_HW_IP_VCN_ENC  7
#define AMDGPU_HW_IP_VCN_JPEG 8
#define AMDGPU_HW_IP_NUM  9

I suppose we could add a new AMDGPU_HW_IP_COMPUTE_AQL.

Alex

>
> Regards,
> Christian.
>
> >
> >>
> >>> +
> >>> +struct drm_amdgpu_userq_mqd {
> >>> +   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >>> +   __u32   flags;
> >>> +   /** IP type: AMDGPU_HW_IP_* */
> >>> +   __u32   ip_type;
> >>> +   /** GEM object handle */
> >>> +   __u32   doorbell_handle;
> >>> +   /** Doorbell offset in dwords */
> >>> +   __u32   doorbell_offset;
> >> What are the doorbell handle/offset for? I don't see any of them used
> >> in the rest of the series (we only check the handle isn't 0, which
> >> isn't enough validation for a GEM handle to consider it valid), and
> >> the kernel seems to allocate some kind of doorbell index in patch 4.
> >> Does userspace need to know about that one? (similarly use_doorbell in
> >> that patch seems like it is never explicitly written to)
> > The doorbell is how you trigger the engine to start processing the
> > user queue.  The idea is that each user process allocates a page of
> > doorbell space (one of the PCI BARs) and then each 64 bit segment in
> > that page could be used for a user mode queue.  So the UMD writes its
> > data to the queue, updates the wptr, and then writes to the doorbell
> > to tell the firmware to start processing the queue.
> >
> >> The other questions I have are about how this interacts with memory
> >> management. Does this have access to all BOs allocated with
> >> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID? What about imported BOs? How does
> >> this interact with VA 

RE: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Liu, Shaoyun
[AMD Official Use Only - General]

What about the existing rocm apps that already use the  hsakmt APIs for user 
queue ?

Shaoyun.liu

-Original Message-
From: Alex Deucher 
Sent: Tuesday, January 3, 2023 2:22 PM
To: Liu, Shaoyun 
Cc: Kuehling, Felix ; Sharma, Shashank 
; amd-gfx@lists.freedesktop.org; Deucher, Alexander 
; Koenig, Christian ; 
Yadav, Arvind ; Paneer Selvam, Arunpravin 

Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun  wrote:
>
> [AMD Official Use Only - General]
>
> Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API 
> for both  graphic and compute  ?

Yeah, that is the eventual goal, hence the flag for AQL vs PM4.

Alex

>
> Regards
> Shaoyun.liu
>
> -Original Message-
> From: amd-gfx  On Behalf Of
> Felix Kuehling
> Sent: Tuesday, January 3, 2023 1:30 PM
> To: Sharma, Shashank ;
> amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian
> ; Yadav, Arvind ;
> Paneer Selvam, Arunpravin 
> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>
> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > From: Alex Deucher 
> >
> > This patch introduces new UAPI/IOCTL for usermode graphics queue.
> > The userspace app will fill this structure and request the graphics
> > driver to add a graphics work queue for it. The output of this UAPI
> > is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher 
> > Cc: Christian Koenig 
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> > 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM   0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED0x15
> > +#define DRM_AMDGPU_USERQ 0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >* DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >   union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE   1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)
>
> What does "secure" mean here? I don't see this flag referenced anywhere in 
> the rest of the patch series.
>
> Regards,
>Felix
>
>
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > + __u32   flags;
> > + /** IP type: AMDGPU_HW_IP_* */
> > + __u32   ip_type;
> > + /** GEM object handle */
> > + __u32   doorbell_handle;
> > + /** Doorbell offset in dwords */
> > + __u32   doorbell_offset;
> > + /** GPU virtual address of the queue */
> > + __u64   queue_va;
> > + /** Size of the queue in bytes */
> > + __u64   queue_size;
> > + /** GPU virtual address of the rptr */
> > + __u64   rptr_va;
> > + /** GPU virtual address of the wptr */
> > + __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > + /** AMDGPU_USERQ_OP_* */
> > + __u32   op;
> > + /** Flags */
> > + __u32   flags;
> > + /** Context handle to associate the queue with */
> > + __u32   ctx_id;
> > + __u32   pad;
> > + /** Queue descriptor */
> > + struct drm_amdgpu_userq_mqd mqd; };
> > +
> > +struct drm_amdgpu_userq_out {
> > + /** Queue handle */
> > + __u32   q_id;
> > + /** Flags */
> > + __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > + struct drm_amdgpu_userq_in in;
> > + struct drm_amdgpu_userq_out out; };
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2


Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 2:17 PM Liu, Shaoyun  wrote:
>
> [AMD Official Use Only - General]
>
> Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API 
> for both  graphic and compute  ?

Yeah, that is the eventual goal, hence the flag for AQL vs PM4.

Alex

>
> Regards
> Shaoyun.liu
>
> -Original Message-
> From: amd-gfx  On Behalf Of Felix 
> Kuehling
> Sent: Tuesday, January 3, 2023 1:30 PM
> To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander ; Koenig, Christian 
> ; Yadav, Arvind ; Paneer 
> Selvam, Arunpravin 
> Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management
>
> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > From: Alex Deucher 
> >
> > This patch introduces new UAPI/IOCTL for usermode graphics queue. The
> > userspace app will fill this structure and request the graphics driver
> > to add a graphics work queue for it. The output of this UAPI is a
> > queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher 
> > Cc: Christian Koenig 
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> > 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM   0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED0x15
> > +#define DRM_AMDGPU_USERQ 0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >* DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >   union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE   1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)
>
> What does "secure" mean here? I don't see this flag referenced anywhere in 
> the rest of the patch series.
>
> Regards,
>Felix
>
>
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > + __u32   flags;
> > + /** IP type: AMDGPU_HW_IP_* */
> > + __u32   ip_type;
> > + /** GEM object handle */
> > + __u32   doorbell_handle;
> > + /** Doorbell offset in dwords */
> > + __u32   doorbell_offset;
> > + /** GPU virtual address of the queue */
> > + __u64   queue_va;
> > + /** Size of the queue in bytes */
> > + __u64   queue_size;
> > + /** GPU virtual address of the rptr */
> > + __u64   rptr_va;
> > + /** GPU virtual address of the wptr */
> > + __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > + /** AMDGPU_USERQ_OP_* */
> > + __u32   op;
> > + /** Flags */
> > + __u32   flags;
> > + /** Context handle to associate the queue with */
> > + __u32   ctx_id;
> > + __u32   pad;
> > + /** Queue descriptor */
> > + struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > + /** Queue handle */
> > + __u32   q_id;
> > + /** Flags */
> > + __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > + struct drm_amdgpu_userq_in in;
> > + struct drm_amdgpu_userq_out out;
> > +};
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2


Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 1:30 PM Felix Kuehling  wrote:
>
> Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> > From: Alex Deucher 
> >
> > This patch introduces new UAPI/IOCTL for usermode graphics
> > queue. The userspace app will fill this structure and request
> > the graphics driver to add a graphics work queue for it. The
> > output of this UAPI is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher 
> > Cc: Christian Koenig 
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 0d93ec132ebb..a3d0dd6f62c5 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM   0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED0x15
> > +#define DRM_AMDGPU_USERQ 0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >* DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >   union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE   1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)
>
> What does "secure" mean here? I don't see this flag referenced anywhere
> in the rest of the patch series.

It means the queue operates in secure mode (i.e. encrypted for content
protection -- TMZ in hardware parlance).

Alex

>
> Regards,
>Felix
>
>
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > + __u32   flags;
> > + /** IP type: AMDGPU_HW_IP_* */
> > + __u32   ip_type;
> > + /** GEM object handle */
> > + __u32   doorbell_handle;
> > + /** Doorbell offset in dwords */
> > + __u32   doorbell_offset;
> > + /** GPU virtual address of the queue */
> > + __u64   queue_va;
> > + /** Size of the queue in bytes */
> > + __u64   queue_size;
> > + /** GPU virtual address of the rptr */
> > + __u64   rptr_va;
> > + /** GPU virtual address of the wptr */
> > + __u64   wptr_va;
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > + /** AMDGPU_USERQ_OP_* */
> > + __u32   op;
> > + /** Flags */
> > + __u32   flags;
> > + /** Context handle to associate the queue with */
> > + __u32   ctx_id;
> > + __u32   pad;
> > + /** Queue descriptor */
> > + struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > + /** Queue handle */
> > + __u32   q_id;
> > + /** Flags */
> > + __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > + struct drm_amdgpu_userq_in in;
> > + struct drm_amdgpu_userq_out out;
> > +};
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2


RE: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Liu, Shaoyun
[AMD Official Use Only - General]

Hsakmt  has  the  interfaces for compute user queue. Do we want a unify API for 
both  graphic and compute  ?

Regards
Shaoyun.liu

-Original Message-
From: amd-gfx  On Behalf Of Felix 
Kuehling
Sent: Tuesday, January 3, 2023 1:30 PM
To: Sharma, Shashank ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Koenig, Christian 
; Yadav, Arvind ; Paneer 
Selvam, Arunpravin 
Subject: Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

Am 2022-12-23 um 14:36 schrieb Shashank Sharma:
> From: Alex Deucher 
>
> This patch introduces new UAPI/IOCTL for usermode graphics queue. The
> userspace app will fill this structure and request the graphics driver
> to add a graphics work queue for it. The output of this UAPI is a
> queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> Cc: Alex Deucher 
> Cc: Christian Koenig 
> Signed-off-by: Alex Deucher 
> Signed-off-by: Shashank Sharma 
> ---
>   include/uapi/drm/amdgpu_drm.h | 52 +++
>   1 file changed, 52 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h index 0d93ec132ebb..a3d0dd6f62c5
> 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>   #define DRM_AMDGPU_VM   0x13
>   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
>   #define DRM_AMDGPU_SCHED0x15
> +#define DRM_AMDGPU_USERQ 0x16
>
>   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_VM, union drm_amdgpu_vm)
>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>
>   /**
>* DOC: memory domains
> @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
>   union drm_amdgpu_ctx_out out;
>   };
>
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE   1
> +#define AMDGPU_USERQ_OP_FREE 2
> +
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)

What does "secure" mean here? I don't see this flag referenced anywhere in the 
rest of the patch series.

Regards,
   Felix


> +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> +
> +struct drm_amdgpu_userq_mqd {
> + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> + __u32   flags;
> + /** IP type: AMDGPU_HW_IP_* */
> + __u32   ip_type;
> + /** GEM object handle */
> + __u32   doorbell_handle;
> + /** Doorbell offset in dwords */
> + __u32   doorbell_offset;
> + /** GPU virtual address of the queue */
> + __u64   queue_va;
> + /** Size of the queue in bytes */
> + __u64   queue_size;
> + /** GPU virtual address of the rptr */
> + __u64   rptr_va;
> + /** GPU virtual address of the wptr */
> + __u64   wptr_va;
> +};
> +
> +struct drm_amdgpu_userq_in {
> + /** AMDGPU_USERQ_OP_* */
> + __u32   op;
> + /** Flags */
> + __u32   flags;
> + /** Context handle to associate the queue with */
> + __u32   ctx_id;
> + __u32   pad;
> + /** Queue descriptor */
> + struct drm_amdgpu_userq_mqd mqd;
> +};
> +
> +struct drm_amdgpu_userq_out {
> + /** Queue handle */
> + __u32   q_id;
> + /** Flags */
> + __u32   flags;
> +};
> +
> +union drm_amdgpu_userq {
> + struct drm_amdgpu_userq_in in;
> + struct drm_amdgpu_userq_out out;
> +};
> +
>   /* vm ioctl */
>   #define AMDGPU_VM_OP_RESERVE_VMID   1
>   #define AMDGPU_VM_OP_UNRESERVE_VMID 2


[PATCH AUTOSEL 6.1 04/10] drm/amd/display: Report to ACPI video if no panels were found

2023-01-03 Thread Sasha Levin
From: Mario Limonciello 

[ Upstream commit c573e240609ff781a0246c0c8c8351abd0475287 ]

On desktop APUs amdgpu doesn't create a native backlight device
as no eDP panels are found.  However if the BIOS has reported
backlight control methods in the ACPI tables then an acpi_video0
backlight device will be made 8 seconds after boot.

This has manifested in a power slider on a number of desktop APUs
ranging from Ryzen 5000 through Ryzen 7000 on various motherboard
manufacturers. To avoid this, report to the acpi video detection
that the system does not have any panel connected in the native
driver.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786
Reported-by: Hans de Goede 
Signed-off-by: Mario Limonciello 
Reviewed-by: Hans de Goede 
Signed-off-by: Rafael J. Wysocki 
Signed-off-by: Sasha Levin 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 512c32327eb1..b73f61ac5dd5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4371,6 +4371,10 @@ static int amdgpu_dm_initialize_drm_device(struct 
amdgpu_device *adev)
amdgpu_set_panel_orientation(&aconnector->base);
}
 
+   /* If we didn't find a panel, notify the acpi video detection */
+   if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0)
+   acpi_video_report_nolcd();
+
/* Software is initialized. Now we can register interrupt handlers. */
switch (adev->asic_type) {
 #if defined(CONFIG_DRM_AMD_DC_SI)
-- 
2.35.1



Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue

2023-01-03 Thread Felix Kuehling

Am 2023-01-03 um 04:36 schrieb Shashank Sharma:

/*MQD struct for usermode Queue*/
+struct amdgpu_usermode_queue_mqd

This is specific to GC 11.  Every IP and version will have its own MQD
format.  That should live in the IP specific code, not the generic
code.  We already have the generic MQD parameters that we need from
the userq IOCTL.


Noted, we can separate out the generic parameters from gen specific 
parameter, and will try to wrap it around the generic structure.


- Shashank


Is there a reason why you can't use "struct v11_compute_mqd" from 
v11_structs.h?


Regards,
  Felix



Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Felix Kuehling

Am 2022-12-23 um 14:36 schrieb Shashank Sharma:

From: Alex Deucher 

This patch introduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 52 +++
  1 file changed, 52 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 0d93ec132ebb..a3d0dd6f62c5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
  #define DRM_AMDGPU_VM 0x13
  #define DRM_AMDGPU_FENCE_TO_HANDLE0x14
  #define DRM_AMDGPU_SCHED  0x15
+#define DRM_AMDGPU_USERQ   0x16
  
  #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)

  #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
  #define DRM_IOCTL_AMDGPU_VM   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
  #define DRM_IOCTL_AMDGPU_SCHEDDRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
  
  /**

   * DOC: memory domains
@@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
union drm_amdgpu_ctx_out out;
  };
  
+/* user queue IOCTL */

+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)


What does "secure" mean here? I don't see this flag referenced anywhere 
in the rest of the patch series.


Regards,
  Felix



+#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
+
+struct drm_amdgpu_userq_mqd {
+   /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
+   __u32   flags;
+   /** IP type: AMDGPU_HW_IP_* */
+   __u32   ip_type;
+   /** GEM object handle */
+   __u32   doorbell_handle;
+   /** Doorbell offset in dwords */
+   __u32   doorbell_offset;
+   /** GPU virtual address of the queue */
+   __u64   queue_va;
+   /** Size of the queue in bytes */
+   __u64   queue_size;
+   /** GPU virtual address of the rptr */
+   __u64   rptr_va;
+   /** GPU virtual address of the wptr */
+   __u64   wptr_va;
+};
+
+struct drm_amdgpu_userq_in {
+   /** AMDGPU_USERQ_OP_* */
+   __u32   op;
+   /** Flags */
+   __u32   flags;
+   /** Context handle to associate the queue with */
+   __u32   ctx_id;
+   __u32   pad;
+   /** Queue descriptor */
+   struct drm_amdgpu_userq_mqd mqd;
+};
+
+struct drm_amdgpu_userq_out {
+   /** Queue handle */
+   __u32   q_id;
+   /** Flags */
+   __u32   flags;
+};
+
+union drm_amdgpu_userq {
+   struct drm_amdgpu_userq_in in;
+   struct drm_amdgpu_userq_out out;
+};
+
  /* vm ioctl */
  #define AMDGPU_VM_OP_RESERVE_VMID 1
  #define AMDGPU_VM_OP_UNRESERVE_VMID   2


[PATCH] drm/amdgpu: allow query error counters for specific IP block

2023-01-03 Thread Hawking Zhang
amdgpu_ras_block_late_init will be invoked in IP
specific ras_late_init call as a common helper for
all the IP blocks.

However, when amdgpu_ras_block_late_init call
amdgpu_ras_query_error_count to query ras error
counters, amdgpu_ras_query_error_count queries
all the IP blocks that support ras query interface.

This results to wrong error counters cached in
software copies when there are ras errors detected
at time zero or warm reset procedure. i.e., in
sdma_ras_late_init phase, it counts on sdma/mmhub
errors, while, in mmhub_ras_late_init phase, it
still counts on sdma/mmhub errors.

The change updates amdgpu_ras_query_error_count
interface to allow query specific ip error counter.
It introduces a new input parameter: query_info. if
query_info is NULL,  it means query all the IP blocks,
otherwise, only query the ip block specified by
query_info.

Signed-off-by: Hawking Zhang 
Reviewed-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 89 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 +-
 2 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 35b9f2ed2838..7fed63dc09bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1130,11 +1130,54 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_ras_query_error_count -- Get error counts of all IPs
+ * amdgpu_ras_query_error_count_helper -- Get error counter for specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctible 
errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectible 
errors.
+ * @query_info: pointer to ras_query_if
+ *
+ * Return 0 for query success or do nothing, otherwise return an error
+ * on failures
+ */
+static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
+  unsigned long *ce_count,
+  unsigned long *ue_count,
+  struct ras_query_if *query_info)
+{
+   int ret;
+
+   if (!query_info)
+   /* do nothing if query_info is not specified */
+   return 0;
+
+   ret = amdgpu_ras_query_error_status(adev, query_info);
+   if (ret)
+   return ret;
+
+   *ce_count += query_info->ce_count;
+   *ue_count += query_info->ue_count;
+
+   /* some hardware/IP supports read to clear
+* no need to explicitly reset the err status after the query call */
+   if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+   adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+   if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
+   dev_warn(adev->dev,
+"Failed to reset error counter and error 
status\n");
+   }
+
+   return 0;
+}
+
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs or specific IP
  * @adev: pointer to AMD GPU device
  * @ce_count: pointer to an integer to be set to the count of correctible 
errors.
  * @ue_count: pointer to an integer to be set to the count of uncorrectible
  * errors.
+ * @query_info: pointer to ras_query_if if the query request is only for
+ * specific ip block; if info is NULL, then the query request is for
+ * all the ip blocks that support query ras error counters/status
  *
  * If set, @ce_count or @ue_count, count and return the corresponding
  * error counts in those integer pointers. Return 0 if the device
@@ -1142,11 +1185,13 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
  */
 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 unsigned long *ce_count,
-unsigned long *ue_count)
+unsigned long *ue_count,
+struct ras_query_if *query_info)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
unsigned long ce, ue;
+   int ret;
 
if (!adev->ras_enabled || !con)
return -EOPNOTSUPP;
@@ -1158,26 +1203,23 @@ int amdgpu_ras_query_error_count(struct amdgpu_device 
*adev,
 
ce = 0;
ue = 0;
-   list_for_each_entry(obj, >head, node) {
-   struct ras_query_if info = {
-   .head = obj->head,
-   };
-   int res;
-
-   res = amdgpu_ras_query_error_status(adev, );
-   if (res)
-   return res;
+   if (!query_info) {
+   /* query all the ip blocks that support ras query interface */
+   list_for_each_entry(obj, >head, node) {
+   struct ras_query_if info = {
+   .head = obj->head,
+  

Re: [syzbot] WARNING: locking bug in inet_autobind

2023-01-03 Thread Felix Kuehling



Am 2023-01-03 um 11:05 schrieb Waiman Long:

On 1/3/23 10:39, Felix Kuehling wrote:
The regression point doesn't make sense. The kernel config doesn't 
enable CONFIG_DRM_AMDGPU, so there is no way that a change in AMDGPU 
could have caused this regression.


I agree. It is likely a pre-existing problem or caused by another 
commit that got triggered because of the change in cacheline alignment 
caused by commit c0d9271ecbd ("drm/amdgpu: Delete user queue doorbell 
variable").
I don't think the change can affect cache line alignment. The entire 
amdgpu driver doesn't even get compiled in the kernel config that was 
used, and the change doesn't touch any files outside 
drivers/gpu/drm/amd/amdgpu:


# CONFIG_DRM_AMDGPU is not set

My guess would be that it's an intermittent bug that is confusing bisect.

Regards,
  Felix




Cheers,
Longman



Regards,
  Felix


Am 2022-12-29 um 01:26 schrieb syzbot:

syzbot has found a reproducer for the following issue on:

HEAD commit:    1b929c02afd3 Linux 6.2-rc1
git tree:   upstream
console output: 
https://syzkaller.appspot.com/x/log.txt?x=145c6a6848
kernel config: 
https://syzkaller.appspot.com/x/.config?x=2651619a26b4d687
dashboard link: 
https://syzkaller.appspot.com/bug?extid=94cc2a66fc228b23f360
compiler:   gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU 
Binutils for Debian) 2.35.2

syz repro: https://syzkaller.appspot.com/x/repro.syz?x=13e13e3248
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=13790f0848

Downloadable assets:
disk image: 
https://storage.googleapis.com/syzbot-assets/d1849f1ca322/disk-1b929c02.raw.xz
vmlinux: 
https://storage.googleapis.com/syzbot-assets/924cb8aa4ada/vmlinux-1b929c02.xz
kernel image: 
https://storage.googleapis.com/syzbot-assets/8c7330dae0a0/bzImage-1b929c02.xz


The issue was bisected to:

commit c0d9271ecbd891cdeb0fad1edcdd99ee717a655f
Author: Yong Zhao 
Date:   Fri Feb 1 23:36:21 2019 +

 drm/amdgpu: Delete user queue doorbell variables

bisection log: 
https://syzkaller.appspot.com/x/bisect.txt?x=1433ece4a0

final oops: https://syzkaller.appspot.com/x/report.txt?x=1633ece4a0
console output: 
https://syzkaller.appspot.com/x/log.txt?x=1233ece4a0


IMPORTANT: if you fix the issue, please add the following tag to the 
commit:

Reported-by: syzbot+94cc2a66fc228b23f...@syzkaller.appspotmail.com
Fixes: c0d9271ecbd8 ("drm/amdgpu: Delete user queue doorbell 
variables")


[ cut here ]
Looking for class "l2tp_sock" with key l2tp_socket_class, but found 
a different class "slock-AF_INET6" with the same key
WARNING: CPU: 0 PID: 7280 at kernel/locking/lockdep.c:937 
look_up_lock_class+0x97/0x110 kernel/locking/lockdep.c:937

Modules linked in:
CPU: 0 PID: 7280 Comm: syz-executor835 Not tainted 
6.2.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, 
BIOS Google 10/26/2022

RIP: 0010:look_up_lock_class+0x97/0x110 kernel/locking/lockdep.c:937
Code: 17 48 81 fa e0 e5 f6 8f 74 59 80 3d 5d bc 57 04 00 75 50 48 c7 
c7 00 4d 4c 8a 48 89 04 24 c6 05 49 bc 57 04 01 e8 a9 42 b9 ff <0f> 
0b 48 8b 04 24 eb 31 9c 5a 80 e6 02 74 95 e8 45 38 02 fa 85 c0

RSP: 0018:c9000b5378b8 EFLAGS: 00010082
RAX:  RBX: 91c06a00 RCX: 
RDX: 8880292d RSI: 8166721c RDI: f520016a6f09
RBP:  R08: 0005 R09: 
R10: 8201 R11: 20676e696b6f6f4c R12: 
R13: 88802a5820b0 R14:  R15: 
FS:  7f1fd7a97700() GS:8880b980() 
knlGS:

CS:  0010 DS:  ES:  CR0: 80050033
CR2: 2100 CR3: 78ab4000 CR4: 003506f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  
  register_lock_class+0xbe/0x1120 kernel/locking/lockdep.c:1289
  __lock_acquire+0x109/0x56d0 kernel/locking/lockdep.c:4934
  lock_acquire kernel/locking/lockdep.c:5668 [inline]
  lock_acquire+0x1e3/0x630 kernel/locking/lockdep.c:5633
  __raw_spin_lock_bh include/linux/spinlock_api_smp.h:126 [inline]
  _raw_spin_lock_bh+0x33/0x40 kernel/locking/spinlock.c:178
  spin_lock_bh include/linux/spinlock.h:355 [inline]
  lock_sock_nested+0x5f/0xf0 net/core/sock.c:3473
  lock_sock include/net/sock.h:1725 [inline]
  inet_autobind+0x1a/0x190 net/ipv4/af_inet.c:177
  inet_send_prepare net/ipv4/af_inet.c:813 [inline]
  inet_send_prepare+0x325/0x4e0 net/ipv4/af_inet.c:807
  inet6_sendmsg+0x43/0xe0 net/ipv6/af_inet6.c:655
  sock_sendmsg_nosec net/socket.c:714 [inline]
  sock_sendmsg+0xd3/0x120 net/socket.c:734
  __sys_sendto+0x23a/0x340 net/socket.c:2117
  __do_sys_sendto net/socket.c:2129 [inline]
  __se_sys_sendto net/socket.c:2125 [inline]
  __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125
  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
  do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
  

Re: [syzbot] WARNING: locking bug in inet_autobind

2023-01-03 Thread Felix Kuehling
The regression point doesn't make sense. The kernel config doesn't 
enable CONFIG_DRM_AMDGPU, so there is no way that a change in AMDGPU 
could have caused this regression.


Regards,
  Felix


Am 2022-12-29 um 01:26 schrieb syzbot:

syzbot has found a reproducer for the following issue on:

HEAD commit:1b929c02afd3 Linux 6.2-rc1
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=145c6a6848
kernel config:  https://syzkaller.appspot.com/x/.config?x=2651619a26b4d687
dashboard link: https://syzkaller.appspot.com/bug?extid=94cc2a66fc228b23f360
compiler:   gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for 
Debian) 2.35.2
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=13e13e3248
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=13790f0848

Downloadable assets:
disk image: 
https://storage.googleapis.com/syzbot-assets/d1849f1ca322/disk-1b929c02.raw.xz
vmlinux: 
https://storage.googleapis.com/syzbot-assets/924cb8aa4ada/vmlinux-1b929c02.xz
kernel image: 
https://storage.googleapis.com/syzbot-assets/8c7330dae0a0/bzImage-1b929c02.xz

The issue was bisected to:

commit c0d9271ecbd891cdeb0fad1edcdd99ee717a655f
Author: Yong Zhao 
Date:   Fri Feb 1 23:36:21 2019 +

 drm/amdgpu: Delete user queue doorbell variables

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=1433ece4a0
final oops: https://syzkaller.appspot.com/x/report.txt?x=1633ece4a0
console output: https://syzkaller.appspot.com/x/log.txt?x=1233ece4a0

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+94cc2a66fc228b23f...@syzkaller.appspotmail.com
Fixes: c0d9271ecbd8 ("drm/amdgpu: Delete user queue doorbell variables")

[ cut here ]
Looking for class "l2tp_sock" with key l2tp_socket_class, but found a different class 
"slock-AF_INET6" with the same key
WARNING: CPU: 0 PID: 7280 at kernel/locking/lockdep.c:937 
look_up_lock_class+0x97/0x110 kernel/locking/lockdep.c:937
Modules linked in:
CPU: 0 PID: 7280 Comm: syz-executor835 Not tainted 6.2.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
10/26/2022
RIP: 0010:look_up_lock_class+0x97/0x110 kernel/locking/lockdep.c:937
Code: 17 48 81 fa e0 e5 f6 8f 74 59 80 3d 5d bc 57 04 00 75 50 48 c7 c7 00 4d 4c 8a 
48 89 04 24 c6 05 49 bc 57 04 01 e8 a9 42 b9 ff <0f> 0b 48 8b 04 24 eb 31 9c 5a 
80 e6 02 74 95 e8 45 38 02 fa 85 c0
RSP: 0018:c9000b5378b8 EFLAGS: 00010082
RAX:  RBX: 91c06a00 RCX: 
RDX: 8880292d RSI: 8166721c RDI: f520016a6f09
RBP:  R08: 0005 R09: 
R10: 8201 R11: 20676e696b6f6f4c R12: 
R13: 88802a5820b0 R14:  R15: 
FS:  7f1fd7a97700() GS:8880b980() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 2100 CR3: 78ab4000 CR4: 003506f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
  
  register_lock_class+0xbe/0x1120 kernel/locking/lockdep.c:1289
  __lock_acquire+0x109/0x56d0 kernel/locking/lockdep.c:4934
  lock_acquire kernel/locking/lockdep.c:5668 [inline]
  lock_acquire+0x1e3/0x630 kernel/locking/lockdep.c:5633
  __raw_spin_lock_bh include/linux/spinlock_api_smp.h:126 [inline]
  _raw_spin_lock_bh+0x33/0x40 kernel/locking/spinlock.c:178
  spin_lock_bh include/linux/spinlock.h:355 [inline]
  lock_sock_nested+0x5f/0xf0 net/core/sock.c:3473
  lock_sock include/net/sock.h:1725 [inline]
  inet_autobind+0x1a/0x190 net/ipv4/af_inet.c:177
  inet_send_prepare net/ipv4/af_inet.c:813 [inline]
  inet_send_prepare+0x325/0x4e0 net/ipv4/af_inet.c:807
  inet6_sendmsg+0x43/0xe0 net/ipv6/af_inet6.c:655
  sock_sendmsg_nosec net/socket.c:714 [inline]
  sock_sendmsg+0xd3/0x120 net/socket.c:734
  __sys_sendto+0x23a/0x340 net/socket.c:2117
  __do_sys_sendto net/socket.c:2129 [inline]
  __se_sys_sendto net/socket.c:2125 [inline]
  __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125
  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
  do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
  entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f1fd78538b9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 
48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 
c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:7f1fd7a971f8 EFLAGS: 0212 ORIG_RAX: 002c
RAX: ffda RBX: 7f1fd78f0038 RCX: 7f1fd78538b9
RDX:  RSI:  RDI: 0004
RBP: 7f1fd78f0030 R08: 2100 R09: 001c
R10: 04008000 R11: 0212 R12: 7f1fd78f003c
R13: 7f1fd79ffc8f R14: 7f1fd7a97300 R15: 00022000
  



Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Christian König

Am 03.01.23 um 15:34 schrieb Alex Deucher:

On Tue, Jan 3, 2023 at 4:35 AM Christian König
 wrote:

Am 03.01.23 um 10:22 schrieb Shashank Sharma:

On 03/01/2023 10:15, Christian König wrote:

Am 03.01.23 um 10:12 schrieb Shashank Sharma:

On 02/01/2023 13:39, Christian König wrote:

Hi Shashank,

Am 26.12.22 um 11:41 schrieb Shashank Sharma:

[SNIP]

 /* df */
   struct amdgpu_dfdf;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
   unsigned longras_counter_ce;
   unsigned longras_counter_ue;
   uint32_tstable_pstate;
+struct amdgpu_usermode_queue*userq;

Why should we have this in the ctx here???

We are allocating a few things dynamically for the queue, which
would be valid until we destroy this queue. Also we need to save
this queue

container at some place for the destroy function,  and I thought
it would make sense to keep this with the context ptr, as this is
how we are

identifying the incoming request.

I have absolutely no idea how you end up with that design.

The ctx object is the CS IOCTL context, that is not even remotely
related to anything the user queues should be doing.

Please completely drop that relationship and don't use any of the
ctx object stuff in the user queue code.


Historically the workload submission always came with a context (due
to CS IOCTL), so we thought it would make sense to still have its
relevance in the new workload submission method. Would you prefer
this new submission to be independent of AMDGPU context ?

Well not prefer, the point is that this doesn't make any sense at all.

See the amdgpu_ctx object contains the resulting fence pointers for
the CS IOCTL as well as information necessary for the CS IOCTL to
work (e.g. scheduler entities etc...).

I don't see how anything from that stuff would be useful for the MES
or user queues.

Christian.


I am getting your point, and it makes sense as well. But in such
scenario, we might have to create something parallel to
AMDGPU_USERQ_CTX which is doing very much the same.

We can still do it to make a logically separate entity, but any
suggestions on where to keep this udev_ctx ptr (if not in adev, as
well as not ctx) ?


Take a look at the amdgpu_ctx_mgr object with the mutex and the idr and
how this is embedded into the amdgpu_fpriv object. It should become
pretty clear from there on.

I don't think we need an userq_ctx or similar, each userq should be an
independent object. What we need is an userq_mgr object which holds the
collection of all the useq objects the client application has created
through its fpriv connection to the driver.

Don't we want to associate the queues to a ctx for guilty tracking
purposes when there is a hang?


Nope, absolutely not.

The hang detection around the context was just another design bug we 
inherited from the windows driver.


What we should do instead is to use the error field in the dma_fence 
object just like every other driver and component does.


Christian.



Alex


Regards,
Christian.


- Shashank



- Shashank



Christian.


- Shashank


Regards,
Christian.


   };
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a
+ * copy of this software and associated documentation files
(the "Software"),
+ * to deal in the Software without restriction, including
without limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to
whom the
+ * Software is furnished to do so, subject to the following
conditions:
+ *
+ * The above copyright notice and this permission notice shall
be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a,

Re: [RFC 5/7] drm/amdgpu: Create context for usermode queue

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 4:40 AM Shashank Sharma  wrote:
>
>
> On 29/12/2022 18:54, Alex Deucher wrote:
> > On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  
> > wrote:
> >> The FW expects us to allocate at least one page as process
> >> context space, and one for gang context space. This patch adds some
> >> object for the same.
> > This should be handled in the IP specific code for the MQD creation.
> > Each IP may have different requirements for MQD related metadata.
> >
> > Alex
>
> Noted, so 3 IP specific functions so far,
>
> .init_mqd(), .map() and .create_ctx_space().
>

I think this can be handled in init_mqd().  No need for a separate callback.

Alex

> - Shashank
>
> >
> >> Cc: Alex Deucher 
> >> Cc: Christian Koenig 
> >>
> >> Signed-off-by: Shashank Sharma 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++
> >>   .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
> >>   2 files changed, 65 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >> index b566ce4cb7f0..2a854a5e2f70 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >> @@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device 
> >> *adev,
> >>   return 0;
> >>   }
> >>
> >> +static int
> >> +amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct 
> >> amdgpu_usermode_queue *queue)
> >> +{
> >> +int r;
> >> +struct amdgpu_userq_ctx *pctx = >proc_ctx;
> >> +struct amdgpu_userq_ctx *gctx = >gang_ctx;
> >> +/*
> >> + * The FW expects atleast one page space allocated for
> >> + * process context related work, and one for gang context.
> >> + */
> >> +r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> >> +AMDGPU_GEM_DOMAIN_VRAM,
> >> +>obj,
> >> +>gpu_addr,
> >> +>cpu_ptr);
> >> +if (r) {
> >> +DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
> >> +return r;
> >> +}
> >> +
> >> +r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> >> +AMDGPU_GEM_DOMAIN_VRAM,
> >> +>obj,
> >> +>gpu_addr,
> >> +>cpu_ptr);
> >> +if (r) {
> >> +DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
> >> +amdgpu_bo_free_kernel(>obj,
> >> +  >gpu_addr,
> >> +  >cpu_ptr);
> >> +return r;
> >> +}
> >> +
> >> +return 0;
> >> +}
> >> +
> >> +static void
> >> +amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct 
> >> amdgpu_usermode_queue *queue)
> >> +{
> >> +struct amdgpu_userq_ctx *pctx = >proc_ctx;
> >> +struct amdgpu_userq_ctx *gctx = >gang_ctx;
> >> +
> >> +amdgpu_bo_free_kernel(>obj,
> >> +  >gpu_addr,
> >> +  >cpu_ptr);
> >> +amdgpu_bo_free_kernel(>obj,
> >> +  >gpu_addr,
> >> +  >cpu_ptr);
> >> +}
> >> +
> >>   static void
> >>   amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct 
> >> amdgpu_usermode_queue *queue)
> >>   {
> >> @@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device 
> >> *adev, struct drm_file *filp,
> >>   goto free_mqd;
> >>   }
> >>
> >> +r = amdgpu_userqueue_create_context(adev, queue);
> >> +if (r < 0) {
> >> +DRM_ERROR("Failed to create context for queue\n");
> >> +goto free_mqd;
> >> +}
> >> +
> >>   ctx->userq = queue;
> >>   args->out.q_id = queue->queue_id;
> >>   args->out.flags = 0;
> >> @@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device 
> >> *adev, struct drm_file *filp,
> >>   struct amdgpu_usermode_queue *queue = ctx->userq;
> >>
> >>   mutex_lock(>userq.userq_mutex);
> >> +amdgpu_userqueue_free_context(adev, queue);
> >>   amdgpu_userqueue_destroy_mqd(queue);
> >>   amdgpu_userqueue_remove_index(adev, queue);
> >>   ctx->userq = NULL;
> >> diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
> >> b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >> index c1fe39ffaf72..8bf3c0be6937 100644
> >> --- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >> +++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
> >> @@ -26,6 +26,12 @@
> >>
> >>   #define AMDGPU_MAX_USERQ 512
> >>
> >> +struct amdgpu_userq_ctx {
> >> +   struct amdgpu_bo *obj;
> >> +   uint64_t gpu_addr;
> >> +   void*cpu_ptr;
> >> +};
> >> +
> >>   struct amdgpu_usermode_queue {
> >>  int queue_id;
> >>  int queue_type;
> >> @@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {
> >>
> >> 

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 4:35 AM Christian König
 wrote:
>
> Am 03.01.23 um 10:22 schrieb Shashank Sharma:
> >
> > On 03/01/2023 10:15, Christian König wrote:
> >> Am 03.01.23 um 10:12 schrieb Shashank Sharma:
> >>>
> >>> On 02/01/2023 13:39, Christian König wrote:
>  Hi Shashank,
> 
>  Am 26.12.22 um 11:41 schrieb Shashank Sharma:
> > [SNIP]
> >>> /* df */
> >>>   struct amdgpu_dfdf;
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> index 0fa0e56daf67..f7413859b14f 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >>>   unsigned longras_counter_ce;
> >>>   unsigned longras_counter_ue;
> >>>   uint32_tstable_pstate;
> >>> +struct amdgpu_usermode_queue*userq;
> >>
> >> Why should we have this in the ctx here???
> >
> > We are allocating a few things dynamically for the queue, which
> > would be valid until we destroy this queue. Also we need to save
> > this queue
> >
> > container at some place for the destroy function,  and I thought
> > it would make sense to keep this with the context ptr, as this is
> > how we are
> >
> > identifying the incoming request.
> 
>  I have absolutely no idea how you end up with that design.
> 
>  The ctx object is the CS IOCTL context, that is not even remotely
>  related to anything the user queues should be doing.
> 
>  Please completely drop that relationship and don't use any of the
>  ctx object stuff in the user queue code.
> 
> >>> Historically the workload submission always came with a context (due
> >>> to CS IOCTL), so we thought it would make sense to still have its
> >>> relevance in the new workload submission method. Would you prefer
> >>> this new submission to be independent of AMDGPU context ?
> >>
> >> Well not prefer, the point is that this doesn't make any sense at all.
> >>
> >> See the amdgpu_ctx object contains the resulting fence pointers for
> >> the CS IOCTL as well as information necessary for the CS IOCTL to
> >> work (e.g. scheduler entities etc...).
> >>
> >> I don't see how anything from that stuff would be useful for the MES
> >> or user queues.
> >>
> >> Christian.
> >
> >
> > I am getting your point, and it makes sense as well. But in such
> > scenario, we might have to create something parallel to
> > AMDGPU_USERQ_CTX which is doing very much the same.
> >
> > We can still do it to make a logically separate entity, but any
> > suggestions on where to keep this udev_ctx ptr (if not in adev, as
> > well as not ctx) ?
>
>
> Take a look at the amdgpu_ctx_mgr object with the mutex and the idr and
> how this is embedded into the amdgpu_fpriv object. It should become
> pretty clear from there on.
>
> I don't think we need an userq_ctx or similar, each userq should be an
> independent object. What we need is an userq_mgr object which holds the
> collection of all the useq objects the client application has created
> through its fpriv connection to the driver.

Don't we want to associate the queues to a ctx for guilty tracking
purposes when there is a hang?

Alex

>
> Regards,
> Christian.
>
> >
> > - Shashank
> >
> >
> >>
> >>>
> >>> - Shashank
> >>>
> >>>
>  Christian.
> 
> >
> > - Shashank
> >
> >>
> >> Regards,
> >> Christian.
> >>
> >>>   };
> >>> struct amdgpu_ctx_mgr {
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> new file mode 100644
> >>> index ..3b6e8f75495c
> >>> --- /dev/null
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
> >>> @@ -0,0 +1,187 @@
> >>> +/*
> >>> + * Copyright 2022 Advanced Micro Devices, Inc.
> >>> + *
> >>> + * Permission is hereby granted, free of charge, to any person
> >>> obtaining a
> >>> + * copy of this software and associated documentation files
> >>> (the "Software"),
> >>> + * to deal in the Software without restriction, including
> >>> without limitation
> >>> + * the rights to use, copy, modify, merge, publish, distribute,
> >>> sublicense,
> >>> + * and/or sell copies of the Software, and to permit persons to
> >>> whom the
> >>> + * Software is furnished to do so, subject to the following
> >>> conditions:
> >>> + *
> >>> + * The above copyright notice and this permission notice shall
> >>> be included in
> >>> + * all copies or substantial portions of the Software.
> >>> + *
> >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> >>> KIND, EXPRESS OR
> >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> >>> 

Re: [PATCH] drm/amdgpu: grab extra fence reference for drm_sched_job_add_dependency

2023-01-03 Thread Michel Dänzer
On 1/3/23 15:26, Alex Deucher wrote:
> On Tue, Jan 3, 2023 at 3:34 AM Christian König
>  wrote:
>>
>> I assume that this was already upstreamed while I was on sick leave?
> 
> Yes.

Though there seem to be more issues still, see comments on 
https://gitlab.freedesktop.org/drm/amd/-/issues/2309 .


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH] drm/amdgpu: grab extra fence reference for drm_sched_job_add_dependency

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 3:34 AM Christian König
 wrote:
>
> I assume that this was already upstreamed while I was on sick leave?

Yes.

Alex


>
> Thanks,
> Christian.
>
> Am 21.12.22 um 22:10 schrieb Alex Deucher:
> > Applied.  Thanks!
> >
> > Alex
> >
> > On Mon, Dec 19, 2022 at 3:01 PM Borislav Petkov  wrote:
> >> On Mon, Dec 19, 2022 at 11:47:18AM +0100, Christian König wrote:
> >>> That function consumes the reference.
> >>>
> >>> Signed-off-by: Christian König 
> >>> Fixes: aab9cf7b6954 ("drm/amdgpu: use scheduler dependencies for VM 
> >>> updates")
> >>> ---
> >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 2 ++
> >>>   1 file changed, 2 insertions(+)
> >> Thanks, that fixes it.
> >>
> >> Reported-by: Borislav Petkov (AMD) 
> >> Tested-by: Borislav Petkov (AMD) 
> >>
> >> --
> >> Regards/Gruss,
> >>  Boris.
> >>
> >> https://people.kernel.org/tglx/notes-about-netiquette
>


Re: [RFC 1/7] drm/amdgpu: UAPI for user queue management

2023-01-03 Thread Alex Deucher
On Mon, Jan 2, 2023 at 8:26 AM Christian König  wrote:
>
> Am 23.12.22 um 20:36 schrieb Shashank Sharma:
> > From: Alex Deucher 
> >
> > This patch introduces new UAPI/IOCTL for usermode graphics
> > queue. The userspace app will fill this structure and request
> > the graphics driver to add a graphics work queue for it. The
> > output of this UAPI is a queue id.
> >
> > This UAPI maps the queue into GPU, so the graphics app can start
> > submitting work to the queue as soon as the call returns.
> >
> > Cc: Alex Deucher 
> > Cc: Christian Koenig 
> > Signed-off-by: Alex Deucher 
> > Signed-off-by: Shashank Sharma 
> > ---
> >   include/uapi/drm/amdgpu_drm.h | 52 +++
> >   1 file changed, 52 insertions(+)
> >
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 0d93ec132ebb..a3d0dd6f62c5 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -54,6 +54,7 @@ extern "C" {
> >   #define DRM_AMDGPU_VM   0x13
> >   #define DRM_AMDGPU_FENCE_TO_HANDLE  0x14
> >   #define DRM_AMDGPU_SCHED0x15
> > +#define DRM_AMDGPU_USERQ 0x16
> >
> >   #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >   #define DRM_IOCTL_AMDGPU_GEM_MMAP   DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > @@ -71,6 +72,7 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> > +#define DRM_IOCTL_AMDGPU_USERQ   DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >
> >   /**
> >* DOC: memory domains
> > @@ -288,6 +290,56 @@ union drm_amdgpu_ctx {
> >   union drm_amdgpu_ctx_out out;
> >   };
> >
> > +/* user queue IOCTL */
> > +#define AMDGPU_USERQ_OP_CREATE   1
> > +#define AMDGPU_USERQ_OP_FREE 2
> > +
> > +#define AMDGPU_USERQ_MQD_FLAGS_SECURE(1 << 0)
> > +#define AMDGPU_USERQ_MQD_FLAGS_AQL   (1 << 1)
> > +
> > +struct drm_amdgpu_userq_mqd {
> > + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> > + __u32   flags;
> > + /** IP type: AMDGPU_HW_IP_* */
> > + __u32   ip_type;
> > + /** GEM object handle */
> > + __u32   doorbell_handle;
> > + /** Doorbell offset in dwords */
> > + __u32   doorbell_offset;
> > + /** GPU virtual address of the queue */
> > + __u64   queue_va;
> > + /** Size of the queue in bytes */
> > + __u64   queue_size;
> > + /** GPU virtual address of the rptr */
> > + __u64   rptr_va;
> > + /** GPU virtual address of the wptr */
> > + __u64   wptr_va;
>
> We should probably note somewhere that those are inputs to the queue and
> need to be allocated by userspace somewhere.
>
> > +};
> > +
> > +struct drm_amdgpu_userq_in {
> > + /** AMDGPU_USERQ_OP_* */
> > + __u32   op;
> > + /** Flags */
> > + __u32   flags;
>
> > + /** Context handle to associate the queue with */
> > + __u32   ctx_id;
>
> Uff, this is just blunt nonsense. Queues are not related to ctx objects
> in any way possible.

I thought we wanted to have queues associated with contexts for
hang/guilty tracking.

Alex

>
> > + __u32   pad;
> > + /** Queue descriptor */
> > + struct drm_amdgpu_userq_mqd mqd;
> > +};
> > +
> > +struct drm_amdgpu_userq_out {
> > + /** Queue handle */
> > + __u32   q_id;
> > + /** Flags */
> > + __u32   flags;
> > +};
> > +
> > +union drm_amdgpu_userq {
> > + struct drm_amdgpu_userq_in in;
> > + struct drm_amdgpu_userq_out out;
> > +};
> > +
> >   /* vm ioctl */
> >   #define AMDGPU_VM_OP_RESERVE_VMID   1
> >   #define AMDGPU_VM_OP_UNRESERVE_VMID 2
>


Re: [PATCH v2 00/11] Recover from failure to probe GPU

2023-01-03 Thread Alex Deucher
On Tue, Jan 3, 2023 at 5:10 AM Lazar, Lijo  wrote:
>
>
>
> On 12/28/2022 10:00 PM, Mario Limonciello wrote:
> > One of the first thing that KMS drivers do during initialization is
> > destroy the system firmware framebuffer by means of
> > `drm_aperture_remove_conflicting_pci_framebuffers`
> >
> > This means that if for any reason the GPU failed to probe the user
> > will be stuck with at best a screen frozen at the last thing that
> > was shown before the KMS driver continued it's probe.
> >
> > The problem is most pronounced when new GPU support is introduced
> > because users will need to have a recent linux-firmware snapshot
> > on their system when they boot a kernel with matching support.
> >
> > However the problem is further exaggerated in the case of amdgpu because
> > it has migrated to "IP discovery" where amdgpu will attempt to load
> > on "ALL" AMD GPUs even if the driver is missing support for IP blocks
> > contained in that GPU.
> >
> > IP discovery requires some probing and isn't run until after the
> > framebuffer has been destroyed.
> >
> > This means a situation can occur where a user purchases a new GPU not
> > yet supported by a distribution and when booting the installer it will
> > "freeze" even if the distribution doesn't have the matching kernel support
> > for those IP blocks.
> >
> > The perfect example of this is Ubuntu 22.10 and the new dGPUs just
> > launched by AMD.  The installation media ships with kernel 5.19 (which
> > has IP discovery) but the amdgpu support for those IP blocks landed in
> > kernel 6.0. The matching linux-firmware was released after 22.10's launch.
> > The screen will freeze without nomodeset. Even if a user manages to install
> > and then upgrades to kernel 6.0 after install they'll still have the
> > problem of missing firmware, and the same experience.
> >
> > This is quite jarring for users, particularly if they don't know
> > that they have to use "nomodeset" to install.
> >
> > To help the situation make changes to GPU discovery:
> > 1) Delay releasing the firmware framebuffer until after IP discovery has
> > completed.  This will help the situation of an older kernel that doesn't
> > yet support the IP blocks probing a new GPU.
> > 2) Request loading all PSP, VCN, SDMA, MES and GC microcode into memory
> > during IP discovery. This will help the situation of new enough kernel for
> > the IP discovery phase to otherwise pass but missing microcode from
> > linux-firmware.git.
> >
> > Not all requested firmware will be loaded during IP discovery as some of it
> > will require larger driver architecture changes. For example SMU firmware
> > isn't loaded on certain products, but that's not known until later on when
> > the early_init phase of the SMU load occurs.
> >
> > v1->v2:
> >   * Take the suggestion from v1 thread to delay the framebuffer release 
> > until
> > ip discovery is done. This patch is CC to stable to that older stable
> > kernels with IP discovery won't try to probe unknown IP.
> >   * Drop changes to drm aperture.
> >   * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.
> >
>
> What is the gain here in just checking if firmware files are available?
> It can fail anywhere during sw_init and it's the same situation.

Other failures are presumably a bug or hardware issue.  The missing
firmware would be a common issue when chips are first launched.
Thinking about it a bit more, another option might be to move the
calls to request_firmware() into the IP specific early_init()
functions and then move the drm_aperture release after early_init().
That would keep the firmware handling in the IPs and should still
happen early enough that we haven't messed with the hardware yet.

Alex

>
> Restricting IP FWs to IP specific files looks better to me than
> centralizing and creating interdependencies.
>
> Thanks,
> Lijo
>
> > Mario Limonciello (11):
> >drm/amd: Delay removal of the firmware framebuffer
> >drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
> >drm/amd: Convert SMUv11 microcode init to use
> >  `amdgpu_ucode_ip_version_decode`
> >drm/amd: Convert SMU v13 to use `amdgpu_ucode_ip_version_decode`
> >drm/amd: Request SDMA microcode during IP discovery
> >drm/amd: Request VCN microcode during IP discovery
> >drm/amd: Request MES microcode during IP discovery
> >drm/amd: Request GFX9 microcode during IP discovery
> >drm/amd: Request GFX10 microcode during IP discovery
> >drm/amd: Request GFX11 microcode during IP discovery
> >drm/amd: Request PSP microcode during IP discovery
> >
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 590 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |   2 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |   9 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |   2 +-
> 

Re: [PATCH 09/18] vfio-mdev/mdpy-fb: Do not set struct fb_info.apertures

2023-01-03 Thread Thomas Zimmermann



Am 20.12.22 um 10:32 schrieb Javier Martinez Canillas:

[adding Kirti Wankhede and k...@vger.kernel.org to Cc list]

On 12/19/22 17:05, Thomas Zimmermann wrote:

Generic fbdev drivers use the apertures field in struct fb_info to
control ownership of the framebuffer memory and graphics device. Do
not set the values in mdpy-fb.

Signed-off-by: Thomas Zimmermann 
---
  samples/vfio-mdev/mdpy-fb.c | 8 
  1 file changed, 8 deletions(-)

diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index 9ec93d90e8a5..1de5801cd2e8 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -161,14 +161,6 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
goto err_release_fb;
}
  
-	info->apertures = alloc_apertures(1);

-   if (!info->apertures) {
-   ret = -ENOMEM;
-   goto err_unmap;
-   }
-   info->apertures->ranges[0].base = info->fix.smem_start;
-   info->apertures->ranges[0].size = info->fix.smem_len;
-
info->fbops = _fb_ops;
info->flags = FBINFO_DEFAULT;
info->pseudo_palette = par->palette;

Reviewed-by: Javier Martinez Canillas 

But I think an ack from Kirti Wankhede or other virt folk is needed if you
want to merge this through drm-misc-next.


ping. Could I have a review from the vfio devs, please.

Best regards
Thomas





--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH AUTOSEL 5.15 24/27] Revert "drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly""

2023-01-03 Thread Yury Zhuravlev
Hello!

Why is this revert for revert coming for 6.1 but reverted again for 6.1.2?
My GPU is not working correctly again!
https://cdn.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.1.2

It seems like somebody made a mistake and picked up the wrong patch for the
stable channel.

Regards!

On Sat, Nov 19, 2022 at 11:14 AM Sasha Levin  wrote:

> From: Asher Song 
>
> [ Upstream commit 30b8e7b8ee3be003e0df85c857c5cd0e0bd58b82 ]
>
> This reverts commit 4545ae2ed3f2f7c3f615a53399c9c8460ee5bca7.
>
> The origin patch "drm/amdgpu: getting fan speed pwm for vega10 properly"
> works fine.
> Test failure is caused by test case self.
>
> Signed-off-by: Asher Song 
> Reviewed-by: Guchun Chen 
> Signed-off-by: Alex Deucher 
> Signed-off-by: Sasha Levin 
> ---
>  .../amd/pm/powerplay/hwmgr/vega10_thermal.c   | 25 +--
>  1 file changed, 12 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> index dad3e3741a4e..190af79f3236 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
> @@ -67,22 +67,21 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr
> *hwmgr,
>  int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr,
> uint32_t *speed)
>  {
> -   uint32_t current_rpm;
> -   uint32_t percent = 0;
> -
> -   if (hwmgr->thermal_controller.fanInfo.bNoFan)
> -   return 0;
> +   struct amdgpu_device *adev = hwmgr->adev;
> +   uint32_t duty100, duty;
> +   uint64_t tmp64;
>
> -   if (vega10_get_current_rpm(hwmgr, _rpm))
> -   return -1;
> +   duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
> +   CG_FDO_CTRL1, FMAX_DUTY100);
> +   duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS),
> +   CG_THERMAL_STATUS, FDO_PWM_DUTY);
>
> -   if (hwmgr->thermal_controller.
> -   advanceFanControlParameters.usMaxFanRPM != 0)
> -   percent = current_rpm * 255 /
> -   hwmgr->thermal_controller.
> -   advanceFanControlParameters.usMaxFanRPM;
> +   if (!duty100)
> +   return -EINVAL;
>
> -   *speed = MIN(percent, 255);
> +   tmp64 = (uint64_t)duty * 255;
> +   do_div(tmp64, duty100);
> +   *speed = MIN((uint32_t)tmp64, 255);
>
> return 0;
>  }
> --
> 2.35.1
>
>


Re: [PATCH v2 00/11] Recover from failure to probe GPU

2023-01-03 Thread Lazar, Lijo




On 12/28/2022 10:00 PM, Mario Limonciello wrote:

One of the first thing that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`

This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after IP discovery has
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU.
2) Request loading all PSP, VCN, SDMA, MES and GC microcode into memory
during IP discovery. This will help the situation of new enough kernel for
the IP discovery phase to otherwise pass but missing microcode from
linux-firmware.git.

Not all requested firmware will be loaded during IP discovery as some of it
will require larger driver architecture changes. For example SMU firmware
isn't loaded on certain products, but that's not known until later on when
the early_init phase of the SMU load occurs.

v1->v2:
  * Take the suggestion from v1 thread to delay the framebuffer release until
ip discovery is done. This patch is CC to stable to that older stable
kernels with IP discovery won't try to probe unknown IP.
  * Drop changes to drm aperature.
  * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.



What is the gain here in just checking if firmware files are available? 
It can fail anywhere during sw_init and it's the same situation.


Restricting IP FWs to IP specific files looks better to me than 
centralizing and creating interdependencies.


Thanks,
Lijo


Mario Limonciello (11):
   drm/amd: Delay removal of the firmware framebuffer
   drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
   drm/amd: Convert SMUv11 microcode init to use
 `amdgpu_ucode_ip_version_decode`
   drm/amd: Convert SMU v13 to use `amdgpu_ucode_ip_version_decode`
   drm/amd: Request SDMA microcode during IP discovery
   drm/amd: Request VCN microcode during IP discovery
   drm/amd: Request MES microcode during IP discovery
   drm/amd: Request GFX9 microcode during IP discovery
   drm/amd: Request GFX10 microcode during IP discovery
   drm/amd: Request GFX11 microcode during IP discovery
   drm/amd: Request PSP microcode during IP discovery

  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 590 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |   2 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |   9 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 208 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  85 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 180 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c|  64 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 143 +
  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c|  28 -
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  25 +-
  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c| 106 +---
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 165 +
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c| 102 +--
  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c|  82 ---
  drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c  |  36 --
  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  36 --
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c|  61 +-
  

Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver

2023-01-03 Thread Christian König

Am 03.01.23 um 11:00 schrieb Shashank Sharma:


On 03/01/2023 10:47, Christian König wrote:

Am 03.01.23 um 10:43 schrieb Shashank Sharma:


On 29/12/2022 19:02, Alex Deucher wrote:
On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
 wrote:

This is a RFC series to implement usermode graphics queues for AMDGPU
driver (Navi 3X and above). The idea of usermode graphics queue is to
allow direct workload submission from a userspace graphics process 
who

has amdgpu graphics context.

Once we have some initial feedback on the design, we will publish a
follow up V1 series with a libdrm consumer test.

I think this should look more like the following:
1. Convert doorbells to full fledged GEM objects just like vram.  Then
update the GEM IOCTL to allow allocation of doorbell BOs.
2. Store MQD data per amdgpu_ctx.


If my understanding of the comments is correct, we are having 
conflicting opinions here on where to save the MQD data. @Christian ?


You need something like an amdgpu_userq object which holds the BO 
with the MQD the hardware is using as well as anything else necessary 
for the queue.


And we will be storing it into fpriv->amdgpu driver_private area 
(probably by using something like amdgpu_useq_mgr or similar), similar 
to amdgpu_ctx_mgr.


Exactly that, yes. The amdgpu_userq_mgr keeps the idr/mutex and 
everything necessary per client while the amdgpu_userq object represents 
the queue itself.


Christian.



- Shashank



Regards,
Christian.




3. Create secure semaphore pool and map RO into each GPUVM.
4. Add callbacks to each IP type that supports user mode queues.
These callbacks should handle the IP specific MQD initialization and
mapping/unmapping details including allocation of BOs for the MQD
itself and any relevant metadata.  The USERQ IOCTL handler will look
up the callback based on the IP type specified in the IOCTL.


Noted.

Shashank



Alex


Cc: Alex Deucher 
Cc: Christian Koenig 

Alex Deucher (1):
   drm/amdgpu: UAPI for user queue management

Arunpravin Paneer Selvam (1):
   drm/amdgpu: Secure semaphore for usermode queue

Arvind Yadav (1):
   drm/amdgpu: Create MQD for userspace queue

Shashank Sharma (4):
   drm/amdgpu: Add usermode queue for gfx work
   drm/amdgpu: Allocate doorbell slot for user queue
   drm/amdgpu: Create context for usermode queue
   drm/amdgpu: Map userqueue into HW

  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  14 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 


  .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 
  .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 
++

  include/uapi/drm/amdgpu_drm.h |  52 ++
  8 files changed, 1413 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h


--
2.34.1







Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver

2023-01-03 Thread Shashank Sharma



On 03/01/2023 10:47, Christian König wrote:

Am 03.01.23 um 10:43 schrieb Shashank Sharma:


On 29/12/2022 19:02, Alex Deucher wrote:
On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
 wrote:

This is a RFC series to implement usermode graphics queues for AMDGPU
driver (Navi 3X and above). The idea of usermode graphics queue is to
allow direct workload submission from a userspace graphics process who
has amdgpu graphics context.

Once we have some initial feedback on the design, we will publish a
follow up V1 series with a libdrm consumer test.

I think this should look more like the following:
1. Convert doorbells to full fledged GEM objects just like vram.  Then
update the GEM IOCTL to allow allocation of doorbell BOs.
2. Store MQD data per amdgpu_ctx.


If my understanding of the comments is correct, we are having 
conflicting opinions here on where to save the MQD data. @Christian ?


You need something like an amdgpu_userq object which holds the BO with 
the MQD the hardware is using as well as anything else necessary for 
the queue.


And we will be storing it into fpriv->amdgpu driver_private area 
(probably by using something like amdgpu_useq_mgr or similar), similar 
to amdgpu_ctx_mgr.


- Shashank



Regards,
Christian.




3. Create secure semaphore pool and map RO into each GPUVM.
4. Add callbacks to each IP type that supports user mode queues.
These callbacks should handle the IP specific MQD initialization and
mapping/unmapping details including allocation of BOs for the MQD
itself and any relevant metadata.  The USERQ IOCTL handler will look
up the callback based on the IP type specified in the IOCTL.


Noted.

Shashank



Alex


Cc: Alex Deucher 
Cc: Christian Koenig 

Alex Deucher (1):
   drm/amdgpu: UAPI for user queue management

Arunpravin Paneer Selvam (1):
   drm/amdgpu: Secure semaphore for usermode queue

Arvind Yadav (1):
   drm/amdgpu: Create MQD for userspace queue

Shashank Sharma (4):
   drm/amdgpu: Add usermode queue for gfx work
   drm/amdgpu: Allocate doorbell slot for user queue
   drm/amdgpu: Create context for usermode queue
   drm/amdgpu: Map userqueue into HW

  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  14 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 
  .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 
  .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 
++

  include/uapi/drm/amdgpu_drm.h |  52 ++
  8 files changed, 1413 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h


--
2.34.1





Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver

2023-01-03 Thread Christian König

Am 03.01.23 um 10:43 schrieb Shashank Sharma:


On 29/12/2022 19:02, Alex Deucher wrote:
On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
 wrote:

This is a RFC series to implement usermode graphics queues for AMDGPU
driver (Navi 3X and above). The idea of usermode graphics queue is to
allow direct workload submission from a userspace graphics process who
has amdgpu graphics context.

Once we have some initial feedback on the design, we will publish a
follow up V1 series with a libdrm consumer test.

I think this should look more like the following:
1. Convert doorbells to full fledged GEM objects just like vram.  Then
update the GEM IOCTL to allow allocation of doorbell BOs.
2. Store MQD data per amdgpu_ctx.


If my understanding of the comments is correct, we are having 
conflicting opinions here on where to save the MQD data. @Christian ?


You need something like an amdgpu_userq object which holds the BO with 
the MQD the hardware is using as well as anything else necessary for the 
queue.


Regards,
Christian.




3. Create secure semaphore pool and map RO into each GPUVM.
4. Add callbacks to each IP type that supports user mode queues.
These callbacks should handle the IP specific MQD initialization and
mapping/unmapping details including allocation of BOs for the MQD
itself and any relevant metadata.  The USERQ IOCTL handler will look
up the callback based on the IP type specified in the IOCTL.


Noted.

Shashank



Alex


Cc: Alex Deucher 
Cc: Christian Koenig 

Alex Deucher (1):
   drm/amdgpu: UAPI for user queue management

Arunpravin Paneer Selvam (1):
   drm/amdgpu: Secure semaphore for usermode queue

Arvind Yadav (1):
   drm/amdgpu: Create MQD for userspace queue

Shashank Sharma (4):
   drm/amdgpu: Add usermode queue for gfx work
   drm/amdgpu: Allocate doorbell slot for user queue
   drm/amdgpu: Create context for usermode queue
   drm/amdgpu: Map userqueue into HW

  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  14 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 
  .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 
  .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 
++

  include/uapi/drm/amdgpu_drm.h |  52 ++
  8 files changed, 1413 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h


--
2.34.1





Re: [RFC 0/7] RFC: Usermode queue for AMDGPU driver

2023-01-03 Thread Shashank Sharma



On 29/12/2022 19:02, Alex Deucher wrote:

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  wrote:

This is a RFC series to implement usermode graphics queues for AMDGPU
driver (Navi 3X and above). The idea of usermode graphics queue is to
allow direct workload submission from a userspace graphics process who
has amdgpu graphics context.

Once we have some initial feedback on the design, we will publish a
follow up V1 series with a libdrm consumer test.

I think this should look more like the following:
1. Convert doorbells to full fledged GEM objects just like vram.  Then
update the GEM IOCTL to allow allocation of doorbell BOs.
2. Store MQD data per amdgpu_ctx.


If my understanding of the comments is correct, we are having 
conflicting opinions here on where to save the MQD data. @Christian ?



3. Create secure semaphore pool and map RO into each GPUVM.
4. Add callbacks to each IP type that supports user mode queues.
These callbacks should handle the IP specific MQD initialization and
mapping/unmapping details including allocation of BOs for the MQD
itself and any relevant metadata.  The USERQ IOCTL handler will look
up the callback based on the IP type specified in the IOCTL.


Noted.

Shashank



Alex


Cc: Alex Deucher 
Cc: Christian Koenig 

Alex Deucher (1):
   drm/amdgpu: UAPI for user queue management

Arunpravin Paneer Selvam (1):
   drm/amdgpu: Secure semaphore for usermode queue

Arvind Yadav (1):
   drm/amdgpu: Create MQD for userspace queue

Shashank Sharma (4):
   drm/amdgpu: Add usermode queue for gfx work
   drm/amdgpu: Allocate doorbell slot for user queue
   drm/amdgpu: Create context for usermode queue
   drm/amdgpu: Map userqueue into HW

  drivers/gpu/drm/amd/amdgpu/Makefile   |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  14 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 486 
  .../amd/amdgpu/amdgpu_userqueue_secure_sem.c  | 245 
  .../drm/amd/include/amdgpu_usermode_queue.h   |  68 +++
  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++
  include/uapi/drm/amdgpu_drm.h |  52 ++
  8 files changed, 1413 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_secure_sem.c
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h

--
2.34.1



Re: [RFC 5/7] drm/amdgpu: Create context for usermode queue

2023-01-03 Thread Shashank Sharma



On 29/12/2022 18:54, Alex Deucher wrote:

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  wrote:

The FW expects us to allocate at least one page as process
context space, and one for gang context space. This patch adds some
object for the same.

This should be handled in the IP specific code for the MQD creation.
Each IP may have different requirements for MQD related metadata.

Alex


Noted, so 3 IP specific functions so far,

.init_mqd(), .map() and .create_ctx_space().

- Shashank




Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 57 +++
  .../drm/amd/include/amdgpu_usermode_queue.h   |  8 +++
  2 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index b566ce4cb7f0..2a854a5e2f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -69,6 +69,56 @@ amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
  return 0;
  }

+static int
+amdgpu_userqueue_create_context(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)
+{
+int r;
+struct amdgpu_userq_ctx *pctx = >proc_ctx;
+struct amdgpu_userq_ctx *gctx = >gang_ctx;
+/*
+ * The FW expects atleast one page space allocated for
+ * process context related work, and one for gang context.
+ */
+r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+>obj,
+>gpu_addr,
+>cpu_ptr);
+if (r) {
+DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+return r;
+}
+
+r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+AMDGPU_GEM_DOMAIN_VRAM,
+>obj,
+>gpu_addr,
+>cpu_ptr);
+if (r) {
+DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+amdgpu_bo_free_kernel(>obj,
+  >gpu_addr,
+  >cpu_ptr);
+return r;
+}
+
+return 0;
+}
+
+static void
+amdgpu_userqueue_free_context(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)
+{
+struct amdgpu_userq_ctx *pctx = >proc_ctx;
+struct amdgpu_userq_ctx *gctx = >gang_ctx;
+
+amdgpu_bo_free_kernel(>obj,
+  >gpu_addr,
+  >cpu_ptr);
+amdgpu_bo_free_kernel(>obj,
+  >gpu_addr,
+  >cpu_ptr);
+}
+
  static void
  amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)
  {
@@ -282,6 +332,12 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, 
struct drm_file *filp,
  goto free_mqd;
  }

+r = amdgpu_userqueue_create_context(adev, queue);
+if (r < 0) {
+DRM_ERROR("Failed to create context for queue\n");
+goto free_mqd;
+}
+
  ctx->userq = queue;
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
@@ -306,6 +362,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, 
struct drm_file *filp,
  struct amdgpu_usermode_queue *queue = ctx->userq;

  mutex_lock(>userq.userq_mutex);
+amdgpu_userqueue_free_context(adev, queue);
  amdgpu_userqueue_destroy_mqd(queue);
  amdgpu_userqueue_remove_index(adev, queue);
  ctx->userq = NULL;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h 
b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
index c1fe39ffaf72..8bf3c0be6937 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h
@@ -26,6 +26,12 @@

  #define AMDGPU_MAX_USERQ 512

+struct amdgpu_userq_ctx {
+   struct amdgpu_bo *obj;
+   uint64_t gpu_addr;
+   void*cpu_ptr;
+};
+
  struct amdgpu_usermode_queue {
 int queue_id;
 int queue_type;
@@ -44,6 +50,8 @@ struct amdgpu_usermode_queue {

 struct amdgpu_bo*mqd_obj;
 struct amdgpu_vm*vm;
+   struct amdgpu_userq_ctx proc_ctx;
+   struct amdgpu_userq_ctx gang_ctx;
 struct list_headlist;
  };

--
2.34.1



Re: [RFC 6/7] drm/amdgpu: Map userqueue into HW

2023-01-03 Thread Shashank Sharma



On 29/12/2022 18:51, Alex Deucher wrote:

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  wrote:

This patch add the function to map/unmap the usermode queue into the HW,
using the prepared MQD and other objects. After this mapping, the queue
will be ready to accept the workload.

This should also be a callback into IP specific code.  It will be
different for each IP type and version.

Alex


Noted, so far we have two IP specific functions, .init_mqd() and .map()

- Shashank


Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 71 +++
  1 file changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 2a854a5e2f70..b164e24247ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -50,6 +50,67 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, 
struct amdgpu_usermode
  ida_simple_remove(>ida, queue->queue_id);
  }

+static int amdgpu_userqueue_map(struct amdgpu_device *adev,
+struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct mes_add_queue_input queue_input;
+
+memset(_input, 0x0, sizeof(struct mes_add_queue_input));
+
+queue_input.process_va_start = 0;
+queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+queue_input.process_quantum = 10; /* 10ms */
+queue_input.gang_quantum = 1; /* 1ms */
+queue_input.paging = false;
+
+queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
+queue_input.process_context_addr = queue->proc_ctx.gpu_addr;
+queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+queue_input.gang_global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+
+queue_input.process_id = queue->pasid;
+queue_input.queue_type = queue->queue_type;
+queue_input.mqd_addr = queue->mqd_gpu_addr;
+queue_input.wptr_addr = queue->wptr_gpu_addr;
+queue_input.queue_size = queue->queue_size >> 2;
+queue_input.doorbell_offset = queue->doorbell_index;
+queue_input.page_table_base_addr =  queue->vm->pd_phys_addr;
+
+amdgpu_mes_lock(>mes);
+r = adev->mes.funcs->add_hw_queue(>mes, _input);
+amdgpu_mes_unlock(>mes);
+if (r) {
+DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+return r;
+}
+
+DRM_DEBUG_DRIVER("Queue %d mapped successfully\n", queue->queue_id);
+return 0;
+}
+
+static void amdgpu_userqueue_unmap(struct amdgpu_device *adev,
+struct amdgpu_usermode_queue *queue)
+{
+int r;
+struct mes_remove_queue_input queue_input;
+
+memset(_input, 0x0, sizeof(struct mes_remove_queue_input));
+queue_input.doorbell_offset = queue->doorbell_index;
+queue_input.gang_context_addr = queue->gang_ctx.gpu_addr;
+
+amdgpu_mes_lock(>mes);
+r = adev->mes.funcs->remove_hw_queue(>mes, _input);
+amdgpu_mes_unlock(>mes);
+
+if (r) {
+DRM_ERROR("Failed to unmap usermode queue %d\n", queue->queue_id);
+return;
+}
+
+DRM_DEBUG_DRIVER("Usermode queue %d unmapped\n", queue->queue_id);
+}
+
  static int
  amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
  struct amdgpu_usermode_queue *queue)
@@ -338,12 +399,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, 
struct drm_file *filp,
  goto free_mqd;
  }

+r = amdgpu_userqueue_map(adev, queue);
+if (r < 0) {
+DRM_ERROR("Failed to map queue\n");
+goto free_ctx;
+}
+
  ctx->userq = queue;
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(>userq.userq_mutex);
  return 0;

+free_ctx:
+amdgpu_userqueue_free_context(adev, queue);
+
  free_mqd:
  amdgpu_userqueue_destroy_mqd(queue);

@@ -362,6 +432,7 @@ void amdgpu_userqueue_destroy(struct amdgpu_device *adev, 
struct drm_file *filp,
  struct amdgpu_usermode_queue *queue = ctx->userq;

  mutex_lock(>userq.userq_mutex);
+amdgpu_userqueue_unmap(adev, queue);
  amdgpu_userqueue_free_context(adev, queue);
  amdgpu_userqueue_destroy_mqd(queue);
  amdgpu_userqueue_remove_index(adev, queue);
--
2.34.1



Re: [RFC 4/7] drm/amdgpu: Allocate doorbell slot for user queue

2023-01-03 Thread Shashank Sharma



On 29/12/2022 18:50, Alex Deucher wrote:

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  wrote:

This patch allocates a doorbell slot in the bar, for the usermode queue.
We are using the unique queue-id to get this slot from MES.

We should manage the doorbell BAR just like VRAM.  I had a set of
patches to convert doorbell memory to GEM objects.  The user should be
able to allocate doorbell memory via the GEM IOCTL just like VRAM or
GTT.  Then when the user calls the USERQ IOCTL, we can just look up
the GEM object from the handle and then calculate the doorbell offset
based on the offset of the GEM object from the start of the BAR.

Alex


Noted,

- Shashank


Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 28 +++
  1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index a91cc304cb9e..b566ce4cb7f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -50,6 +50,25 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, 
struct amdgpu_usermode
  ida_simple_remove(>ida, queue->queue_id);
  }

+static int
+amdgpu_userqueue_get_doorbell(struct amdgpu_device *adev,
+struct amdgpu_usermode_queue *queue)
+{
+int r;
+unsigned int doorbell_index;
+
+r = amdgpu_mes_alloc_process_doorbells(adev, _index);
+   if (r < 0) {
+DRM_ERROR("Failed to allocate doorbell for user queue\n");
+return r;
+}
+
+/* We are using qnique queue-id to get doorbell here */
+queue->doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
+   doorbell_index, queue->queue_id);
+return 0;
+}
+
  static void
  amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)
  {
@@ -257,12 +276,21 @@ int amdgpu_userqueue_create(struct amdgpu_device *adev, 
struct drm_file *filp,
  goto free_queue;
  }

+r = amdgpu_userqueue_get_doorbell(adev, queue);
+if (r) {
+DRM_ERROR("Failed to create doorbell for queue\n");
+goto free_mqd;
+}
+
  ctx->userq = queue;
  args->out.q_id = queue->queue_id;
  args->out.flags = 0;
  mutex_unlock(>userq.userq_mutex);
  return 0;

+free_mqd:
+amdgpu_userqueue_destroy_mqd(queue);
+
  free_queue:
  amdgpu_userqueue_remove_index(adev, queue);
  mutex_unlock(>userq.userq_mutex);
--
2.34.1



Re: [RFC 3/7] drm/amdgpu: Create MQD for userspace queue

2023-01-03 Thread Shashank Sharma



On 29/12/2022 18:47, Alex Deucher wrote:

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  wrote:

From: Arvind Yadav 

MQD describes the properties of a user queue to the HW, and allows it to
accurately configure the queue while mapping it in GPU HW. This patch
adds:
- A new header file which contains the MQD definition
- A new function which creates an MQD object and fills it with userqueue
   data

Cc: Alex Deucher 
Cc: Christian Koenig 

Signed-off-by: Arvind Yadav 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 138 +
  .../amd/include/amdgpu_usermode_queue_mqd.h   | 544 ++
  2 files changed, 682 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue_mqd.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 3b6e8f75495c..a91cc304cb9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -25,7 +25,10 @@
  #include "amdgpu_vm.h"
  #include "amdgpu_mes.h"
  #include "amdgpu_usermode_queue.h"
+#include "amdgpu_usermode_queue_mqd.h"
  #include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"

Don't add IP specific code to this file.

Noted,

  #define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))

@@ -47,6 +50,134 @@ amdgpu_userqueue_remove_index(struct amdgpu_device *adev, 
struct amdgpu_usermode
  ida_simple_remove(&uqg->ida, queue->queue_id);
  }

+static void
+amdgpu_userqueue_setup_mqd(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)

This should be a ring callback or some new IP level callback to init
an MQD since we'll need this for multiple IP types and generations of
IPs.


We are still using the MES ring which has an existing callback for this, 
but I think it allows an IP specific callback as well. I will check that 
out.



+{
+struct amdgpu_usermode_queue_mqd *mqd = queue->mqd_cpu_ptr;
+uint64_t hqd_gpu_addr, wb_gpu_addr;
+uint32_t tmp;
+uint32_t rb_bufsz;
+
+/* set up gfx hqd wptr */
+mqd->cp_gfx_hqd_wptr = 0;
+mqd->cp_gfx_hqd_wptr_hi = 0;
+
+/* set the pointer to the MQD */
+mqd->cp_mqd_base_addr = queue->mqd_gpu_addr & 0xfffc;
+mqd->cp_mqd_base_addr_hi = upper_32_bits(queue->mqd_gpu_addr);
+
+/* set up mqd control */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+mqd->cp_gfx_mqd_control = tmp;
+
+/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+mqd->cp_gfx_hqd_vmid = 0;
+
+/* set up default queue priority level
+* 0x0 = low priority, 0x1 = high priority */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+mqd->cp_gfx_hqd_queue_priority = tmp;
+
+/* set up time quantum */
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+mqd->cp_gfx_hqd_quantum = tmp;
+
+/* set up gfx hqd base. this is similar as CP_RB_BASE */
+hqd_gpu_addr = queue->queue_gpu_addr >> 8;
+mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+wb_gpu_addr = queue->rptr_gpu_addr;
+mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffc;
+mqd->cp_gfx_hqd_rptr_addr_hi =
+upper_32_bits(wb_gpu_addr) & 0x;
+
+/* set up rb_wptr_poll addr */
+wb_gpu_addr = queue->wptr_gpu_addr;
+mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffc;
+mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0x;
+
+/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+rb_bufsz = order_base_2(queue->queue_size / 4) - 1;
+tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+mqd->cp_gfx_hqd_cntl = tmp;
+
+/* set up cp_doorbell_control */
+tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+if (queue->use_doorbell) {
+tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+DOORBELL_OFFSET, queue->doorbell_index);
+tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+DOORBELL_EN, 1);
+} else {
+tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+DOORBELL_EN, 0);
+}
+mqd->cp_rb_doorbell_control = tmp;
+
+/* reset 

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Christian König

Am 03.01.23 um 10:22 schrieb Shashank Sharma:


On 03/01/2023 10:15, Christian König wrote:

Am 03.01.23 um 10:12 schrieb Shashank Sharma:


On 02/01/2023 13:39, Christian König wrote:

Hi Shashank,

Am 26.12.22 um 11:41 schrieb Shashank Sharma:

[SNIP]

    /* df */
  struct amdgpu_df    df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
  unsigned long    ras_counter_ce;
  unsigned long    ras_counter_ue;
  uint32_t    stable_pstate;
+    struct amdgpu_usermode_queue    *userq;


Why should we have this in the ctx here???


We are allocating a few things dynamically for the queue, which 
would be valid until we destroy this queue. Also we need to save 
this queue


container at some place for the destroy function,  and I thought 
it would make sense to keep this with the context ptr, as this is 
how we are


identifying the incoming request.


I have absolutely no idea how you end up with that design.

The ctx object is the CS IOCTL context, that is not even remotely 
related to anything the user queues should be doing.


Please completely drop that relationship and don't use any of the 
ctx object stuff in the user queue code.


Historically the workload submission always came with a context (due 
to CS IOCTL), so we thought it would make sense to still have its 
relevance in the new workload submission method. Would you prefer 
this new submission to be independent of AMDGPU context ?


Well not prefer, the point is that this doesn't make any sense at all.

See the amdgpu_ctx object contains the resulting fence pointers for 
the CS IOCTL as well as information necessary for the CS IOCTL to 
work (e.g. scheduler entities etc...).


I don't see how anything from that stuff would be useful for the MES 
or user queues.


Christian.



I am getting your point, and it makes sense as well. But in such 
scenario, we might have to create something parallel to 
AMDGPU_USERQ_CTX which is doing very much the same.


We can still do it to make a logically separate entity, but any 
suggestions on where to keep this udev_ctx ptr (if not in adev, as 
well as not ctx) ?



Take a look at the amdgpu_ctx_mgr object with the mutex and the idr and 
how this is embedded into the amdgpu_fpriv object. It should become 
pretty clear from there on.


I don't think we need an userq_ctx or similar, each userq should be an 
independent object. What we need is an userq_mgr object which holds the 
collection of all the useq objects the client application has created 
through it's fpriv connection to the driver.


Regards,
Christian.



- Shashank






- Shashank



Christian.



- Shashank



Regards,
Christian.


  };
    struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files 
(the "Software"),
+ * to deal in the Software without restriction, including 
without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall 
be included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY 
CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
THE USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
sizeof(__u64)))

+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+    int index;
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, 
GFP_KERNEL);

+    return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, 
struct amdgpu_usermode_queue *queue)

+{
+    struct amdgpu_userq_globals *uqg = &adev->userq;
+
+    

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Shashank Sharma



On 02/01/2023 14:53, Christian König wrote:

Am 29.12.22 um 18:41 schrieb Alex Deucher:
On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma 
 wrote:

This patch adds skeleton code for usermode queue creation. It
typically contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate unique index for the queue.
- A global ptr in amdgpu_dev

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 
++

  .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +
  5 files changed, 246 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 
drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h


diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile

index 6ad39cf71bdd..e2a34ee57bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -209,6 +209,8 @@ amdgpu-y += \
  # add amdkfd interfaces
  amdgpu-y += amdgpu_amdkfd.o

+# add usermode queue
+amdgpu-y += amdgpu_userqueue.o

  ifneq ($(CONFIG_HSA_AMD),)
  AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 8639a4f9c6e8..4b566fcfca18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,6 +749,11 @@ struct amdgpu_mqd {
 struct amdgpu_mqd_prop *p);
  };

+struct amdgpu_userq_globals {
+   struct ida ida;
+   struct mutex userq_mutex;
+};
+
  #define AMDGPU_RESET_MAGIC_NUM 64
  #define AMDGPU_MAX_DF_PERFMONS 4
  #define AMDGPU_PRODUCT_NAME_LEN 64
@@ -955,6 +960,7 @@ struct amdgpu_device {
 bool    enable_mes_kiq;
 struct amdgpu_mes   mes;
 struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+   struct amdgpu_userq_globals userq;

 /* df */
 struct amdgpu_df    df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
 unsigned long   ras_counter_ce;
 unsigned long   ras_counter_ue;
 uint32_t    stable_pstate;
+   struct amdgpu_usermode_queue    *userq;

There can be multiple queues per context.  We should make this a list.


  };

  struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
sizeof(__u64)))


You seem to have a very very big misunderstanding here.

access_ok() is used for CPU pointer validation, but this here are 
pointers into the GPUVM address space. This is something completely 
different!


Thanks, It seems like there is a misunderstanding in my side on 
definition of these input parameters, let me follow up.


- Shashank




Regards,
Christian.


+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+    int index;
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    index = ida_simple_get(>ida, 2, 

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Shashank Sharma



On 03/01/2023 10:15, Christian König wrote:

Am 03.01.23 um 10:12 schrieb Shashank Sharma:


On 02/01/2023 13:39, Christian König wrote:

Hi Shashank,

Am 26.12.22 um 11:41 schrieb Shashank Sharma:

[SNIP]

    /* df */
  struct amdgpu_df    df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
  unsigned long    ras_counter_ce;
  unsigned long    ras_counter_ue;
  uint32_t    stable_pstate;
+    struct amdgpu_usermode_queue    *userq;


Why should we have this in the ctx here???


We are allocating a few things dynamically for the queue, which 
would be valid until we destroy this queue. Also we need to save 
this queue


container at some place for the destroy function,  and I thought it 
would make sense to keep this with the context ptr, as this is how 
we are


identifying the incoming request.


I have absolutely no idea how you end up with that design.

The ctx object is the CS IOCTL context, that is not even remotely 
related to anything the user queues should be doing.


Please completely drop that relationship and don't use any of the 
ctx object stuff in the user queue code.


Historically the workload submission always came with a context (due 
to CS IOCTL), so we thought it would make sense to still have its 
relevance in the new workload submission method. Would you prefer 
this new submission to be independent of AMDGPU context ?


Well not prefer, the point is that this doesn't make any sense at all.

See the amdgpu_ctx object contains the resulting fence pointers for 
the CS IOCTL as well as information necessary for the CS IOCTL to work 
(e.g. scheduler entities etc...).


I don't see how anything from that stuff would be useful for the MES 
or user queues.


Christian.



I am getting your point, and it makes sense as well. But in such 
scenario, we might have to create something parallel to AMDGPU_USERQ_CTX 
which is doing very much the same.


We can still do it to make a logically separate entity, but any 
suggestions on where to keep this udev_ctx ptr (if not in adev, as well 
as not ctx) ?


- Shashank






- Shashank



Christian.



- Shashank



Regards,
Christian.


  };
    struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including 
without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall 
be included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 
THE USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
sizeof(__u64)))

+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+    int index;
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    index = ida_simple_get(>ida, 2, AMDGPU_MAX_USERQ, 
GFP_KERNEL);

+    return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)

+{
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    ida_simple_remove(>ida, queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_validate_input(struct amdgpu_device *adev, 
struct drm_amdgpu_userq_mqd *mqd_in)

+{
+    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
mqd_in->doorbell_offset == 0) {

+    DRM_ERROR("Invalid queue object address\n");
+    return -EINVAL;
+    }
+
+    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
mqd_in->wptr_va == 0) {

+    DRM_ERROR("Invalid queue object value\n");
+    return -EINVAL;
+    }
+
+ 

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Shashank Sharma



On 29/12/2022 18:41, Alex Deucher wrote:

On Fri, Dec 23, 2022 at 2:37 PM Shashank Sharma  wrote:

This patch adds skeleton code for usermode queue creation. It
typically contains:
- A new structure to keep all the user queue data in one place.
- An IOCTL function to create/free a usermode queue.
- A function to generate unique index for the queue.
- A global ptr in amdgpu_dev

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
---
  drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   6 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 187 ++
  .../drm/amd/include/amdgpu_usermode_queue.h   |  50 +
  5 files changed, 246 insertions(+)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
  create mode 100644 drivers/gpu/drm/amd/include/amdgpu_usermode_queue.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6ad39cf71bdd..e2a34ee57bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -209,6 +209,8 @@ amdgpu-y += \
  # add amdkfd interfaces
  amdgpu-y += amdgpu_amdkfd.o

+# add usermode queue
+amdgpu-y += amdgpu_userqueue.o

  ifneq ($(CONFIG_HSA_AMD),)
  AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8639a4f9c6e8..4b566fcfca18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -749,6 +749,11 @@ struct amdgpu_mqd {
 struct amdgpu_mqd_prop *p);
  };

+struct amdgpu_userq_globals {
+   struct ida ida;
+   struct mutex userq_mutex;
+};
+
  #define AMDGPU_RESET_MAGIC_NUM 64
  #define AMDGPU_MAX_DF_PERFMONS 4
  #define AMDGPU_PRODUCT_NAME_LEN 64
@@ -955,6 +960,7 @@ struct amdgpu_device {
 boolenable_mes_kiq;
 struct amdgpu_mes   mes;
 struct amdgpu_mqd   mqds[AMDGPU_HW_IP_NUM];
+   struct amdgpu_userq_globals userq;

 /* df */
 struct amdgpu_dfdf;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
 unsigned long   ras_counter_ce;
 unsigned long   ras_counter_ue;
 uint32_tstable_pstate;
+   struct amdgpu_usermode_queue*userq;

There can be multiple queues per context.  We should make this a list.


Noted, will change it into a queue. We are still in discussion (in 
another thread) if we have to move this from context to some place else.



  };

  struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, sizeof(__u64)))
+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+int index;
+struct amdgpu_userq_globals *uqg = &adev->userq;
+
+index = ida_simple_get(&uqg->ida, 2, AMDGPU_MAX_USERQ, GFP_KERNEL);
+return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)
+{
+struct amdgpu_userq_globals *uqg = >userq;
+
+ida_simple_remove(&uqg->ida, queue->queue_id);
+}
+
+static int

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Christian König

Am 03.01.23 um 10:12 schrieb Shashank Sharma:


On 02/01/2023 13:39, Christian König wrote:

Hi Shashank,

Am 26.12.22 um 11:41 schrieb Shashank Sharma:

[SNIP]

    /* df */
  struct amdgpu_df    df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
  unsigned long    ras_counter_ce;
  unsigned long    ras_counter_ue;
  uint32_t    stable_pstate;
+    struct amdgpu_usermode_queue    *userq;


Why should we have this in the ctx here???


We are allocating a few things dynamically for the queue, which 
would be valid until we destroy this queue. Also we need to save 
this queue


container at some place for the destroy function,  and I thought it 
would make sense to keep this with the context ptr, as this is how 
we are


identifying the incoming request.


I have absolutely no idea how you end up with that design.

The ctx object is the CS IOCTL context, that is not even remotely 
related to anything the user queues should be doing.


Please completely drop that relationship and don't use any of the ctx 
object stuff in the user queue code.


Historically the workload submission always came with a context (due 
to CS IOCTL), so we thought it would make sense to still have its 
relevance in the new workload submission method. Would you prefer this 
new submission to be independent of AMDGPU context ?


Well not prefer, the point is that this doesn't make any sense at all.

See the amdgpu_ctx object contains the resulting fence pointers for the 
CS IOCTL as well as information necessary for the CS IOCTL to work (e.g. 
scheduler entities etc...).


I don't see how anything from that stuff would be useful for the MES or 
user queues.


Christian.



- Shashank



Christian.



- Shashank



Regards,
Christian.


  };
    struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
sizeof(__u64)))

+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+    int index;
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    index = ida_simple_get(>ida, 2, AMDGPU_MAX_USERQ, 
GFP_KERNEL);

+    return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)

+{
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    ida_simple_remove(>ida, queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_validate_input(struct amdgpu_device *adev, 
struct drm_amdgpu_userq_mqd *mqd_in)

+{
+    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
mqd_in->doorbell_offset == 0) {

+    DRM_ERROR("Invalid queue object address\n");
+    return -EINVAL;
+    }
+
+    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
mqd_in->wptr_va == 0) {

+    DRM_ERROR("Invalid queue object value\n");
+    return -EINVAL;
+    }
+
+    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
AMDGPU_HW_IP_NUM) {

+    DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
+    return -EINVAL;
+    }
+
+    if (!CHECK_ACCESS(mqd_in->queue_va) || 
!CHECK_ACCESS(mqd_in->rptr_va) ||

+    !CHECK_ACCESS(mqd_in->wptr_va)) {
+    DRM_ERROR("Invalid mapping of queue ptrs, access 
error\n");

+  

Re: [RFC 2/7] drm/amdgpu: Add usermode queue for gfx work

2023-01-03 Thread Shashank Sharma



On 02/01/2023 13:39, Christian König wrote:

Hi Shashank,

Am 26.12.22 um 11:41 schrieb Shashank Sharma:

[SNIP]

    /* df */
  struct amdgpu_df    df;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..f7413859b14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
  unsigned long    ras_counter_ce;
  unsigned long    ras_counter_ue;
  uint32_t    stable_pstate;
+    struct amdgpu_usermode_queue    *userq;


Why should we have this in the ctx here???


We are allocating a few things dynamically for the queue, which would 
be valid until we destroy this queue. Also we need to save this queue


container at some place for the destroy function,  and I thought it 
would make sense to keep this with the context ptr, as this is how we 
are


identifying the incoming request.


I have absolutely no idea how you end up with that design.

The ctx object is the CS IOCTL context, that is not even remotely 
related to anything the user queues should be doing.


Please completely drop that relationship and don't use any of the ctx 
object stuff in the user queue code.


Historically the workload submission always came with a context (due to 
CS IOCTL), so we thought it would make sense to still have its relevance 
in the new workload submission method. Would you prefer this new 
submission to be independent of AMDGPU context ?


- Shashank



Christian.



- Shashank



Regards,
Christian.


  };
    struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c

new file mode 100644
index ..3b6e8f75495c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to 
whom the
+ * Software is furnished to do so, subject to the following 
conditions:

+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_usermode_queue.h"
+#include "soc15_common.h"
+
+#define CHECK_ACCESS(a) (access_ok((const void __user *)a, 
sizeof(__u64)))

+
+static int
+amdgpu_userqueue_index(struct amdgpu_device *adev)
+{
+    int index;
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    index = ida_simple_get(>ida, 2, AMDGPU_MAX_USERQ, 
GFP_KERNEL);

+    return index;
+}
+
+static void
+amdgpu_userqueue_remove_index(struct amdgpu_device *adev, struct 
amdgpu_usermode_queue *queue)

+{
+    struct amdgpu_userq_globals *uqg = >userq;
+
+    ida_simple_remove(>ida, queue->queue_id);
+}
+
+static int
+amdgpu_userqueue_validate_input(struct amdgpu_device *adev, struct 
drm_amdgpu_userq_mqd *mqd_in)

+{
+    if (mqd_in->queue_va == 0 || mqd_in->doorbell_handle == 0 || 
mqd_in->doorbell_offset == 0) {

+    DRM_ERROR("Invalid queue object address\n");
+    return -EINVAL;
+    }
+
+    if (mqd_in->queue_size == 0 || mqd_in->rptr_va == 0 || 
mqd_in->wptr_va == 0) {

+    DRM_ERROR("Invalid queue object value\n");
+    return -EINVAL;
+    }
+
+    if (mqd_in->ip_type < AMDGPU_HW_IP_GFX || mqd_in->ip_type >= 
AMDGPU_HW_IP_NUM) {

+    DRM_ERROR("Invalid HW IP type 0x%x\n", mqd_in->ip_type);
+    return -EINVAL;
+    }
+
+    if (!CHECK_ACCESS(mqd_in->queue_va) || 
!CHECK_ACCESS(mqd_in->rptr_va) ||

+    !CHECK_ACCESS(mqd_in->wptr_va)) {
+    DRM_ERROR("Invalid mapping of queue ptrs, access 
error\n");

+    return -EINVAL;
+    }
+
+    DRM_DEBUG_DRIVER("Input parameters to create queue are valid\n");
+    return 0;
+}
+
+int amdgpu_userqueue_create(struct amdgpu_device *adev, struct 
drm_file *filp,

+    union drm_amdgpu_userq *args)
+{
+    int r, pasid;
+    struct amdgpu_usermode_queue *queue;
+    struct amdgpu_fpriv *fpriv = filp->driver_priv;
+    struct amdgpu_vm 

Re: [PATCH] [RFC] drm/drm_buddy fails to initialize on 32-bit architectures

2023-01-03 Thread Christian König

Am 25.12.22 um 20:39 schrieb Luís Mendes:

Re-sending with the correct  linux-kernel mailing list email address.
Sorry for the inconvenience.

The proposed patch fixes the issue and allows amdgpu to work again on
armhf with a AMD RX 550 card, however it may not be the best solution
for the issue, as detailed below.

include/log2.h defined macros rounddown_pow_of_two(...) and
roundup_pow_of_two(...) do not handle 64-bit values on 32-bit
architectures (tested on armv9 armhf machine) causing
drm_buddy_init(...) to fail on BUG_ON with an underflow on the order
value, thus impeding amdgpu to load properly (no GUI).

One option is to modify rounddown_pow_of_two(...) to detect if the
variable takes 32 bits or less and call __rounddown_pow_of_two_u32(u32
n) or if the variable takes more space than 32 bits, then call
__rounddown_pow_of_two_u64(u64 n). This would imply renaming
__rounddown_pow_of_two(unsigned long n) to
__rounddown_pow_of_two_u32(u32 n) and add a new function
__rounddown_pow_of_two_u64(u64 n). This would be the most transparent
solution, however there a few complications, and they are:
- that the mm subsystem will fail to link on armhf with an undefined
reference on __aeabi_uldivmod
- there a few drivers that directly call __rounddown_pow_of_two(...)
- that other drivers and subsystems generate warnings

So this alternate solution was devised which avoids touching existing
code paths, and just updates drm_buddy which seems to be the only
driver that is failing, however I am not sure if this is the proper
way to go. So I would like to get a second opinion on this, by those
who know.

/include/linux/log2.h
/drivers/gpu/drm/drm_buddy.c

Signed-off-by: Luís Mendes 

8--8<

diff -uprN linux-next/drivers/gpu/drm/drm_buddy.c
linux-nextLM/drivers/gpu/drm/drm_buddy.c
--- linux-next/drivers/gpu/drm/drm_buddy.c2022-12-25
16:29:26.0 +
+++ linux-nextLM/drivers/gpu/drm/drm_buddy.c2022-12-25
17:04:32.136007116 +
@@ -128,7 +128,7 @@ int drm_buddy_init(struct drm_buddy *mm,
  unsigned int order;
  u64 root_size;

-root_size = rounddown_pow_of_two(size);
+root_size = rounddown_pow_of_two_u64(size);
  order = ilog2(root_size) - ilog2(chunk_size);


I think this can be handled much easier if keep around the root_order 
instead of the root_size in the first place.


Cause ilog2() does the right thing even for non power of two values and 
so we just need the order for the offset subtraction below.


Arun can you take a closer look at this?

Regards,
Christian.



  root = drm_block_alloc(mm, NULL, order, offset);
diff -uprN linux-next/include/linux/log2.h linux-nextLM/include/linux/log2.h
--- linux-next/include/linux/log2.h2022-12-25 16:29:29.0 +
+++ linux-nextLM/include/linux/log2.h2022-12-25 17:00:34.319901492 +
@@ -58,6 +58,18 @@ unsigned long __roundup_pow_of_two(unsig
  }

  /**
+ * __roundup_pow_of_two_u64() - round up to nearest power of two
+ * (unsigned 64-bit precision version)
+ * @n: value to round up
+ */
+static inline __attribute__((const))
+u64 __roundup_pow_of_two_u64(u64 n)
+{
+return 1ULL << fls64(n - 1);
+}
+
+
+/**
   * __rounddown_pow_of_two() - round down to nearest power of two
   * @n: value to round down
   */
@@ -68,6 +80,17 @@ unsigned long __rounddown_pow_of_two(uns
  }

  /**
+ * __rounddown_pow_of_two_u64() - round down to nearest power of two
+ * (unsigned 64-bit precision version)
+ * @n: value to round down
+ */
+static inline __attribute__((const))
+u64 __rounddown_pow_of_two_u64(u64 n)
+{
+return 1ULL << (fls64(n) - 1);
+}
+
+/**
   * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
   * @n: parameter
   *
@@ -163,6 +186,7 @@ unsigned long __rounddown_pow_of_two(uns
  __ilog2_u64(n)\
   )

+
  /**
   * roundup_pow_of_two - round the given value up to nearest power of two
   * @n: parameter
@@ -181,6 +205,25 @@ unsigned long __rounddown_pow_of_two(uns
   )

  /**
+ * roundup_pow_of_two_u64 - round the given value up to nearest power of two
+ * (unsigned 64-bit precision version)
+ * @n: parameter
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two_u64(n)\
+(\
+__builtin_constant_p(n) ? (\
+((n) == 1) ? 1 :\
+(1ULL << (ilog2((n) - 1) + 1))\
+   ) :\
+__roundup_pow_of_two_u64(n)\
+ )
+
+
+/**
   * rounddown_pow_of_two - round the given value down to nearest power of two
   * @n: parameter
   *
@@ -195,6 +238,22 @@ unsigned long __rounddown_pow_of_two(uns
  __rounddown_pow_of_two(n)\
   )

+/**
+ * rounddown_pow_of_two_u64 - round the given value down to nearest
power of two
+ * (unsigned 64-bit precision 

Re: [PATCH] drm/amdgpu: grab extra fence reference for drm_sched_job_add_dependency

2023-01-03 Thread Christian König

I assume that this was already upstreamed while I was on sick leave?

Thanks,
Christian.

Am 21.12.22 um 22:10 schrieb Alex Deucher:

Applied.  Thanks!

Alex

On Mon, Dec 19, 2022 at 3:01 PM Borislav Petkov  wrote:

On Mon, Dec 19, 2022 at 11:47:18AM +0100, Christian König wrote:

That function consumes the reference.

Signed-off-by: Christian König 
Fixes: aab9cf7b6954 ("drm/amdgpu: use scheduler dependencies for VM updates")
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 2 ++
  1 file changed, 2 insertions(+)

Thanks, that fixes it.

Reported-by: Borislav Petkov (AMD) 
Tested-by: Borislav Petkov (AMD) 

--
Regards/Gruss,
 Boris.

https://people.kernel.org/tglx/notes-about-netiquette




Re: [PATCH 2/2] drm/amdgpu: add AMDGPU_INFO_VM_STAT to return GPU VM

2023-01-03 Thread Christian König

Take a look at /proc/self/fdinfo/$fd.

The Intel guys made that vendor agnostic and are using it within their 
IGT gpu top tool.


Christian.

Am 02.01.23 um 18:57 schrieb Marek Olšák:
What are you talking about? Is fdinfo in sysfs? Userspace drivers 
can't access sysfs.


Marek

On Mon, Jan 2, 2023, 10:56 Christian König 
 wrote:


Well first of all don't mess with the VM internals outside of the
VM code.

Then why would we want to expose this through the IOCTL interface?
We already have this in the fdinfo.

Christian.

Am 30.12.22 um 23:07 schrieb Marek Olšák:

To give userspace a detailed view about its GPU memory usage and
evictions.
This will help performance investigations.

Signed-off-by: Marek Olšák 

The patch is attached.

Marek




Re: [PATCH 12/13] drm/scheduler: rework entity flush, kill and fini

2023-01-03 Thread Dmitry Osipenko
On 1/2/23 17:17, youling 257 wrote:
> which patch?

https://patchwork.freedesktop.org/patch/512652/

I applied it to next-fixes

-- 
Best regards,
Dmitry



Re: [PATCH 12/13] drm/scheduler: rework entity flush, kill and fini

2023-01-03 Thread youling 257
which patch?

2023-01-02 17:24 GMT+08:00, Dmitry Osipenko :
> On 1/1/23 21:29, youling257 wrote:
>> Linux 6.2-rc1 has memory leak on amdgpu, git bisect bad commit is
>> "drm/scheduler: rework entity flush, kill and fini".
>> git bisect start
>> # status: waiting for both good and bad commits
>> # good: [eb7081409f94a9a8608593d0fb63a1aa3d6f95d8] Linux 6.1-rc6
>> git bisect good eb7081409f94a9a8608593d0fb63a1aa3d6f95d8
>> # status: waiting for bad commit, 1 good commit known
>> # bad: [66efff515a6500d4b4976fbab3bee8b92a1137fb] Merge tag
>> 'amd-drm-next-6.2-2022-12-07' of
>> https://gitlab.freedesktop.org/agd5f/linux into drm-next
>> git bisect bad 66efff515a6500d4b4976fbab3bee8b92a1137fb
>> # good: [49e8e6343df688d68b12c2af50791ca37520f0b7] Merge tag
>> 'amd-drm-next-6.2-2022-11-04' of
>> https://gitlab.freedesktop.org/agd5f/linux into drm-next
>> git bisect good 49e8e6343df688d68b12c2af50791ca37520f0b7
>> # bad: [fc58764bbf602b65a6f63c53e5fd6feae76c510c] Merge tag
>> 'amd-drm-next-6.2-2022-11-18' of
>> https://gitlab.freedesktop.org/agd5f/linux into drm-next
>> git bisect bad fc58764bbf602b65a6f63c53e5fd6feae76c510c
>> # bad: [4e291f2f585313efa5200cce655e17c94906e50a] Merge tag
>> 'drm-misc-next-2022-11-10-1' of git://anongit.freedesktop.org/drm/drm-misc
>> into drm-next
>> git bisect bad 4e291f2f585313efa5200cce655e17c94906e50a
>> # good: [78a43c7e3b2ff5aed1809f93b4f87a418355789e] drm/nouveau/gr/gf100-:
>> make global attrib_cb actually global
>> git bisect good 78a43c7e3b2ff5aed1809f93b4f87a418355789e
>> # bad: [611fc22c9e5e13276c819a7f7a7d19b794bbed1a] drm/arm/hdlcd: remove
>> calls to drm_mode_config_cleanup()
>> git bisect bad 611fc22c9e5e13276c819a7f7a7d19b794bbed1a
>> # bad: [a8d9621b9fc67957b3de334cc1b5f47570fb90a0] drm/ingenic: Don't set
>> struct drm_driver.output_poll_changed
>> git bisect bad a8d9621b9fc67957b3de334cc1b5f47570fb90a0
>> # good: [2cf9886e281678ae9ee57e24a656749071d543bb] drm/scheduler: remove
>> drm_sched_dependency_optimized
>> git bisect good 2cf9886e281678ae9ee57e24a656749071d543bb
>> # bad: [8e4e4c2f53ffcb0ef746dc3b87ce1a57c5c94c7d] Merge drm/drm-next into
>> drm-misc-next
>> git bisect bad 8e4e4c2f53ffcb0ef746dc3b87ce1a57c5c94c7d
>> # bad: [47078311b8efebdefd5b3b2f87e2b02b14f49c66] drm/ingenic: Fix missing
>> platform_driver_unregister() call in ingenic_drm_init()
>> git bisect bad 47078311b8efebdefd5b3b2f87e2b02b14f49c66
>> # bad: [a82f30b04c6aaefe62cbbfd297e1bb23435b6b3a] drm/scheduler: rename
>> dependency callback into prepare_job
>> git bisect bad a82f30b04c6aaefe62cbbfd297e1bb23435b6b3a
>> # bad: [2fdb8a8f07c2f1353770a324fd19b8114e4329ac] drm/scheduler: rework
>> entity flush, kill and fini
>> git bisect bad 2fdb8a8f07c2f1353770a324fd19b8114e4329ac
>> # first bad commit: [2fdb8a8f07c2f1353770a324fd19b8114e4329ac]
>> drm/scheduler: rework entity flush, kill and fini
>>
>> @Rob Clark, i test your patch fixed my problem.
>
> The linux-next already carried the fix for a couple weeks. It will land
> to 6.2-rc once drm-fixes branch will be synced with the 6.2.
>
> --
> Best regards,
> Dmitry
>
>


Re: [PATCH 1/2] drm/amdgpu: return the PCIe gen and lanes from the INFO

2023-01-03 Thread Christian König

Sure they can, those files are accessible to everyone.

The massive advantage is that this is standard for all PCIe devices, so 
it should work vendor independent.


Christian.

Am 02.01.23 um 18:55 schrieb Marek Olšák:

Userspace drivers can't access sysfs.

Marek

On Mon, Jan 2, 2023, 10:54 Christian König 
 wrote:


That stuff is already available as current_link_speed and
current_link_width in sysfs.

I'm a bit reluctant duplicating this information in the IOCTL
interface.

Christian.

Am 30.12.22 um 23:07 schrieb Marek Olšák:

For computing PCIe bandwidth in userspace and troubleshooting PCIe
bandwidth issues.

For example, my Navi21 has been limited to PCIe gen 1 and this is
the first time I noticed it after 2 years.

Note that this intentionally fills a hole and padding
in drm_amdgpu_info_device.

Signed-off-by: Marek Olšák 

The patch is attached.

Marek





Re: [RFC PATCH 2/3] drm/amdgpu: Add range param to amdgpu_vm_update_range

2023-01-03 Thread Christian König

Am 21.12.22 um 00:27 schrieb Felix Kuehling:

This allows page table updates to be coordinated with interval notifiers
to avoid writing stale page table entries to the page table. Moving the
critical section inside the page table update avoids lock dependencies
with page table allocations under the notifier lock.

Suggested-by: Christian König 
Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 27 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 58 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  6 ++-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  4 +-
  4 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a04f7aef4ca9..556d2e5d90e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -768,6 +768,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
   * @vram_base: base for vram mappings
   * @res: ttm_resource to map
   * @pages_addr: DMA addresses to use for mapping
+ * @range: optional HMM range for coordination with interval notifier
   * @fence: optional resulting fence
   *
   * Fill in the page table entries between @start and @last.
@@ -780,7 +781,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
   struct dma_resv *resv, uint64_t start, uint64_t last,
   uint64_t flags, uint64_t offset, uint64_t vram_base,
   struct ttm_resource *res, dma_addr_t *pages_addr,
-  struct dma_fence **fence)
+  struct hmm_range *range, struct dma_fence **fence)
  {
struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -794,7 +795,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
if (!tlb_cb) {
r = -ENOMEM;
-   goto error_unlock;
+   goto error_dev_exit;
}
  
  	/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,

@@ -811,6 +812,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
memset(, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+#ifdef CONFIG_MMU_NOTIFIER
+   params.range = range;
+#endif
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
@@ -823,12 +827,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
  
-	amdgpu_vm_eviction_lock(vm);

-   if (vm->evicting) {
-   r = -EBUSY;
-   goto error_free;
-   }
-
if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
struct dma_fence *tmp = dma_fence_get_stub();
  
@@ -893,7 +891,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,

start = tmp;
}
  
+	r = amdgpu_vm_pts_lock();

+   if (r)
+   goto error_free;
r = vm->update_funcs->commit(, fence);
+   amdgpu_vm_pts_unlock();


This won't work. We need the lock for updates as well and not just for 
committing them.


  
  	if (flush_tlb || params.table_freed) {

tlb_cb->vm = vm;
@@ -911,8 +913,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  error_free:
kfree(tlb_cb);
  
-error_unlock:

-   amdgpu_vm_eviction_unlock(vm);
+error_dev_exit:
drm_dev_exit(idx);
return r;
  }
@@ -1058,7 +1059,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
struct amdgpu_bo_va *bo_va,
   resv, mapping->start, mapping->last,
   update_flags, mapping->offset,
   vram_base, mem, pages_addr,
-  last_update);
+  NULL, last_update);
if (r)
return r;
}
@@ -1253,7 +1254,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
   mapping->start, mapping->last,
   init_pte_value, 0, 0, NULL, NULL,
-  );
+  NULL, );
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2512,7 +2513,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
}
  
  	r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,

-  addr,