On 8/6/2025 8:50 PM, Deucher, Alexander wrote:
> [AMD Official Use Only - AMD Internal Distribution Only]
> 
>> -----Original Message-----
>> From: Lazar, Lijo <lijo.la...@amd.com>
>> Sent: Wednesday, August 6, 2025 11:17 AM
>> To: Deucher, Alexander <alexander.deuc...@amd.com>; amd-
>> g...@lists.freedesktop.org
>> Cc: sta...@vger.kernel.org
>> Subject: Re: [PATCH] drm/amdgpu/discovery: fix fw based ip discovery
>>
>>
>>
>> On 7/30/2025 9:29 PM, Alex Deucher wrote:
>>> We only need the fw based discovery table for sysfs.  No need to parse
>>> it.  Additionally parsing some of the board specific tables may result
>>> in incorrect data on some boards.
>>> just load the binary and don't parse it on those boards.
>>>
>>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4441
>>> Fixes: 80a0e8282933 ("drm/amdgpu/discovery: optionally use fw based ip
>>> discovery")
>>> Cc: sta...@vger.kernel.org
>>> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
>>
>> One generic question - if discovery content is completely ignored by driver, 
>> how
>> external tool using sysfs could consume the data? Wouldn't there be a 
>> mismatch in
>> the config?
> 
> The IP register offsets are what tools use sysfs for.  It's possible some of 
> the other data is invalid (e.g., the harvest tables) because they are not 
> coming from IFWI in this case.
> 

Thanks for the info. Probably, keeping the same comment in the commit
description or code helps - discovery binary file is loaded to provide
sysfs interface to get valid IP register offsets and data from other
tables are not reliable.

Thanks,
Lijo

> Alex
> 
> 
>>
>> Thanks,
>> Lijo
>>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  5 +-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 72
>>> ++++++++++---------
>>>  2 files changed, 41 insertions(+), 36 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index efe98ffb679a4..b2538cff222ce 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2570,9 +2570,6 @@ static int
>>> amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
>>>
>>>     adev->firmware.gpu_info_fw = NULL;
>>>
>>> -   if (adev->mman.discovery_bin)
>>> -           return 0;
>>> -
>>>     switch (adev->asic_type) {
>>>     default:
>>>             return 0;
>>> @@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct
>> amdgpu_device *adev)
>>>             chip_name = "arcturus";
>>>             break;
>>>     case CHIP_NAVI12:
>>> +           if (adev->mman.discovery_bin)
>>> +                   return 0;
>>>             chip_name = "navi12";
>>>             break;
>>>     }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> index 81b3443c8d7f4..27bd7659961e8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>>> @@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct
>>> amdgpu_device *adev)
>>>
>>>     switch (adev->asic_type) {
>>>     case CHIP_VEGA10:
>>> -   case CHIP_VEGA12:
>>> -   case CHIP_RAVEN:
>>> -   case CHIP_VEGA20:
>>> -   case CHIP_ARCTURUS:
>>> -   case CHIP_ALDEBARAN:
>>> -           /* this is not fatal.  We have a fallback below
>>> -            * if the new firmwares are not present. some of
>>> -            * this will be overridden below to keep things
>>> -            * consistent with the current behavior.
>>> +           /* This is not fatal.  We only need the discovery
>>> +            * binary for sysfs.  We don't need it for a
>>> +            * functional system.
>>>              */
>>> -           r = amdgpu_discovery_reg_base_init(adev);
>>> -           if (!r) {
>>> -                   amdgpu_discovery_harvest_ip(adev);
>>> -                   amdgpu_discovery_get_gfx_info(adev);
>>> -                   amdgpu_discovery_get_mall_info(adev);
>>> -                   amdgpu_discovery_get_vcn_info(adev);
>>> -           }
>>> -           break;
>>> -   default:
>>> -           r = amdgpu_discovery_reg_base_init(adev);
>>> -           if (r) {
>>> -                   drm_err(&adev->ddev, "discovery failed: %d\n", r);
>>> -                   return r;
>>> -           }
>>> -
>>> -           amdgpu_discovery_harvest_ip(adev);
>>> -           amdgpu_discovery_get_gfx_info(adev);
>>> -           amdgpu_discovery_get_mall_info(adev);
>>> -           amdgpu_discovery_get_vcn_info(adev);
>>> -           break;
>>> -   }
>>> -
>>> -   switch (adev->asic_type) {
>>> -   case CHIP_VEGA10:
>>> +           amdgpu_discovery_init(adev);
>>>             vega10_reg_base_init(adev);
>>>             adev->sdma.num_instances = 2;
>>>             adev->gmc.num_umc = 4;
>>> @@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct
>> amdgpu_device *adev)
>>>             adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
>>>             break;
>>>     case CHIP_VEGA12:
>>> +           /* This is not fatal.  We only need the discovery
>>> +            * binary for sysfs.  We don't need it for a
>>> +            * functional system.
>>> +            */
>>> +           amdgpu_discovery_init(adev);
>>>             vega10_reg_base_init(adev);
>>>             adev->sdma.num_instances = 2;
>>>             adev->gmc.num_umc = 4;
>>> @@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct
>> amdgpu_device *adev)
>>>             adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
>>>             break;
>>>     case CHIP_RAVEN:
>>> +           /* This is not fatal.  We only need the discovery
>>> +            * binary for sysfs.  We don't need it for a
>>> +            * functional system.
>>> +            */
>>> +           amdgpu_discovery_init(adev);
>>>             vega10_reg_base_init(adev);
>>>             adev->sdma.num_instances = 1;
>>>             adev->vcn.num_vcn_inst = 1;
>>> @@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct
>> amdgpu_device *adev)
>>>             }
>>>             break;
>>>     case CHIP_VEGA20:
>>> +           /* This is not fatal.  We only need the discovery
>>> +            * binary for sysfs.  We don't need it for a
>>> +            * functional system.
>>> +            */
>>> +           amdgpu_discovery_init(adev);
>>>             vega20_reg_base_init(adev);
>>>             adev->sdma.num_instances = 2;
>>>             adev->gmc.num_umc = 8;
>>> @@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct
>> amdgpu_device *adev)
>>>             adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
>>>             break;
>>>     case CHIP_ARCTURUS:
>>> +           /* This is not fatal.  We only need the discovery
>>> +            * binary for sysfs.  We don't need it for a
>>> +            * functional system.
>>> +            */
>>> +           amdgpu_discovery_init(adev);
>>>             arct_reg_base_init(adev);
>>>             adev->sdma.num_instances = 8;
>>>             adev->vcn.num_vcn_inst = 2;
>>> @@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct
>> amdgpu_device *adev)
>>>             adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
>>>             break;
>>>     case CHIP_ALDEBARAN:
>>> +           /* This is not fatal.  We only need the discovery
>>> +            * binary for sysfs.  We don't need it for a
>>> +            * functional system.
>>> +            */
>>> +           amdgpu_discovery_init(adev);
>>>             aldebaran_reg_base_init(adev);
>>>             adev->sdma.num_instances = 5;
>>>             adev->vcn.num_vcn_inst = 2;
>>> @@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct
>> amdgpu_device *adev)
>>>             adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
>>>             break;
>>>     default:
>>> +           r = amdgpu_discovery_reg_base_init(adev);
>>> +           if (r) {
>>> +                   drm_err(&adev->ddev, "discovery failed: %d\n", r);
>>> +                   return r;
>>> +           }
>>> +
>>> +           amdgpu_discovery_harvest_ip(adev);
>>> +           amdgpu_discovery_get_gfx_info(adev);
>>> +           amdgpu_discovery_get_mall_info(adev);
>>> +           amdgpu_discovery_get_vcn_info(adev);
>>>             break;
>>>     }
>>>
> 

Reply via email to