Re: [PATCH] drm/amdkfd: select CONFIG_CRC16

2024-05-28 Thread Lazar, Lijo



On 5/28/2024 5:20 PM, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> The amdkfd support fails to link when CONFIG_CRC16 is disabled:
> 
> aarch64-linux-ld: drivers/gpu/drm/amd/amdkfd/kfd_topology.o: in function 
> `kfd_topology_add_device':
> kfd_topology.c:(.text+0x3a4c): undefined reference to `crc16'
> 
> This is a library module that needs to be selected by every user.
> 
> Fixes: 3ed181b8ff43 ("drm/amdkfd: Ensure gpu_id is unique")
> Signed-off-by: Arnd Bergmann 

Thanks for the patch; this is already addressed with -
https://patchwork.freedesktop.org/patch/594816/

Thanks,
Lijo

> ---
>  drivers/gpu/drm/amd/amdkfd/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
> b/drivers/gpu/drm/amd/amdkfd/Kconfig
> index d3c3d3ab7225..f82595af34bf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Kconfig
> +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
> @@ -6,6 +6,7 @@
>  config HSA_AMD
>   bool "HSA kernel driver for AMD GPU devices"
>   depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
> + select CRC16
>   select HMM_MIRROR
>   select MMU_NOTIFIER
>   select DRM_AMDGPU_USERPTR
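
For context on the link error above: crc16() lives in lib/crc16.c and is only
built when CONFIG_CRC16 is enabled. A minimal sketch of the kind of caller that
pulls in the symbol (the buffer here is illustrative, not the actual
kfd_topology code):

#include <linux/crc16.h>

/* Illustrative only: fold some per-device bytes into a 16-bit id.
 * The real kfd_topology_add_device() feeds its own data to crc16().
 */
static u16 example_gpu_id(const u8 *buf, size_t len)
{
	return crc16(0, buf, len);
}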


Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Lazar, Lijo



On 3/19/2024 7:27 PM, Khatri, Sunil wrote:
> 
> On 3/19/2024 7:19 PM, Lazar, Lijo wrote:
>>
>> On 3/19/2024 6:02 PM, Sunil Khatri wrote:
>>> Refactor the code so debugfs and devcoredump can reuse
>>> the common information and avoid unnecessary copy of it.
>>>
>>> Created a new file, which would be the right place to
>>> hold functions that will be shared between sysfs, debugfs
>>> and devcoredump.
>>>
>>> Cc: Christian König 
>>> Cc: Alex Deucher 
>>> Signed-off-by: Sunil Khatri 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
>>>   4 files changed, 157 insertions(+), 115 deletions(-)
>>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 4536c8ad0e11..05d34f4b18f5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o
>>> amdgpu_kms.o \
>>>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>>   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>>>   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>>> -    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
>>> +    amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
>>> amdgpu_devinfo.o
>>>     amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 9c62552bec34..0267870aa9b1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1609,4 +1609,5 @@ extern const struct attribute_group
>>> amdgpu_vram_mgr_attr_group;
>>>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>>>   extern const struct attribute_group amdgpu_flash_attr_group;
>>>   +int amdgpu_device_info(struct amdgpu_device *adev, struct
>>> drm_amdgpu_info_device *dev_info);
>>>   #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>> new file mode 100644
>>> index ..d2c15a1dcb0d
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
>>> @@ -0,0 +1,151 @@
>>> +// SPDX-License-Identifier: MIT
>>> +/*
>>> + * Copyright 2024 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom
>>> the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include "amdgpu.h"
>>> +#include "amd_pcie.h"
>>> +
>>> +#include 
>>> +
>>> +int amdgpu_device_info(struct amdgpu_device *adev, struct
>>> drm_amdgpu_info_device *dev_info)
>>> +{
>>> +    int ret;
>>> +    uint64_t vm_size;

Re: [PATCH] drm/amdgpu: refactor code to reuse system information

2024-03-19 Thread Lazar, Lijo



On 3/19/2024 6:02 PM, Sunil Khatri wrote:
> Refactor the code so debugfs and devcoredump can reuse
> the common information and avoid unnecessary copy of it.
> 
> Created a new file, which would be the right place to
> hold functions that will be shared between sysfs, debugfs
> and devcoredump.
> 
> Cc: Christian König 
> Cc: Alex Deucher 
> Signed-off-by: Sunil Khatri 
> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c | 151 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118 +--
>  4 files changed, 157 insertions(+), 115 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 4536c8ad0e11..05d34f4b18f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o 
> amdgpu_kms.o \
>   amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o 
> amdgpu_devinfo.o
>  
>  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9c62552bec34..0267870aa9b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1609,4 +1609,5 @@ extern const struct attribute_group 
> amdgpu_vram_mgr_attr_group;
>  extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>  extern const struct attribute_group amdgpu_flash_attr_group;
>  
> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> drm_amdgpu_info_device *dev_info);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> new file mode 100644
> index ..d2c15a1dcb0d
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_devinfo.c
> @@ -0,0 +1,151 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "amd_pcie.h"
> +
> +#include 
> +
> +int amdgpu_device_info(struct amdgpu_device *adev, struct 
> drm_amdgpu_info_device *dev_info)
> +{
> + int ret;
> + uint64_t vm_size;
> + uint32_t pcie_gen_mask;
> +
> + if (dev_info == NULL)
> + return -EINVAL;
> +
> + dev_info->device_id = adev->pdev->device;
> + dev_info->chip_rev = adev->rev_id;
> + dev_info->external_rev = adev->external_rev_id;
> + dev_info->pci_rev = adev->pdev->revision;
> + dev_info->family = adev->family;
> + dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
> + dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
> + /* return all clocks in KHz */
> + dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
> + if (adev->pm.dpm_enabled) {
> + dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 
> 10;
> + dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 
> 10;
> + dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 
> 10;
> + dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 
> 10;
> + } else {
> + dev_info->max_engine_clock =
> + dev_info->min_engine_clock =
> + adev->clock.default_sclk * 10;
> + dev_info->max_memory_clock =
> + dev_info->min_memo

Re: [PATCH v3 7/7] PCI: Exclude PCIe ports used for virtual links in pcie_bandwidth_available()

2023-11-15 Thread Lazar, Lijo




On 11/16/2023 2:39 AM, Mario Limonciello wrote:

On 11/15/2023 11:04, Mario Limonciello wrote:

On 11/14/2023 21:23, Lazar, Lijo wrote:



On 11/15/2023 1:37 AM, Mario Limonciello wrote:

The USB4 spec specifies that PCIe ports that are used for tunneling
PCIe traffic over USB4 fabric will be hardcoded to advertise 2.5GT/s and
behave as a PCIe Gen1 device. The actual performance of these ports is
controlled by the fabric implementation.

Callers for pcie_bandwidth_available() will always find the PCIe ports
used for tunneling as a limiting factor potentially leading to incorrect
performance decisions.

To prevent such problems check explicitly for ports that are marked as
virtual links or as thunderbolt controllers and skip them when looking
for bandwidth limitations of the hierarchy. If the only device connected
is a port used for tunneling then report that device.

Callers to pcie_bandwidth_available() could make this change on their
own as well but then they wouldn't be able to detect other potential
speed bottlenecks from the hierarchy without duplicating
pcie_bandwidth_available() logic.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925#note_2145860
Link: https://www.usb.org/document-library/usb4r-specification-v20
   USB4 V2 with Errata and ECN through June 2023
   Section 11.2.1
Signed-off-by: Mario Limonciello 
---
v2->v3:
  * Split from previous patch version
  * Look for thunderbolt or virtual link
---
  drivers/pci/pci.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0ff7883cc774..b1fb2258b211 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6269,11 +6269,20 @@ static u32 pcie_calc_bw_limits(struct 
pci_dev *dev, u32 bw,
   * limiting_dev, speed, and width pointers are supplied) 
information about
   * that point.  The bandwidth returned is in Mb/s, i.e., 
megabits/second of

   * raw bandwidth.
+ *
+ * This excludes the bandwidth calculation that has been returned 
from a
+ * PCIe device that is used for transmitting tunneled PCIe traffic 
over a virtual
+ * link part of larger hierarchy. Examples include Thunderbolt3 and 
USB4 links.
+ * The calculation is excluded because the USB4 specification 
specifies that the
+ * max speed returned from PCIe configuration registers for the 
tunneling link is
+ * always PCI 1x 2.5 GT/s.  When only tunneled devices are present, 
the bandwidth

+ * returned is the bandwidth available from the first tunneled device.
   */
  u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev 
**limiting_dev,

   enum pci_bus_speed *speed,
   enum pcie_link_width *width)
  {
+    struct pci_dev *vdev = NULL;
  u32 bw = 0;
  if (speed)
@@ -6282,10 +6291,20 @@ u32 pcie_bandwidth_available(struct pci_dev 
*dev, struct pci_dev **limiting_dev,

  *width = PCIE_LNK_WIDTH_UNKNOWN;
  while (dev) {
+    if (dev->is_virtual_link || dev->is_thunderbolt) {
+    if (!vdev)
+    vdev = dev;
+    goto skip;
+    }


One problem with this is that it *silently* ignores the bandwidth limiting 
device - the bandwidth may not really be available if there are 
virtual links in between. That is a change in behavior from the 
messages shown in __pcie_print_link_status.


That's a good point.  How about a matching behavioral change to 
__pcie_print_link_status() where it looks at the entire hierarchy for 
any links marked as virtual and prints a message along the lines of:


"This value may be further limited by virtual links".


I'll wait for some more feedback on the series before posting another 
version, but I did put this together and this is a sample from dmesg of 
the wording I'm planning on using for the next version:


31.504 Gb/s available PCIe bandwidth, this may be further limited by 
conditions of virtual link :00:03.1




This will cover the message, but for any real user of the API this 
is not good enough, as the speed returned doesn't really indicate the 
bandwidth available. Or, modify the description so that users know 
that the value cannot be trusted when there is a virtual link in between 
(probably the API should indicate that through some param/return code) 
and act accordingly.
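
One way such an indication could look, sketched roughly (this helper does not
exist in the kernel; it only checks the existing is_thunderbolt flag, and the
series' proposed is_virtual_link flag would be checked the same way):

#include <linux/pci.h>

/* Hypothetical helper: report whether any upstream port in the
 * hierarchy looks like a tunneled link, so callers of
 * pcie_bandwidth_available() know the returned value may not
 * reflect the real fabric bandwidth.
 */
static bool pcie_hierarchy_has_tunneled_link(struct pci_dev *dev)
{
	while (dev) {
		if (dev->is_thunderbolt)
			return true;
		dev = pci_upstream_bridge(dev);
	}
	return false;
}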


Thanks,
Lijo





Thanks,
Lijo

  bw = pcie_calc_bw_limits(dev, bw, limiting_dev, speed, 
width);

+skip:
  dev = pci_upstream_bridge(dev);
  }
+    /* If nothing "faster" found on hierarchy, limit to first 
virtual link */

+    if (vdev && !bw)
+    bw = pcie_calc_bw_limits(vdev, bw, limiting_dev, speed, 
width);

+
  return bw;
  }
  EXPORT_SYMBOL(pcie_bandwidth_available);






Re: [PATCH v3 7/7] PCI: Exclude PCIe ports used for virtual links in pcie_bandwidth_available()

2023-11-14 Thread Lazar, Lijo




On 11/15/2023 1:37 AM, Mario Limonciello wrote:

The USB4 spec specifies that PCIe ports that are used for tunneling
PCIe traffic over USB4 fabric will be hardcoded to advertise 2.5GT/s and
behave as a PCIe Gen1 device. The actual performance of these ports is
controlled by the fabric implementation.

Callers for pcie_bandwidth_available() will always find the PCIe ports
used for tunneling as a limiting factor potentially leading to incorrect
performance decisions.

To prevent such problems check explicitly for ports that are marked as
virtual links or as thunderbolt controllers and skip them when looking
for bandwidth limitations of the hierarchy. If the only device connected
is a port used for tunneling then report that device.

Callers to pcie_bandwidth_available() could make this change on their
own as well but then they wouldn't be able to detect other potential
speed bottlenecks from the hierarchy without duplicating
pcie_bandwidth_available() logic.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925#note_2145860
Link: https://www.usb.org/document-library/usb4r-specification-v20
   USB4 V2 with Errata and ECN through June 2023
   Section 11.2.1
Signed-off-by: Mario Limonciello 
---
v2->v3:
  * Split from previous patch version
  * Look for thunderbolt or virtual link
---
  drivers/pci/pci.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0ff7883cc774..b1fb2258b211 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6269,11 +6269,20 @@ static u32 pcie_calc_bw_limits(struct pci_dev *dev, u32 
bw,
   * limiting_dev, speed, and width pointers are supplied) information about
   * that point.  The bandwidth returned is in Mb/s, i.e., megabits/second of
   * raw bandwidth.
+ *
+ * This excludes the bandwidth calculation that has been returned from a
+ * PCIe device that is used for transmitting tunneled PCIe traffic over a 
virtual
+ * link part of larger hierarchy. Examples include Thunderbolt3 and USB4 links.
+ * The calculation is excluded because the USB4 specification specifies that 
the
+ * max speed returned from PCIe configuration registers for the tunneling link 
is
+ * always PCI 1x 2.5 GT/s.  When only tunneled devices are present, the 
bandwidth
+ * returned is the bandwidth available from the first tunneled device.
   */
  u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev 
**limiting_dev,
 enum pci_bus_speed *speed,
 enum pcie_link_width *width)
  {
+   struct pci_dev *vdev = NULL;
u32 bw = 0;
  
  	if (speed)

@@ -6282,10 +6291,20 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, 
struct pci_dev **limiting_dev,
*width = PCIE_LNK_WIDTH_UNKNOWN;
  
  	while (dev) {

+   if (dev->is_virtual_link || dev->is_thunderbolt) {
+   if (!vdev)
+   vdev = dev;
+   goto skip;
+   }


One problem with this is that it *silently* ignores the bandwidth limiting 
device - the bandwidth may not really be available if there are virtual 
links in between. That is a change in behavior from the messages shown 
in __pcie_print_link_status.


Thanks,
Lijo


bw = pcie_calc_bw_limits(dev, bw, limiting_dev, speed, width);
+skip:
dev = pci_upstream_bridge(dev);
}
  
+	/* If nothing "faster" found on hierarchy, limit to first virtual link */

+   if (vdev && !bw)
+   bw = pcie_calc_bw_limits(vdev, bw, limiting_dev, speed, width);
+
return bw;
  }
  EXPORT_SYMBOL(pcie_bandwidth_available);


Re: [PATCH] drm/amd/pm: make power values signed

2023-11-10 Thread Lazar, Lijo




On 11/9/2023 2:11 PM, José Pekkarinen wrote:

The following patch will convert the power values returned by
amdgpu_hwmon_get_power to signed, fixing the following warnings reported
by coccinelle:

drivers/gpu/drm/amd/pm/amdgpu_pm.c:2801:5-8: WARNING: Unsigned expression compared 
with zero: val < 0
drivers/gpu/drm/amd/pm/amdgpu_pm.c:2814:5-8: WARNING: Unsigned expression compared 
with zero: val < 0

Signed-off-by: José Pekkarinen 
---
  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index e7bb1d324084..913ff62d5d5e 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2795,7 +2795,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device 
*dev,
   struct device_attribute *attr,
   char *buf)
  {
-   unsigned int val;
+   int val;


Hi Alex,

The code is different in drm-next.

https://gitlab.freedesktop.org/agd5f/linux/-/blob/amd-staging-drm-next/drivers/gpu/drm/amd/pm/amdgpu_pm.c#L2936

Thanks,
Lijo

  
  	val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_AVG_POWER);
  
@@ -2806,7 +2806,7 @@ static ssize_t amdgpu_hwmon_show_power_input(struct device *dev,

 struct device_attribute *attr,
 char *buf)
  {
-   unsigned int val;
+   int val;
  
  	val = amdgpu_hwmon_get_power(dev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER);
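
For reference, the coccinelle warning exists because amdgpu_hwmon_get_power()
can return a negative errno; storing that in an unsigned variable makes the
`val < 0` check dead code. A small standalone illustration of the pattern
(not the driver code itself):

#include <stdio.h>

/* stand-in for a helper that returns a reading or a negative errno */
static int get_power_reading(void)
{
	return -22;	/* e.g. -EINVAL */
}

int main(void)
{
	unsigned int uval = get_power_reading();	/* wraps to a huge value */
	int sval = get_power_reading();

	if (uval < 0)	/* always false: an unsigned value is never < 0 */
		printf("unreachable\n");
	if (sval < 0)	/* the intended error check */
		printf("error path taken\n");
	return 0;
}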
  


Re: [PATCH v3] drm: amd: Resolve Sphinx unexpected indentation warning

2023-11-07 Thread Lazar, Lijo




On 11/7/2023 9:58 PM, Hunter Chasens wrote:

Resolves Sphinx unexpected indentation warning when compiling
documentation (e.g. `make htmldocs`). Replaces tabs with spaces and adds
a literal block to keep vertical formatting of the
example power state list.

Signed-off-by: Hunter Chasens 


Reviewed-by: Lijo Lazar 

Thanks,
Lijo


---
  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 13 +++--
  1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 517b9fb4624c..576202bf64f3 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -989,12 +989,13 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
   * Reading back the files will show you the available power levels within
   * the power state and the clock information for those levels. If deep sleep 
is
   * applied to a clock, the level will be denoted by a special level 'S:'
- * E.g.,
- * S: 19Mhz *
- * 0: 615Mhz
- * 1: 800Mhz
- * 2: 888Mhz
- * 3: 1000Mhz
+ * E.g., ::
+ *
+ *  S: 19Mhz *
+ *  0: 615Mhz
+ *  1: 800Mhz
+ *  2: 888Mhz
+ *  3: 1000Mhz
   *
   *
   * To manually adjust these states, first select manual using


Re: [RFC v4 0/5] Proposal to use netlink for RAS and Telemetry across drm subsystem

2023-11-06 Thread Lazar, Lijo




On 11/1/2023 1:36 PM, Aravind Iddamsetty wrote:


On 30/10/23 20:41, Lazar, Lijo wrote:



On 10/30/2023 11:49 AM, Aravind Iddamsetty wrote:


On 26/10/23 15:34, Lazar, Lijo wrote:

Hi Lijo,

Thank you for your comments.




On 10/23/2023 8:59 PM, Alex Deucher wrote:

On Fri, Oct 20, 2023 at 7:42 PM Aravind Iddamsetty
 wrote:


Our hardware supports RAS(Reliability, Availability, Serviceability) by
reporting the errors to the host, which the KMD processes and exposes a
set of error counters which can be used by observability tools to take
corrective actions or repairs. Traditionally these were exposed
via PMU (for relative counters) and sysfs interface (for absolute
value) in our internal branch. But, due to the limitations in this
approach to use two interfaces and also not able to have an event based
reporting or configurability, an alternative approach to try netlink
was suggested by community for drm subsystem wide UAPI for RAS and
telemetry as discussed in [1].

This [1] is the inspiration to this series. It uses the generic
netlink(genl) family subsystem and exposes a set of commands that can
be used by every drm driver, the framework provides a means to have
custom commands too. Each drm driver instance (in this example an xe driver
instance) registers a family and operations to the genl subsystem through
which it enumerates and reports the error counters. An event based
notification is also supported, to which userspace can subscribe and
be notified when any error occurs and read the error counter; this avoids
continuous polling on the error counter. This can also be extended to
threshold based notification.


The commands used seems very limited. In AMD SOCs, IP blocks, instances of IP 
blocks, block types which support RAS will change across generations.

This series has a single command to query the counters supported. Within that 
it seems to assign unique ids for every combination of error type, IP block 
type and then another for each instance. Not sure how good this kind of 
approach is for an end user. The IDs won't necessarily stay the same across 
multiple generations. Users will generally be interested in specific IP blocks.


Exactly the IDs are UAPI and won't change once defined for a platform and any 
new SKU or platform will add on top of existing ones. Userspace can include the 
header and use the defines. The query is used to know what all errors exists on 
a platform and userspace can process the IDs of IP block of interest. I believe 
even if we list block wise a query will be needed without which userspace 
wouldn't know which blocks exist on a platform.



What I meant is - assigning an id for every combination of IP block/ instance 
number/error type is not maintainable across different SOCs.

Instead, can we have  something like -
 Query -> returns IP block ids, number of instances, error types supported 
by each IP block.
 Read Error -> IP block id | Instance number /Instance ALL | Error type 
id/Error type ALL.


Hi Lijo,

Would you please elaborate more on what is the issue you fore see with the 
maintainability. But I have a query on the model suggested

This might work well with user-input-based tools, but I don't think it suits if 
we want to periodically read a particular counter.

The inspiration to have an ID for each is taken from the PMU subsystem, where 
every event has an ID in a flat list, so no multiple queries are needed and we 
can read them individually or group them together, which can be achieved via 
the READ_MULTI command I proposed earlier.



The problem is mainly with maintaining a static list including all ip_id 
| instance | err_type combinations. Instead, the preference is for the client 
to query the capabilities (instance/error types supported) and then use 
that info later to fetch error info.


A capability query could return something like IP block, total instances 
available and error types supported. This doesn't require maintaining an 
ID list for each combination.


The instances per SOC could be variable. For ex: it's not required that 
all SKUs of your SOC type have ss0-ss3 HBMs. For the same SOC 
type or for a new SOC type, it could be more or less.


Roughly something like ..

enum ip_block_id {
	block1,
	block2,
	block3,
	...
	block_all
};

enum ip_sub_block_id {		/* if required */
	sub_block1,
	sub_block2,
	...
	sub_block_all
};

#define INSTANCE_ALL	-1

enum ras_error_type {
	correctable,
	uncorrectable,
	deferred,
	fatal,
	...
	err_all
};

Then define something like below while querying error details.

<31:24> - block id
<23:16> - subblock id
<15:8>  - interested instance
<7:0>   - error_type

Instance number could be 'inst_all' or a specific IP instance.
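
A small illustration of how such an argument could be packed, using the enums
and layout above (purely the proposal sketched in this mail, not an existing
UAPI):

#include <linux/types.h>

#define RAS_QUERY_ARG(block, sub, inst, err)		\
	(((__u32)((block) & 0xff) << 24) |		\
	 ((__u32)((sub)   & 0xff) << 16) |		\
	 ((__u32)((inst)  & 0xff) <<  8) |		\
	  (__u32)((err)   & 0xff))

/* e.g. all instances of block1, correctable errors only:
 *	__u32 arg = RAS_QUERY_ARG(block1, sub_block_all, INSTANCE_ALL, correctable);
 */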

Thanks,
Lijo


Thanks,
Aravind.


Thanks,
Lijo



For ex: to get HBM errors, it looks like the current patch series supports 
READALL which dumps the whole set of errors.

Re: [PATCH v2] drm: amd: Resolve Sphinx unexpected indentation warning

2023-11-06 Thread Lazar, Lijo




On 11/7/2023 1:47 AM, Hunter Chasens wrote:

Resolves Sphinx unexpected indentation warning when compiling
documentation (e.g. `make htmldocs`). Replaces tabs with spaces and adds
a literal block to keep vertical formatting of the
example power state list.

Signed-off-by: Hunter Chasens 
---
  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 13 +++--
  1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 517b9fb4624c..81b8ceb26890 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -989,12 +989,13 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
   * Reading back the files will show you the available power levels within
   * the power state and the clock information for those levels. If deep sleep 
is
   * applied to a clock, the level will be denoted by a special level 'S:'
- * E.g.,
- * S: 19Mhz *
- * 0: 615Mhz
- * 1: 800Mhz
- * 2: 888Mhz
- * 3: 1000Mhz
+ * E.g.::


This will render as 'E.g.:'. Could you keep it like 'E.g., ::' so that the '::' 
is taken out?


Thanks,
Lijo


+ *
+ *  S: 19Mhz *
+ *  0: 615Mhz
+ *  1: 800Mhz
+ *  2: 888Mhz
+ *  3: 1000Mhz
   *
   *
   * To manually adjust these states, first select manual using


Re: [PATCH v1] drm: amd: Resolve Sphinx unexpected indentation warning

2023-11-06 Thread Lazar, Lijo




On 11/6/2023 2:30 AM, Hunter Chasens wrote:

Resolves Sphinx unexpected indentation warning when compiling
documentation (e.g. `make htmldocs`). Replaces tabs with spaces and adds
a literal block to keep vertical formatting of the
example power state list.

Signed-off-by: Hunter Chasens 


Thanks!
Reviewed-by: Lijo Lazar 

---
  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 517b9fb4624c..703fe2542258 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -990,11 +990,14 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
   * the power state and the clock information for those levels. If deep sleep 
is
   * applied to a clock, the level will be denoted by a special level 'S:'
   * E.g.,
- * S: 19Mhz *
- * 0: 615Mhz
- * 1: 800Mhz
- * 2: 888Mhz
- * 3: 1000Mhz
+ *
+ * ::
+ *
+ *  S: 19Mhz *
+ *  0: 615Mhz
+ *  1: 800Mhz
+ *  2: 888Mhz
+ *  3: 1000Mhz
   *
   *
   * To manually adjust these states, first select manual using


Re: [PATCH v2 8/9] PCI: Exclude PCIe ports used for tunneling in pcie_bandwidth_available()

2023-11-04 Thread Lazar, Lijo




On 11/4/2023 12:37 AM, Mario Limonciello wrote:

The USB4 spec specifies that PCIe ports that are used for tunneling
PCIe traffic over USB4 fabric will be hardcoded to advertise 2.5GT/s and
behave as a PCIe Gen1 device. The actual performance of these ports is
controlled by the fabric implementation.


The code below ties the generic term 'tunneling' to the USB4 spec. I think it 
should be something like if (is_USB4 && is_tunneled), exclude from 
bandwidth calculations - it should specifically identify USB4-based 
tunneling rather than applying to all 'tunneled' cases.


Thanks,
Lijo



Downstream drivers such as amdgpu which utilize pcie_bandwidth_available()
to program the device will always find the PCIe ports used for
tunneling as a limiting factor potentially leading to incorrect
performance decisions.

To prevent problems in downstream drivers check explicitly for ports
being used for PCIe tunneling and skip them when looking for bandwidth
limitations of the hierarchy. If the only device connected is a root port
used for tunneling then report that device.

Downstream drivers could make this change on their own but then they
wouldn't be able to detect other potential speed bottlenecks from the
hierarchy without duplicating pcie_bandwidth_available() logic.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925#note_2145860
Link: https://www.usb.org/document-library/usb4r-specification-v20
   USB4 V2 with Errata and ECN through June 2023
   Section 11.2.1
Signed-off-by: Mario Limonciello 
---
  drivers/pci/pci.c | 74 +++
  1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d9aa5a39f585..15e37164ce56 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6223,6 +6223,35 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
  }
  EXPORT_SYMBOL(pcie_set_mps);
  
+static u32 pcie_calc_bw_limits(struct pci_dev *dev, u32 bw,

+  struct pci_dev **limiting_dev,
+  enum pci_bus_speed *speed,
+  enum pcie_link_width *width)
+{
+   enum pcie_link_width next_width;
+   enum pci_bus_speed next_speed;
+   u32 next_bw;
+   u16 lnksta;
+
+   pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
+   next_speed = pcie_link_speed[lnksta & PCI_EXP_LNKSTA_CLS];
+   next_width = (lnksta & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
+   next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed);
+
+   /* Check if current device limits the total bandwidth */
+   if (!bw || next_bw <= bw) {
+   bw = next_bw;
+   if (limiting_dev)
+   *limiting_dev = dev;
+   if (speed)
+   *speed = next_speed;
+   if (width)
+   *width = next_width;
+   }
+
+   return bw;
+}
+
  /**
   * pcie_bandwidth_available - determine minimum link settings of a PCIe
   *  device and its bandwidth limitation
@@ -6236,47 +6265,42 @@ EXPORT_SYMBOL(pcie_set_mps);
   * limiting_dev, speed, and width pointers are supplied) information about
   * that point.  The bandwidth returned is in Mb/s, i.e., megabits/second of
   * raw bandwidth.
+ *
+ * This excludes the bandwidth calculation that has been returned from a
+ * PCIe device used for transmitting tunneled PCIe traffic over a Thunderbolt
+ * or USB4 link that is part of larger hierarchy. The calculation is excluded
+ * because the USB4 specification specifies that the max speed returned from
+ * PCIe configuration registers for the tunneling link is always PCI 1x 2.5 
GT/s.
+ * When only tunneled devices are present, the bandwidth returned is the
+ * bandwidth available from the first tunneled device.
   */
  u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev 
**limiting_dev,
 enum pci_bus_speed *speed,
 enum pcie_link_width *width)
  {
-   u16 lnksta;
-   enum pci_bus_speed next_speed;
-   enum pcie_link_width next_width;
-   u32 bw, next_bw;
+   struct pci_dev *tdev = NULL;
+   u32 bw = 0;
  
  	if (speed)

*speed = PCI_SPEED_UNKNOWN;
if (width)
*width = PCIE_LNK_WIDTH_UNKNOWN;
  
-	bw = 0;

-
while (dev) {
-   pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta);
-
-   next_speed = pcie_link_speed[lnksta & PCI_EXP_LNKSTA_CLS];
-   next_width = (lnksta & PCI_EXP_LNKSTA_NLW) >>
-   PCI_EXP_LNKSTA_NLW_SHIFT;
-
-   next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed);
-
-   /* Check if current device limits the total bandwidth */
-   if (!bw || next_bw <= bw) {
-   bw = next_bw;
-
-   if (limiting_dev)
-   *limiting_dev = dev;
-

Re: [RFC v4 0/5] Proposal to use netlink for RAS and Telemetry across drm subsystem

2023-10-30 Thread Lazar, Lijo




On 10/30/2023 11:49 AM, Aravind Iddamsetty wrote:


On 26/10/23 15:34, Lazar, Lijo wrote:

Hi Lijo,

Thank you for your comments.




On 10/23/2023 8:59 PM, Alex Deucher wrote:

On Fri, Oct 20, 2023 at 7:42 PM Aravind Iddamsetty
 wrote:


Our hardware supports RAS(Reliability, Availability, Serviceability) by
reporting the errors to the host, which the KMD processes and exposes a
set of error counters which can be used by observability tools to take
corrective actions or repairs. Traditionally these were exposed
via PMU (for relative counters) and sysfs interface (for absolute
value) in our internal branch. But, due to the limitations in this
approach to use two interfaces and also not able to have an event based
reporting or configurability, an alternative approach to try netlink
was suggested by community for drm subsystem wide UAPI for RAS and
telemetry as discussed in [1].

This [1] is the inspiration to this series. It uses the generic
netlink(genl) family subsystem and exposes a set of commands that can
be used by every drm driver, the framework provides a means to have
custom commands too. Each drm driver instance (in this example an xe driver
instance) registers a family and operations to the genl subsystem through
which it enumerates and reports the error counters. An event based
notification is also supported, to which userspace can subscribe and
be notified when any error occurs and read the error counter; this avoids
continuous polling on the error counter. This can also be extended to
threshold based notification.


The commands used seems very limited. In AMD SOCs, IP blocks, instances of IP 
blocks, block types which support RAS will change across generations.

This series has a single command to query the counters supported. Within that 
it seems to assign unique ids for every combination of error type, IP block 
type and then another for each instance. Not sure how good this kind of 
approach is for an end user. The IDs won't necessarily stay the same across 
multiple generations. Users will generally be interested in specific IP blocks.


Exactly the IDs are UAPI and won't change once defined for a platform and any 
new SKU or platform will add on top of existing ones. Userspace can include the 
header and use the defines. The query is used to know what all errors exists on 
a platform and userspace can process the IDs of IP block of interest. I believe 
even if we list block wise a query will be needed without which userspace 
wouldn't know which blocks exist on a platform.



What I meant is - assigning an id for every combination of IP block/ 
instance number/error type is not maintainable across different SOCs.


Instead, can we have  something like -
	Query -> returns IP block ids, number of instances, error types 
supported by each IP block.
	Read Error -> IP block id | Instance number /Instance ALL | Error type 
id/Error type ALL.


Thanks,
Lijo



For ex: to get HBM errors, it looks like the current patch series supports 
READALL which dumps the whole set of errors. Or, users have to figure out the 
ids of HBM stack instance (whose capacity can change depending on the SOC and 
within a single family multiple configurations can exist) errors and do 
multiple READ_ONE calls. Both don't look good.

It would be better if the command argument format can be well defined so that 
it can be queried based on IP block type, instance, and error types supported 
(CE/UE/fatal/parity/deferred etc.).


So, to mitigate the multiple-read limitation, we can introduce a new GENL command 
like READ_MULTI which accepts a list of error ids that userspace can pass and 
returns all the requested error counters in a single response. Also, listing individual 
errors helps if userspace wants to read a particular error at regular 
intervals. The intention is also to keep the KMD logic simple; userspace can build 
the required model on top of the flat enumeration.

Please let me know if this sounds reasonable to you.

Thanks,
Aravind.


Thanks,
Lijo



@Hawking Zhang, @Lazar, Lijo

Can you take a look at this series and API and see if it would align
with our RAS requirements going forward?

Alex




[1]: https://airlied.blogspot.com/2022/09/accelerators-bof-outcomes-summary.html

this series is on top of https://patchwork.freedesktop.org/series/125373/,

v4:
1. Rebase
2. rename drm_genl_send to drm_genl_reply
3. catch error from xa_store and handle appropriately
4. presently xe_list_errors fills blank data for IGFX, prevent it by
having an early check of IS_DGFX (Michael J. Ruhl)

v3:
1. Rebase on latest RAS series for XE
2. drop DRIVER_NETLINK flag and use the driver_genl_ops structure to
register to netlink subsystem

v2: define common interfaces to genl netlink subsystem that all drm drivers
can leverage.

Below is an example tool drm_ras which demonstrates the use of the
supported commands. The tool will be sent to ML with the subject
"[RFC i-g-t v2 0/1] A tool to demonstrate use of ne

Re: [RFC v4 0/5] Proposal to use netlink for RAS and Telemetry across drm subsystem

2023-10-26 Thread Lazar, Lijo




On 10/23/2023 8:59 PM, Alex Deucher wrote:

On Fri, Oct 20, 2023 at 7:42 PM Aravind Iddamsetty
 wrote:


Our hardware supports RAS(Reliability, Availability, Serviceability) by
reporting the errors to the host, which the KMD processes and exposes a
set of error counters which can be used by observability tools to take
corrective actions or repairs. Traditionally these were exposed
via PMU (for relative counters) and sysfs interface (for absolute
value) in our internal branch. But, due to the limitations in this
approach to use two interfaces and also not able to have an event based
reporting or configurability, an alternative approach to try netlink
was suggested by community for drm subsystem wide UAPI for RAS and
telemetry as discussed in [1].

This [1] is the inspiration to this series. It uses the generic
netlink(genl) family subsystem and exposes a set of commands that can
be used by every drm driver, the framework provides a means to have
custom commands too. Each drm driver instance (in this example an xe driver
instance) registers a family and operations to the genl subsystem through
which it enumerates and reports the error counters. An event based
notification is also supported, to which userspace can subscribe and
be notified when any error occurs and read the error counter; this avoids
continuous polling on the error counter. This can also be extended to
threshold based notification.


The commands used seems very limited. In AMD SOCs, IP blocks, instances 
of IP blocks, block types which support RAS will change across generations.


This series has a single command to query the counters supported. Within 
that it seems to assign unique ids for every combination of error type, 
IP block type and then another for each instance. Not sure how good this 
kind of approach is for an end user. The IDs won't necessarily stay 
the same across multiple generations. Users will generally be interested 
in specific IP blocks.


For ex: to get HBM errors, it looks like the current patch series 
supports READALL which dumps the whole set of errors. Or, users have to 
figure out the ids of HBM stack instance (whose capacity can change 
depending on the SOC and within a single family multiple configurations 
can exist) errors and do multiple READ_ONE calls. Both don't look good.


It would be better if the command argument format can be well defined so 
that it can be queried based on IP block type, instance, and error types 
supported (CE/UE/fatal/parity/deferred etc.).


Thanks,
Lijo



@Hawking Zhang, @Lazar, Lijo

Can you take a look at this series and API and see if it would align
with our RAS requirements going forward?

Alex




[1]: https://airlied.blogspot.com/2022/09/accelerators-bof-outcomes-summary.html

this series is on top of https://patchwork.freedesktop.org/series/125373/,

v4:
1. Rebase
2. rename drm_genl_send to drm_genl_reply
3. catch error from xa_store and handle appropriately
4. presently xe_list_errors fills blank data for IGFX, prevent it by
having an early check of IS_DGFX (Michael J. Ruhl)

v3:
1. Rebase on latest RAS series for XE
2. drop DRIVER_NETLINK flag and use the driver_genl_ops structure to
register to netlink subsystem

v2: define common interfaces to genl netlink subsystem that all drm drivers
can leverage.

Below is an example tool drm_ras which demonstrates the use of the
supported commands. The tool will be sent to ML with the subject
"[RFC i-g-t v2 0/1] A tool to demonstrate use of netlink sockets to read RAS error 
counters"
https://patchwork.freedesktop.org/series/118437/#rev2

read single error counter:

$ ./drm_ras READ_ONE --device=drm:/dev/dri/card1 --error_id=0x0005
counter value 0

read all error counters:

$ ./drm_ras READ_ALL --device=drm:/dev/dri/card1
name                            config-id       counter

error-gt0-correctable-guc   0x0001  0
error-gt0-correctable-slm   0x0003  0
error-gt0-correctable-eu-ic 0x0004  0
error-gt0-correctable-eu-grf0x0005  0
error-gt0-fatal-guc 0x0009  0
error-gt0-fatal-slm 0x000d  0
error-gt0-fatal-eu-grf  0x000f  0
error-gt0-fatal-fpu 0x0010  0
error-gt0-fatal-tlb 0x0011  0
error-gt0-fatal-l3-fabric   0x0012  0
error-gt0-correctable-subslice  0x0013  0
error-gt0-correctable-l3bank0x0014  0
error-gt0-fatal-subslice0x0015  0

Re: [PATCH v3 0/7] GPU workload hints for better performance

2023-08-28 Thread Lazar, Lijo

As mentioned with an older version of this series, this is an 'abuse' of the 
power profile interface.

This series is oversimplifying what PMFW algorithms are supposed to be doing. 
Whatever this series is doing, FW can do it better.

To explain in simpler terms - it just tries to boost a profile based on ring 
type without even knowing how much activity a job can trigger on a 
particular ring. A job scheduled to a GFX ring doesn't deserve a profile boost 
unless it can create a certain level of activity. In CPU terms, scheduling a job 
on a processor doesn't mean it deserves a frequency boost of that CPU. At 
minimum it depends on more details, like whether that job is compute bound or 
memory bound.

While FW algorithms are designed to do that, this series tries to trivialise 
all such things.

Unless you are able to show tangible benefits in terms like 
performance, power, or performance per watt, I don't think this should be the 
default behaviour, where the driver tries to override FW based only on job 
submissions to rings.

Thanks,
Lijo

From: amd-gfx  on behalf of Arvind Yadav 

Sent: Monday, August 28, 2023 5:56:07 PM
To: Koenig, Christian ; Deucher, Alexander 
; Sharma, Shashank ; Pan, 
Xinhui ; airl...@gmail.com ; 
dan...@ffwll.ch ; Kuehling, Felix ; 
amd-...@lists.freedesktop.org 
Cc: Yadav, Arvind ; linux-ker...@vger.kernel.org 
; dri-devel@lists.freedesktop.org 

Subject: [PATCH v3 0/7] GPU workload hints for better performance

AMDGPU SOCs supports dynamic workload based power profiles, which can
provide fine-tuned performance for a particular type of workload.
This patch series adds an interface to set/reset these power profiles
based on the submitted job. The driver can dynamically switch
the power profiles based on submitted job. This can optimize the power
performance when the particular workload is on.

v2:
- Splitting workload_profile_set and workload_profile_put
  into two separate patches.
- Addressed review comment.
- Added new suspend function.
- Added patch to switches the GPU workload mode for KFD.

v3:
- Addressed all review comment.
- Changed the function name from *_set() to *_get().
- Now clearing all the profile in work handler.
- Added *_clear_all function to clear all the power profile.


Arvind Yadav (7):
  drm/amdgpu: Added init/fini functions for workload
  drm/amdgpu: Add new function to set GPU power profile
  drm/amdgpu: Add new function to put GPU power profile
  drm/amdgpu: Add suspend function to clear the GPU power profile.
  drm/amdgpu: Set/Reset GPU workload profile
  drm/amdgpu: switch workload context to/from compute
  Revert "drm/amd/amdgpu: switch on/off vcn power profile mode"

 drivers/gpu/drm/amd/amdgpu/Makefile   |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 226 ++
 drivers/gpu/drm/amd/include/amdgpu_workload.h |  61 +
 8 files changed, 309 insertions(+), 16 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
 create mode 100644 drivers/gpu/drm/amd/include/amdgpu_workload.h

--
2.34.1



Re: [V10 5/8] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature

2023-08-27 Thread Lazar, Lijo

> 'j' was initially set as 'num_of_wbrf_ranges - 1'. So, I suppose 
> 'num_of_wbrf_ranges' should be set as 'j' instead of 'j - 1'. Right?

Yes.

Thanks,
Lijo

From: Quan, Evan 
Sent: Monday, August 28, 2023 7:23:55 AM
To: Lazar, Lijo ; l...@kernel.org ; 
johan...@sipsolutions.net ; da...@davemloft.net 
; eduma...@google.com ; 
k...@kernel.org ; pab...@redhat.com ; 
Deucher, Alexander ; raf...@kernel.org 
; Limonciello, Mario 
Cc: linux-ker...@vger.kernel.org ; 
linux-a...@vger.kernel.org ; 
amd-...@lists.freedesktop.org ; 
dri-devel@lists.freedesktop.org ; 
linux-wirel...@vger.kernel.org ; 
net...@vger.kernel.org 
Subject: RE: [V10 5/8] drm/amd/pm: setup the framework to support Wifi RFI 
mitigation feature


> -Original Message-
> From: Lazar, Lijo 
> Sent: Friday, August 25, 2023 10:09 PM
> To: Quan, Evan ; l...@kernel.org;
> johan...@sipsolutions.net; da...@davemloft.net; eduma...@google.com;
> k...@kernel.org; pab...@redhat.com; Deucher, Alexander
> ; raf...@kernel.org; Limonciello, Mario
> 
> Cc: linux-ker...@vger.kernel.org; linux-a...@vger.kernel.org; amd-
> g...@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
> wirel...@vger.kernel.org; net...@vger.kernel.org
> Subject: Re: [V10 5/8] drm/amd/pm: setup the framework to support Wifi
> RFI mitigation feature
>
>
>
> On 8/25/2023 2:08 PM, Evan Quan wrote:
> > With WBRF feature supported, as a driver responding to the
> > frequencies, amdgpu driver is able to do shadow pstate switching to
> > mitigate possible interference(between its (G-)DDR memory clocks and
> > local radio module frequency bands used by Wifi 6/6e/7).
> >
> > Signed-off-by: Evan Quan 
> > Reviewed-by: Mario Limonciello 
> > --
> > v1->v2:
> >- update the prompt for feature support(Lijo)
> > v8->v9:
> >- update parameter document for smu_wbrf_event_handler(Simon)
> > v9->v10:
> >   - correct the logics for wbrf range sorting(Lijo)
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  17 ++
> >   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 195
> ++
> >   drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  23 +++
> >   drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
> >   5 files changed, 240 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index a3b86b86dc47..2bfc9111ab00 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -247,6 +247,8 @@ extern int amdgpu_sg_display;
> >
> >   extern int amdgpu_user_partt_mode;
> >
> > +extern int amdgpu_wbrf;
> > +
> >   #define AMDGPU_VM_MAX_NUM_CTX 4096
> >   #define AMDGPU_SG_THRESHOLD   (256*1024*1024)
> >   #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 0593ef8fe0a6..1c574bd3b60d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -195,6 +195,7 @@ int amdgpu_use_xgmi_p2p = 1;
> >   int amdgpu_vcnfw_log;
> >   int amdgpu_sg_display = -1; /* auto */
> >   int amdgpu_user_partt_mode =
> AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
> > +int amdgpu_wbrf = -1;
> >
> >   static void amdgpu_drv_delayed_reset_work_handler(struct work_struct
> > *work);
> >
> > @@ -981,6 +982,22 @@ module_param_named(user_partt_mode,
> amdgpu_user_partt_mode, uint, 0444);
> >   module_param(enforce_isolation, bool, 0444);
> >   MODULE_PARM_DESC(enforce_isolation, "enforce process isolation
> > between graphics and compute . enforce_isolation = on");
> >
> > +/**
> > + * DOC: wbrf (int)
> > + * Enable Wifi RFI interference mitigation feature.
> > + * Due to electrical and mechanical constraints there may be likely
> > +interference of
> > + * relatively high-powered harmonics of the (G-)DDR memory clocks
> > +with local radio
> > + * module frequency bands used by Wifi 6/6e/7. To mitigate the
> > +possible RFI interference,
> > + * with this feature enabled, PMFW will use either “shadowed P-State”
> > +or “P-State” based
> > + * on active list of frequencies in-use (to be avoided) as part of
> > +initial setting or
> > + * P-state transition. However, there may be potential performa

Re: [V10 7/8] drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0

2023-08-25 Thread Lazar, Lijo




On 8/25/2023 2:08 PM, Evan Quan wrote:

Fulfill the SMU13.0.0 support for Wifi RFI mitigation feature.

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
---
  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  3 +
  drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |  3 +-
  drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  3 +
  .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c|  9 +++
  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 60 +++
  5 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 60d595344c45..a081e6bb27c4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -325,6 +325,7 @@ enum smu_table_id
SMU_TABLE_PACE,
SMU_TABLE_ECCINFO,
SMU_TABLE_COMBO_PPTABLE,
+   SMU_TABLE_WIFIBAND,
SMU_TABLE_COUNT,
  };
  
@@ -1501,6 +1502,8 @@ enum smu_baco_seq {

 __dst_size);  \
  })
  
+#define HZ_IN_MHZ		1000000U

+
  #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && 
!defined(SWSMU_CODE_LAYER_L4)
  int smu_get_power_limit(void *handle,
uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 297b70b9388f..5bbb60289a79 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -245,7 +245,8 @@
__SMU_DUMMY_MAP(AllowGpo),  \
__SMU_DUMMY_MAP(Mode2Reset),\
__SMU_DUMMY_MAP(RequestI2cTransaction), \
-   __SMU_DUMMY_MAP(GetMetricsTable),
+   __SMU_DUMMY_MAP(GetMetricsTable), \
+   __SMU_DUMMY_MAP(EnableUCLKShadow),
  
  #undef __SMU_DUMMY_MAP

  #define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 355c156d871a..dd70b56aa71e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -299,5 +299,8 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
 uint32_t pcie_gen_cap,
 uint32_t pcie_width_cap);
  
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu,

+bool enablement);
+
  #endif
  #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 9b62b45ebb7f..6a5cb582aa92 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -2472,3 +2472,12 @@ int smu_v13_0_update_pcie_parameters(struct smu_context 
*smu,
  
  	return 0;

  }
+
+int smu_v13_0_enable_uclk_shadow(struct smu_context *smu,
+bool enablement)
+{
+   return smu_cmn_send_smc_msg_with_param(smu,
+  SMU_MSG_EnableUCLKShadow,
+  enablement,
+  NULL);
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 3d188616ba24..fd3ac18653ed 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -154,6 +154,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(AllowGpo,   PPSMC_MSG_SetGpoAllow,  
 0),
MSG_MAP(AllowIHHostInterrupt,   PPSMC_MSG_AllowIHHostInterrupt, 
  0),
MSG_MAP(ReenableAcDcInterrupt,  
PPSMC_MSG_ReenableAcDcInterrupt,   0),
+   MSG_MAP(EnableUCLKShadow,   PPSMC_MSG_EnableUCLKShadow, 
   0),
  };
  
  static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {

@@ -237,6 +238,7 @@ static struct cmn2asic_mapping 
smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(I2C_COMMANDS),
TAB_MAP(ECCINFO),
TAB_MAP(OVERDRIVE),
+   TAB_MAP(WIFIBAND),
  };
  
  static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {

@@ -481,6 +483,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+   SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND,
+  sizeof(WifiBandEntryTable_t), PAGE_SIZE,
+  AMDGPU_GEM_DOMAIN_VRAM);
  
  	smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);

if (!smu_table->metrics_table)
@@ -2593,6 +2598,58 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct 
smu_context *smu,
return 

Re: [V10 5/8] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature

2023-08-25 Thread Lazar, Lijo




On 8/25/2023 2:08 PM, Evan Quan wrote:

With WBRF feature supported, as a driver responding to the frequencies,
amdgpu driver is able to do shadow pstate switching to mitigate possible
interference(between its (G-)DDR memory clocks and local radio module
frequency bands used by Wifi 6/6e/7).

Signed-off-by: Evan Quan 
Reviewed-by: Mario Limonciello 
--
v1->v2:
   - update the prompt for feature support(Lijo)
v8->v9:
   - update parameter document for smu_wbrf_event_handler(Simon)
v9->v10:
  - correct the logics for wbrf range sorting(Lijo)
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  17 ++
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 195 ++
  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  23 +++
  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
  5 files changed, 240 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc47..2bfc9111ab00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -247,6 +247,8 @@ extern int amdgpu_sg_display;
  
  extern int amdgpu_user_partt_mode;
  
+extern int amdgpu_wbrf;

+
  #define AMDGPU_VM_MAX_NUM_CTX 4096
  #define AMDGPU_SG_THRESHOLD   (256*1024*1024)
  #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a6..1c574bd3b60d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -195,6 +195,7 @@ int amdgpu_use_xgmi_p2p = 1;
  int amdgpu_vcnfw_log;
  int amdgpu_sg_display = -1; /* auto */
  int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+int amdgpu_wbrf = -1;
  
  static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
  
@@ -981,6 +982,22 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);

  module_param(enforce_isolation, bool, 0444);
  MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
  
+/**

+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be likely interference of
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
  /* These devices are not supported by amdgpu.
   * They are supported by the mach64, r128, radeon drivers
   */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..bdfd234d1558 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1228,6 +1228,174 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu)
return ret;
  }
  
+/**

+ * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges
+ *
+ * @smu: smu_context pointer
+ *
+ * Retrieve the wbrf exclusion ranges and send them to PMFW for proper handling.
+ * Returns 0 on success, error on failure.
+ */
+static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu)
+{
+   struct wbrf_ranges_in_out wbrf_exclusion = {0};
+   struct exclusion_range *wifi_bands = wbrf_exclusion.band_list;
+   struct amdgpu_device *adev = smu->adev;
+   uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES;
+   uint64_t start, end;
+   int ret, i, j;
+
+   ret = acpi_amd_wbrf_retrieve_exclusions(adev->dev, &wbrf_exclusion);
+   if (ret) {
+   dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n");
+   return ret;
+   }
+
+	/*
+	 * The exclusion ranges array we got might be filled with holes and duplicate
+	 * entries. For example:
+	 * {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 6189), (0, 0)...}
+	 * We need to do some sortups to eliminate those holes and duplicate entries.
+	 * Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 0)...}
+	 */
+   for (i = 0; i < num_of_wbrf_ranges; i++) {
+   start = wifi_bands[i].start;
+   end = wifi_bands[i].end;
+
+   /* get the last valid entry to fill the interm

Re: [PATCH v2 3/7] drm/amdgpu: Add new function to put GPU power profile

2023-08-25 Thread Lazar, Lijo




On 8/25/2023 4:48 PM, Yadav, Arvind wrote:


On 8/22/2023 6:16 PM, Lazar, Lijo wrote:



On 8/22/2023 5:41 PM, Yadav, Arvind wrote:

Hi Lijo,

The *_set function will set the GPU power profile and the *_put 
function will  schedule the
smu_delayed_work task after 100ms delay. This smu_delayed_work task 
will clear a GPU
power profile if any new jobs are not scheduled within 100 ms. But if 
any new job  comes within 100ms
then the *_workload_profile_set function  will cancel this work and 
set the GPU power profile based on

preferences.

Please see the below case.

case 1 - only same profile jobs run. It will take 100ms to clear the 
profile once all jobs complete.


                                        wl = VIDEO <100ms>
workload _|`|

Jobs (VIDEO) |```|__|```|___||___


Case2 - two jobs of two different profile. job1 profile will be set 
but when job2 will arrive it will be moved

     to higher profile.

                  wl = VIDEO  ->    wl = COMPUTE <100ms>
workload 
___|``| 



Jobs (VIDEO) ___|```|__|```|___||___||___

Jobs (COMPUTE) __|```|___||___||_



Case3 - two jobs of two different profile. job1 profile will be set 
but when job2 will arrive it will not be moved
to lower profile. When compute job2 will complete then only it will 
move to lower profile.


                                      wl = COMPUTE 
->   wl = VIDEO  <100ms>
workload 
_|``| 



Jobs (COMPUTE)    |```|__|```|___||___||___

Jobs (VIDEO) 
___|```|___||___||___||___




swsmu layer maintains a workload mask based on priority. So once you 
have set the mask, until you unset it (i.e when refcount = 0), the 
mask will be set in the lower layer. swsmu layer will take care of 
requesting FW the highest priority. I don't think that needs to be 
repeated at this level.


At this layer, all you need is to refcount the requests and make the 
request.


When refcount of a profile becomes non-zero (only one-time), place one 
request for that profile. As swsmu layer maintains the workload mask, 
it will take the new profile also into consideration while requesting 
for the one  with the highest priority.


When refcount of a profile becomes zero, place a request to clear it. 
This is controlled by your idle work. As I see, it keeps an additional 
100ms tolerance before placing a clear request. In that way, there is 
no need to cancel that work.


Inside idle work handler -
Loop through the profiles that are set and clear those profiles whose 
refcount is zero.


Thus if a job starts during the 100ms delay, idle work won't see the 
ref count as zero and then it won't place a request to clear out that 
profile.



Hi Lijo,

Thank you for the comments. We will take them into consideration, but we would like to retain the same design.




All things aside, the entire idea of switching the power profile for every 
job submission on a ring looks like an 'abuse' of the power profile 
design. The goal of a power profile is to keep a specific profile for a 
sustained workload - like gaming mode, cinema mode, etc. It is not meant 
for switching profiles with every job submission that lasts milliseconds 
or less (though you may argue only the highest priority profile takes 
effect). This design ends up interrupting the FW every now and then on the 
assumption that the driver can do better. For any normal/mixed use 
scenario, the FW algorithms could handle it better with all the activity monitors they have.


If you are going ahead, please also make sure to post the improved 
performance numbers you are getting with this.


Thanks,
Lijo


~Arvind.


On 8/22/2023 10:21 AM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will clear the GPU
power profile after job finished.

This is how it works:
- schedular will set the GPU power profile based on ring_type.
- Schedular will clear the GPU Power profile once job finished.
- Here, the *_workload_profile_set function will set the GPU
   power profile and the *_workload_profile_put function will
   schedule the smu_delayed_work task after 100ms delay. This
   smu_delayed_work task will clear a GPU power profile if any
   new jobs are not scheduled within 100 ms. But if any new job
   comes within 100ms then the *_workload_profile_set function
   will cancel this work and set the GPU power profile based on
   preferences.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 97 
++

Re: [PATCH v2 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.

2023-08-22 Thread Lazar, Lijo




On 8/22/2023 5:52 PM, Yadav, Arvind wrote:


On 8/22/2023 12:01 PM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a suspend function that will clear the GPU
power profile before going into suspend state.

v2:
- Add the new suspend function based on review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 23 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  2 ++
  3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index cd3bf641b630..3b70e657b439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)

    amdgpu_ras_suspend(adev);
  +    amdgpu_workload_profile_suspend(adev);
+
  amdgpu_device_ip_suspend_phase1(adev);
    if (!adev->in_s0ix)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index 6367eb88a44d..44ca8e986984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -174,6 +174,29 @@ void amdgpu_workload_profile_set(struct amdgpu_device *adev,

  mutex_unlock(&workload->workload_lock);
  }
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)
+{
+    struct amdgpu_smu_workload *workload = &adev->smu_workload;
+    int ret;
+
+    mutex_lock(&workload->workload_lock);
+ cancel_delayed_work_sync(&workload->smu_delayed_work);


Another deadlock candidate. Between fini() and suspend(), the only 
difference probably could be initialization status. If so, just use a 
helper that is used during fini() and suspend().


Before going to suspend(), we need to cancel the work and clear all the 
profiles but in fini() we are destroying the mutex. also it will be 
called when we are unloading everything.




What I meant is for both suspend/fini, you need to cancel any work 
scheduled, clear refcounts and set the profile back to default profile. 
Keep this in a helper and reuse.
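
As a sketch of what that shared helper could look like (the helper name is 
made up here for illustration; the fields and calls reuse the names from 
this patch, so this is not the final code):

static void amdgpu_workload_profile_cleanup(struct amdgpu_device *adev)
{
	struct amdgpu_smu_workload *workload = &adev->smu_workload;
	int index;

	/* Cancel outside the lock so the work handler cannot deadlock on it */
	cancel_delayed_work_sync(&workload->smu_delayed_work);

	mutex_lock(&workload->workload_lock);
	for (index = fls(workload->submit_workload_status); index > 0; index--) {
		if (!(workload->submit_workload_status & (1 << index)))
			continue;
		atomic_set(&workload->power_profile_ref[index], 0);
		if (amdgpu_dpm_switch_power_profile(adev, index, false))
			DRM_WARN("Failed to clear power profile %d\n", index);
	}
	workload->submit_workload_status = 0;
	mutex_unlock(&workload->workload_lock);
}

/* suspend() is just the helper; fini() is the helper plus mutex_destroy() */
void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)
{
	amdgpu_workload_profile_cleanup(adev);
}

void amdgpu_workload_profile_fini(struct amdgpu_device *adev)
{
	amdgpu_workload_profile_cleanup(adev);
	mutex_destroy(&adev->smu_workload.workload_lock);
}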


Thanks,
Lijo


~Arvind


Thanks,
Lijo


+
+    /* Clear all the set GPU power profile*/
+    for (int index = fls(workload->submit_workload_status);
+ index > 0; index--) {
+    if (workload->submit_workload_status & (1 << index)) {
+ atomic_set(&workload->power_profile_ref[index], 0);
+    ret = amdgpu_power_profile_clear(adev, index);
+    if (ret)
+    DRM_WARN("Failed to clear power profile %s, err = 
%d\n",

+ amdgpu_workload_mode_name[index], ret);
+    }
+    }
+    workload->submit_workload_status = 0;
+    mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
  adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h

index ee1f87257f2d..0acd8769ec52 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct 
amdgpu_device *adev,

  void amdgpu_workload_profile_set(struct amdgpu_device *adev,
   uint32_t ring_type);
  +void amdgpu_workload_profile_suspend(struct amdgpu_device *adev);
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
    void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH v2 3/7] drm/amdgpu: Add new function to put GPU power profile

2023-08-22 Thread Lazar, Lijo




On 8/22/2023 5:41 PM, Yadav, Arvind wrote:

Hi Lijo,

The *_set function will set the GPU power profile and the *_put function 
will  schedule the
smu_delayed_work task after 100ms delay. This smu_delayed_work task will 
clear a GPU
power profile if any new jobs are not scheduled within 100 ms. But if 
any new job  comes within 100ms
then the *_workload_profile_set function  will cancel this work and set 
the GPU power profile based on

preferences.

Please see the below case.

case 1 - only same profile jobs run. It will take 100ms to clear the 
profile once all jobs complete.


                                        wl = VIDEO <100ms>
workload _|`|

Jobs (VIDEO) |```|__|```|___||___


Case2 - two jobs of two different profile. job1 profile will be set but 
when job2 will arrive it will be moved

     to higher profile.

                  wl = VIDEO  ->    wl = COMPUTE   <100ms>
workload 
___|``|


Jobs (VIDEO) ___|```|__|```|___||___||___

Jobs (COMPUTE) __|```|___||___||_



Case3 - two jobs of two different profile. job1 profile will be set but 
when job2 will arrive it will not be moved
to lower profile. When compute job2 will complete then only it will move 
to lower profile.


                                      wl = COMPUTE 
->   wl = VIDEO  <100ms>
workload 
_|``| 



Jobs (COMPUTE)    |```|__|```|___||___||___

Jobs (VIDEO) ___|```|___||___||___||___



swsmu layer maintains a workload mask based on priority. So once you 
have set the mask, until you unset it (i.e when refcount = 0), the mask 
will be set in the lower layer. swsmu layer will take care of requesting 
FW the highest priority. I don't think that needs to be repeated at this 
level.


At this layer, all you need is to refcount the requests and make the 
request.


When refcount of a profile becomes non-zero (only one-time), place one 
request for that profile. As swsmu layer maintains the workload mask, it 
will take the new profile also into consideration while requesting for 
the one  with the highest priority.


When refcount of a profile becomes zero, place a request to clear it. 
This is controlled by your idle work. As I see, it keeps an additional 
100ms tolerance before placing a clear request. In that way, there is no 
need to cancel that work.


Inside idle work handler -
Loop through the profiles that are set and clear those profiles whose 
refcount is zero.


Thus if a job starts during the 100ms delay, idle work won't see the ref 
count as zero and then it won't place a request to clear out that profile.
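
Roughly, a sketch of the put()/idle-work side being described here 
(illustrative only - it reuses the field names from this patch set, not 
the actual implementation):

static void amdgpu_power_profile_put(struct amdgpu_device *adev,
				     enum PP_SMC_POWER_PROFILE profile)
{
	struct amdgpu_smu_workload *workload = &adev->smu_workload;

	atomic_dec(&workload->power_profile_ref[profile]);
	/* No cancel needed; the idle work rechecks the refcounts itself */
	schedule_delayed_work(&workload->smu_delayed_work, SMU_IDLE_TIMEOUT);
}

static void amdgpu_power_profile_idle_work(struct work_struct *work)
{
	struct amdgpu_smu_workload *workload =
		container_of(work, struct amdgpu_smu_workload,
			     smu_delayed_work.work);
	struct amdgpu_device *adev = workload->adev;
	unsigned long mask;
	int profile;

	mutex_lock(&workload->workload_lock);
	mask = workload->submit_workload_status;
	for_each_set_bit(profile, &mask, PP_SMC_POWER_PROFILE_COUNT) {
		/* A job submitted during the 100ms delay keeps the refcount > 0 */
		if (atomic_read(&workload->power_profile_ref[profile]))
			continue;
		if (!amdgpu_dpm_switch_power_profile(adev, profile, false))
			workload->submit_workload_status &= ~BIT(profile);
	}
	mutex_unlock(&workload->workload_lock);
}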



On 8/22/2023 10:21 AM, Lazar, Lijo wrote:



On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will clear the GPU
power profile after job finished.

This is how it works:
- schedular will set the GPU power profile based on ring_type.
- Schedular will clear the GPU Power profile once job finished.
- Here, the *_workload_profile_set function will set the GPU
   power profile and the *_workload_profile_put function will
   schedule the smu_delayed_work task after 100ms delay. This
   smu_delayed_work task will clear a GPU power profile if any
   new jobs are not scheduled within 100 ms. But if any new job
   comes within 100ms then the *_workload_profile_set function
   will cancel this work and set the GPU power profile based on
   preferences.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 97 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
  2 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c

index e661cc5b3d92..6367eb88a44d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,9 @@
    #include "amdgpu.h"
  +/* 100 millsecond timeout */
+#define SMU_IDLE_TIMEOUT    msecs_to_jiffies(100)
+
  static enum PP_SMC_POWER_PROFILE
  ring_to_power_profile(uint32_t ring_type)
  {
@@ -59,6 +62,80 @@ amdgpu_power_profile_set(struct amdgpu_device *adev,
  return ret;
  }
  +static int
+amdgpu_power_profile_clear(struct amdgpu_device *adev,
+   enum PP_SMC_POWER_PROFILE profile)
+{
+    int ret = amdgpu_dpm_switch_power_profile(adev, profile, false);
+
+    if (!ret) {
+    /* Clear the bit for the submitted workload profile */
+    adev->smu_workload.submit_workload_status &=

Re: [PATCH v2 4/7] drm/amdgpu: Add suspend function to clear the GPU power profile.

2023-08-21 Thread Lazar, Lijo




On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a suspend function that will clear the GPU
power profile before going into suspend state.

v2:
- Add the new suspend function based on review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 23 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  2 ++
  3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index cd3bf641b630..3b70e657b439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4212,6 +4212,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
  
  	amdgpu_ras_suspend(adev);
  
+	amdgpu_workload_profile_suspend(adev);

+
amdgpu_device_ip_suspend_phase1(adev);
  
  	if (!adev->in_s0ix)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
index 6367eb88a44d..44ca8e986984 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -174,6 +174,29 @@ void amdgpu_workload_profile_set(struct amdgpu_device 
*adev,
mutex_unlock(&workload->workload_lock);
  }
  
+void amdgpu_workload_profile_suspend(struct amdgpu_device *adev)

+{
+   struct amdgpu_smu_workload *workload = &adev->smu_workload;
+   int ret;
+
+   mutex_lock(&workload->workload_lock);
+   cancel_delayed_work_sync(&workload->smu_delayed_work);


Another deadlock candidate. Between fini() and suspend(), the only 
difference probably could be initialization status. If so, just use a 
helper that is used during fini() and suspend().


Thanks,
Lijo


+
+   /* Clear all the set GPU power profile*/
+   for (int index = fls(workload->submit_workload_status);
+index > 0; index--) {
+   if (workload->submit_workload_status & (1 << index)) {
+   atomic_set(&workload->power_profile_ref[index], 0);
+   ret = amdgpu_power_profile_clear(adev, index);
+   if (ret)
+   DRM_WARN("Failed to clear power profile %s, err = 
%d\n",
+amdgpu_workload_mode_name[index], ret);
+   }
+   }
+   workload->submit_workload_status = 0;
+   mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h
index ee1f87257f2d..0acd8769ec52 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -52,6 +52,8 @@ void amdgpu_workload_profile_put(struct amdgpu_device *adev,
  void amdgpu_workload_profile_set(struct amdgpu_device *adev,
 uint32_t ring_type);
  
+void amdgpu_workload_profile_suspend(struct amdgpu_device *adev);

+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
  
  void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH v2 2/7] drm/amdgpu: Add new function to set GPU power profile

2023-08-21 Thread Lazar, Lijo




On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will change the GPU
power profile based on a submitted job. This can optimize
the power performance when the workload is on.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 56 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
  2 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
index 32166f482f77..e661cc5b3d92 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,62 @@
  
  #include "amdgpu.h"
  
+static enum PP_SMC_POWER_PROFILE

+ring_to_power_profile(uint32_t ring_type)
+{
+   switch (ring_type) {
+   case AMDGPU_RING_TYPE_GFX:
+   return PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+   case AMDGPU_RING_TYPE_COMPUTE:
+   return PP_SMC_POWER_PROFILE_COMPUTE;
+   case AMDGPU_RING_TYPE_UVD:
+   case AMDGPU_RING_TYPE_VCE:
+   case AMDGPU_RING_TYPE_UVD_ENC:
+   case AMDGPU_RING_TYPE_VCN_DEC:
+   case AMDGPU_RING_TYPE_VCN_ENC:
+   case AMDGPU_RING_TYPE_VCN_JPEG:
+   return PP_SMC_POWER_PROFILE_VIDEO;
+   default:
+   return PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+   }
+}
+
+static int
+amdgpu_power_profile_set(struct amdgpu_device *adev,
+enum PP_SMC_POWER_PROFILE profile)
+{
+   int ret = amdgpu_dpm_switch_power_profile(adev, profile, true);
+


You don't need to interact with FW for every set() call. Only send the 
message if workload_status doesn't have the profile set or refcount is 
zero. Otherwise, only need to increment the refcount.
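
For illustration, a sketch of that (names taken from this patch; it 
assumes the caller holds workload_lock as it does today):

static int amdgpu_power_profile_set(struct amdgpu_device *adev,
				    enum PP_SMC_POWER_PROFILE profile)
{
	struct amdgpu_smu_workload *workload = &adev->smu_workload;
	int ret;

	/* Message the FW only when this profile is not already requested */
	if (!(workload->submit_workload_status & (1 << profile)) ||
	    !atomic_read(&workload->power_profile_ref[profile])) {
		ret = amdgpu_dpm_switch_power_profile(adev, profile, true);
		if (ret)
			return ret;
		workload->submit_workload_status |= (1 << profile);
	}

	/* Otherwise just account for one more user of this profile */
	atomic_inc(&workload->power_profile_ref[profile]);
	return 0;
}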


Thanks,
Lijo


+   if (!ret) {
+   /* Set the bit for the submitted workload profile */
+   adev->smu_workload.submit_workload_status |= (1 << profile);
+   atomic_inc(&adev->smu_workload.power_profile_ref[profile]);
+   }
+
+   return ret;
+}
+
+void amdgpu_workload_profile_set(struct amdgpu_device *adev,
+uint32_t ring_type)
+{
+   struct amdgpu_smu_workload *workload = &adev->smu_workload;
+   enum PP_SMC_POWER_PROFILE profile = ring_to_power_profile(ring_type);
+   int ret;
+
+   if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT)
+   return;
+
+   mutex_lock(&workload->workload_lock);
+
+   ret = amdgpu_power_profile_set(adev, profile);
+   if (ret) {
+   DRM_WARN("Failed to set workload profile to %s, error = %d\n",
+amdgpu_workload_mode_name[profile], ret);
+   }
+
+   mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev)
  {
adev->smu_workload.adev = adev;
diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h 
b/drivers/gpu/drm/amd/include/amdgpu_workload.h
index 5d0f068422d4..5022f28fc2f9 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
@@ -46,6 +46,9 @@ static const char * const amdgpu_workload_mode_name[] = {
"Window3D"
  };
  
+void amdgpu_workload_profile_set(struct amdgpu_device *adev,

+uint32_t ring_type);
+
  void amdgpu_workload_profile_init(struct amdgpu_device *adev);
  
  void amdgpu_workload_profile_fini(struct amdgpu_device *adev);


Re: [PATCH v2 3/7] drm/amdgpu: Add new function to put GPU power profile

2023-08-21 Thread Lazar, Lijo




On 8/21/2023 12:17 PM, Arvind Yadav wrote:

This patch adds a function which will clear the GPU
power profile after job finished.

This is how it works:
- schedular will set the GPU power profile based on ring_type.
- Schedular will clear the GPU Power profile once job finished.
- Here, the *_workload_profile_set function will set the GPU
   power profile and the *_workload_profile_put function will
   schedule the smu_delayed_work task after 100ms delay. This
   smu_delayed_work task will clear a GPU power profile if any
   new jobs are not scheduled within 100 ms. But if any new job
   comes within 100ms then the *_workload_profile_set function
   will cancel this work and set the GPU power profile based on
   preferences.

v2:
- Splitting workload_profile_set and workload_profile_put
   into two separate patches.
- Addressed review comment.

Cc: Shashank Sharma 
Cc: Christian Koenig 
Cc: Alex Deucher 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 97 +++
  drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
  2 files changed, 100 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
index e661cc5b3d92..6367eb88a44d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
@@ -24,6 +24,9 @@
  
  #include "amdgpu.h"
  
+/* 100 millsecond timeout */

+#define SMU_IDLE_TIMEOUT   msecs_to_jiffies(100)
+
  static enum PP_SMC_POWER_PROFILE
  ring_to_power_profile(uint32_t ring_type)
  {
@@ -59,6 +62,80 @@ amdgpu_power_profile_set(struct amdgpu_device *adev,
return ret;
  }
  
+static int

+amdgpu_power_profile_clear(struct amdgpu_device *adev,
+  enum PP_SMC_POWER_PROFILE profile)
+{
+   int ret = amdgpu_dpm_switch_power_profile(adev, profile, false);
+
+   if (!ret) {
+   /* Clear the bit for the submitted workload profile */
+   adev->smu_workload.submit_workload_status &= ~(1 << profile);
+   }
+
+   return ret;
+}
+
+static void
+amdgpu_power_profile_idle_work_handler(struct work_struct *work)
+{
+
+	struct amdgpu_smu_workload *workload = container_of(work,
+						struct amdgpu_smu_workload,
+						smu_delayed_work.work);
+   struct amdgpu_device *adev = workload->adev;
+   bool reschedule = false;
+   int index  = fls(workload->submit_workload_status);
+   int ret;
+
+   mutex_lock(&workload->workload_lock);
+   for (; index > 0; index--) {


Why not use for_each_set_bit?
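
For example, something along these lines (a sketch; it assumes 
submit_workload_status can be copied into an unsigned long for the 
iteration):

	unsigned long mask = workload->submit_workload_status;
	int index;

	/* Visits only the set bits, lowest first */
	for_each_set_bit(index, &mask, PP_SMC_POWER_PROFILE_COUNT) {
		if (atomic_read(&workload->power_profile_ref[index]))
			continue;
		ret = amdgpu_power_profile_clear(adev, index);
		if (ret)
			DRM_WARN("Failed to clear workload %s, error = %d\n",
				 amdgpu_workload_mode_name[index], ret);
	}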


+   int val = atomic_read(&workload->power_profile_ref[index]);
+
+   if (val) {
+   reschedule = true;


Why do you need to do reschedule? For each put(), a schedule is called. 
If refcount is not zero, that means some other job has already set the 
profile. It is supposed to call put() and at that time, this job will be 
run to clear it anyway, right?



+   } else {
+   if (workload->submit_workload_status &
+   (1 << index)) {
+   ret = amdgpu_power_profile_clear(adev, index);
+   if (ret) {
+   DRM_WARN("Failed to clear workload %s,error 
= %d\n",
+
amdgpu_workload_mode_name[index], ret);
+   goto exit;
+   }
+   }
+   }
+   }
+   if (reschedule)
+   schedule_delayed_work(&workload->smu_delayed_work,
+ SMU_IDLE_TIMEOUT);
+exit:
+   mutex_unlock(&workload->workload_lock);
+}
+
+void amdgpu_workload_profile_put(struct amdgpu_device *adev,
+uint32_t ring_type)
+{
+   struct amdgpu_smu_workload *workload = &adev->smu_workload;
+   enum PP_SMC_POWER_PROFILE profile = ring_to_power_profile(ring_type);
+
+   if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT)
+   return;
+
+   mutex_lock(&workload->workload_lock);
+
+   if (!atomic_read(&workload->power_profile_ref[profile])) {
+   DRM_WARN("Power profile %s ref. count error\n",
+amdgpu_workload_mode_name[profile]);
+   } else {
+   atomic_dec(&workload->power_profile_ref[profile]);
+   schedule_delayed_work(&workload->smu_delayed_work,
+ SMU_IDLE_TIMEOUT);
+   }
+
+   mutex_unlock(&workload->workload_lock);
+}
+
  void amdgpu_workload_profile_set(struct amdgpu_device *adev,
 uint32_t ring_type)
  {
@@ -70,13 +147,30 @@ void amdgpu_workload_profile_set(struct amdgpu_device *adev,
return;
  
  	mutex_lock(&workload

Re: [PATCH] Documentation/gpu: Update amdgpu documentation

2023-08-16 Thread Lazar, Lijo




On 8/17/2023 2:16 AM, Alex Deucher wrote:

On Wed, Aug 16, 2023 at 12:15 AM Lijo Lazar  wrote:


7957ec80ef97 ("drm/amdgpu: Add FRU sysfs nodes only if needed") moved
the documentation for some of the sysfs nodes to amdgpu_fru_eeprom.c.
Update the documentation accordingly.

Signed-off-by: Lijo Lazar 
---
  Documentation/gpu/amdgpu/driver-misc.rst | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/gpu/amdgpu/driver-misc.rst 
b/Documentation/gpu/amdgpu/driver-misc.rst
index be131e963d87..26334e54447b 100644
--- a/Documentation/gpu/amdgpu/driver-misc.rst
+++ b/Documentation/gpu/amdgpu/driver-misc.rst
@@ -11,19 +11,19 @@ via sysfs
  product_name
  

-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
 :doc: product_name

  product_number
  --

-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
 :doc: product_name


I think this should be product_number

Alex



Thanks, made the change while pushing.

Thanks,
Lijo



  serial_number
  -

-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
 :doc: serial_number

  unique_id
--
2.25.1



RE: [PATCH] drm/amdgpu: Remove gfxoff check in GFX v9.4.3

2023-08-15 Thread Lazar, Lijo
[AMD Official Use Only - General]

Sorry, for the spam. Please ignore this.

Thanks,
Lijo

-Original Message-
From: amd-gfx  On Behalf Of Lijo Lazar
Sent: Wednesday, August 16, 2023 9:37 AM
To: amd-...@lists.freedesktop.org
Cc: Deucher, Alexander ; s...@canb.auug.org.au; 
airl...@redhat.com; dri-devel@lists.freedesktop.org; Zhang, Hawking 

Subject: [PATCH] drm/amdgpu: Remove gfxoff check in GFX v9.4.3

GFXOFF feature is not there for GFX 9.4.3 ASICs.

Signed-off-by: Lijo Lazar 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index d8d6807e7b96..57ed4e5c294c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -337,13 +337,11 @@ static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct 
amdgpu_device *adev)  {
uint64_t clock;

-   amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), 
regRLC_GPU_CLOCK_COUNT_LSB) |
((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), 
regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
mutex_unlock(&adev->gfx.gpu_clock_mutex);
-   amdgpu_gfx_off_ctrl(adev, true);

return clock;
 }
--
2.25.1



RE: [PATCH] drm/amd/pm: Vangogh: Add new gpu_metrics_v2_4 to acquire gpu_metrics

2023-06-20 Thread Lazar, Lijo
[AMD Official Use Only - General]

Could you add the expected units of voltage/current in 2.4 metrics structure? 
Is it mV/mA or mV/A?
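
For example, the unit could be spelled out next to each new field, 
whichever one applies (mV/mA below are only placeholders to show the 
annotation, not a statement of the actual unit):

	/* Power/Voltage (in mV) */
	uint16_t			average_cpu_voltage;
	uint16_t			average_soc_voltage;
	uint16_t			average_gfx_voltage;

	/* Power/Current (in mA) */
	uint16_t			average_cpu_current;
	uint16_t			average_soc_current;
	uint16_t			average_gfx_current;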

Thanks,
Lijo

-Original Message-
From: amd-gfx  On Behalf Of Wenyou Yang
Sent: Thursday, June 1, 2023 7:08 AM
To: Deucher, Alexander ; Limonciello, Mario 
; Koenig, Christian ; Pan, 
Xinhui ; Quan, Evan 
Cc: linux-ker...@vger.kernel.org; dri-devel@lists.freedesktop.org; Yang, WenYou 
; amd-...@lists.freedesktop.org; Yuan, Perry 
; Liang, Richard qi 
Subject: [PATCH] drm/amd/pm: Vangogh: Add new gpu_metrics_v2_4 to acquire 
gpu_metrics

To acquire the voltage and current info from gpu_metrics interface, but 
gpu_metrics_v2_3 doesn't contain them, and to be backward compatible, add new 
gpu_metrics_v2_4 structure.

Acked-by: Evan Quan 
Signed-off-by: Wenyou Yang 
---
 .../gpu/drm/amd/include/kgd_pp_interface.h|  69 +++
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 109 --
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c|   3 +
 3 files changed, 172 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 9f542f6e19ed..0f37dafafcf9 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -892,4 +892,73 @@ struct gpu_metrics_v2_3 {
uint16_taverage_temperature_core[8]; // average 
CPU core temperature on APUs
uint16_taverage_temperature_l3[2];
 };
+
+struct gpu_metrics_v2_4 {
+   struct metrics_table_header common_header;
+
+   /* Temperature */
+   uint16_ttemperature_gfx;
+   uint16_ttemperature_soc;
+   uint16_ttemperature_core[8];
+   uint16_ttemperature_l3[2];
+
+   /* Utilization */
+   uint16_taverage_gfx_activity;
+   uint16_taverage_mm_activity;
+
+   /* Driver attached timestamp (in ns) */
+   uint64_tsystem_clock_counter;
+
+   /* Power/Energy */
+   uint16_taverage_socket_power;
+   uint16_taverage_cpu_power;
+   uint16_taverage_soc_power;
+   uint16_taverage_gfx_power;
+   uint16_taverage_core_power[8];
+
+   /* Average clocks */
+   uint16_taverage_gfxclk_frequency;
+   uint16_taverage_socclk_frequency;
+   uint16_taverage_uclk_frequency;
+   uint16_taverage_fclk_frequency;
+   uint16_taverage_vclk_frequency;
+   uint16_taverage_dclk_frequency;
+
+   /* Current clocks */
+   uint16_tcurrent_gfxclk;
+   uint16_tcurrent_socclk;
+   uint16_tcurrent_uclk;
+   uint16_tcurrent_fclk;
+   uint16_tcurrent_vclk;
+   uint16_tcurrent_dclk;
+   uint16_tcurrent_coreclk[8];
+   uint16_tcurrent_l3clk[2];
+
+   /* Throttle status (ASIC dependent) */
+   uint32_tthrottle_status;
+
+   /* Fans */
+   uint16_tfan_pwm;
+
+   uint16_tpadding[3];
+
+   /* Throttle status (ASIC independent) */
+   uint64_tindep_throttle_status;
+
+   /* Average Temperature */
+   uint16_taverage_temperature_gfx;
+   uint16_taverage_temperature_soc;
+   uint16_taverage_temperature_core[8];
+   uint16_taverage_temperature_l3[2];
+
+   /* Power/Voltage */
+   uint16_taverage_cpu_voltage;
+   uint16_taverage_soc_voltage;
+   uint16_taverage_gfx_voltage;
+
+   /* Power/Current */
+   uint16_taverage_cpu_current;
+   uint16_taverage_soc_current;
+   uint16_taverage_gfx_current;
+};
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 067b4e0b026c..185d0b50ee8e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -1854,6 +1854,86 @@ static ssize_t vangogh_get_gpu_metrics_v2_3(struct 
smu_context *smu,
return sizeof(struct gpu_metrics_v2_3);  }

+static ssize_t vangogh_get_gpu_metrics_v2_4(struct smu_context *smu,
+   void **ta

Re: [PATCH V3 4/7] drm/amd/pm: setup the framework to support Wifi RFI mitigation feature

2023-06-19 Thread Lazar, Lijo




On 6/16/2023 12:27 PM, Evan Quan wrote:

With WBRF feature supported, as a driver responding to the frequencies,
amdgpu driver is able to do shadow pstate switching to mitigate possible
interference(between its (G-)DDR memory clocks and local radio module
frequency bands used by Wifi 6/6e/7).

To make WBRF feature functional, the kernel needs to be configured with
CONFIG_ACPI_WBRF and the platform is equipped with necessary ACPI based
mechanism to get amdgpu driver notified.

Signed-off-by: Evan Quan 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  26 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c  |  63 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  19 ++
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 184 ++
  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  20 ++
  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
  6 files changed, 315 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 02b827785e39..2f2ec64ed1b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -50,6 +50,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -241,6 +242,7 @@ extern int amdgpu_num_kcq;
  #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
  extern int amdgpu_vcnfw_log;
  extern int amdgpu_sg_display;
+extern int amdgpu_wbrf;
  
  #define AMDGPU_VM_MAX_NUM_CTX			4096

  #define AMDGPU_SG_THRESHOLD   (256*1024*1024)
@@ -741,6 +743,9 @@ struct amdgpu_reset_domain;
   */
  #define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size)
  
+typedef

+void (*wbrf_notify_handler) (struct amdgpu_device *adev);
+
  struct amdgpu_device {
struct device   *dev;
struct pci_dev  *pdev;
@@ -1050,6 +1055,8 @@ struct amdgpu_device {
  
  	booljob_hang;

booldc_enabled;
+
+   wbrf_notify_handler wbrf_event_handler;
  };
  
  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)

@@ -1381,6 +1388,25 @@ static inline int amdgpu_acpi_smart_shift_update(struct 
drm_device *dev,
 enum amdgpu_ss ss_state) { 
return 0; }
  #endif
  
+#if defined(CONFIG_ACPI_WBRF)

+bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev);
+int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev,
+struct wbrf_ranges_out 
*exclusions_out);
+int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev,
+wbrf_notify_handler handler);
+int amdgpu_acpi_unregister_wbrf_notify_handler(struct amdgpu_device *adev);
+#else
+static inline bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev) { 
return false; }
+static inline
+int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev,
+struct wbrf_ranges_out 
*exclusions_out) { return 0; }
+static inline
+int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev,
+wbrf_notify_handler handler) { 
return 0; }
+static inline
+int amdgpu_acpi_unregister_wbrf_notify_handler(struct amdgpu_device *adev) { 
return 0; }
+#endif
+
  #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
  bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
  bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index aeeec211861c..efbe6dd91d1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1105,3 +1105,66 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device 
*adev)
  }
  
  #endif /* CONFIG_SUSPEND */

+
+#ifdef CONFIG_ACPI_WBRF
+bool amdgpu_acpi_is_wbrf_supported(struct amdgpu_device *adev)
+{
+   struct acpi_device *acpi_dev = ACPI_COMPANION(adev->dev);
+
+   if (!acpi_dev)
+   return false;
+
+   return wbrf_supported_consumer(acpi_dev);
+}
+
+int amdgpu_acpi_wbrf_retrieve_exclusions(struct amdgpu_device *adev,
+struct wbrf_ranges_out *exclusions_out)
+{
+   struct acpi_device *acpi_dev = ACPI_COMPANION(adev->dev);
+
+   if (!acpi_dev)
+   return -ENODEV;
+
+   return wbrf_retrieve_exclusions(acpi_dev, exclusions_out);
+}
+
+#define CPM_GPU_NOTIFY_COMMAND 0x55
+static void amdgpu_acpi_wbrf_event(acpi_handle handle, u32 event, void *data)
+{
+   struct amdgpu_device *adev = (struct amdgpu_device *)data;
+
+   if (event == CPM_GPU_NOTIFY_COMMAND &&
+   adev->wbrf_event_handler)
+   adev->wbrf_event_handler(adev); > +}
+
+int amdgpu_acpi_register_wbrf_notify_handler(struct amdgpu_device *adev,
+wbrf_notify_handler handler)

Re: [PATCH] drm/amdgpu: resove reboot exception for si oland

2023-03-10 Thread Lazar, Lijo
[AMD Official Use Only - General]

I recall that there was a previous discussion around this and that time we 
found that the range is already set earlier during DPM enablement.

The suspected root cause was enable/disable of thermal alert within this call 
to set range again.

Thanks,
Lijo

From: amd-gfx  on behalf of Alex Deucher 

Sent: Friday, March 10, 2023 8:51:06 PM
To: Chen, Guchun 
Cc: David Airlie ; Pan, Xinhui ; Zhenneng 
Li ; amd-...@lists.freedesktop.org 
; linux-ker...@vger.kernel.org 
; dri-devel@lists.freedesktop.org 
; Daniel Vetter ; Deucher, 
Alexander ; Koenig, Christian 

Subject: Re: [PATCH] drm/amdgpu: resove reboot exception for si oland

On Fri, Mar 10, 2023 at 3:18 AM Chen, Guchun  wrote:
>
>
> > -Original Message-
> > From: amd-gfx  On Behalf Of
> > Zhenneng Li
> > Sent: Friday, March 10, 2023 3:40 PM
> > To: Deucher, Alexander 
> > Cc: David Airlie ; Pan, Xinhui ;
> > linux-ker...@vger.kernel.org; dri-devel@lists.freedesktop.org; Zhenneng Li
> > ; amd-...@lists.freedesktop.org; Daniel Vetter
> > ; Koenig, Christian 
> > Subject: [PATCH] drm/amdgpu: resove reboot exception for si oland
> >
> > During reboot test on arm64 platform, it may failure on boot.
> >
> > The error message are as follows:
> > [6.996395][ 7] [  T295] [drm:amdgpu_device_ip_late_init [amdgpu]]
> > *ERROR*
> >   late_init of IP block  failed -22
> > [7.006919][ 7] [  T295] amdgpu :04:00.0: amdgpu_device_ip_late_init
> > failed
> > [7.014224][ 7] [  T295] amdgpu :04:00.0: Fatal error during GPU init
> > ---
> >  drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 3 ---
> >  1 file changed, 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > index d6d9e3b1b2c0..dee51c757ac0 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > @@ -7632,9 +7632,6 @@ static int si_dpm_late_init(void *handle)
> >   if (!adev->pm.dpm_enabled)
> >   return 0;
> >
> > - ret = si_set_temperature_range(adev);
> > - if (ret)
> > - return ret;
>
> si_set_temperature_range should be platform agnostic. Can you please 
> elaborate more?
>

Yes.  Not setting this means we won't get thermal interrupts.  We
shouldn't skip this.

Alex


> Regards,
> Guchun
>
> >  #if 0 //TODO ?
> >   si_dpm_powergate_uvd(adev, true);
> >  #endif
> > --
> > 2.25.1
>


Re: [PATCH v7 20/45] drm/amd: Parse both v1 and v2 TA microcode headers using same function

2023-01-05 Thread Lazar, Lijo




On 1/5/2023 10:31 PM, Mario Limonciello wrote:

Several IP versions duplicate code and can't use the common helpers.
Move this code into a single function so that the helpers can be used.

Signed-off-by: Mario Limonciello 
---
v6->v7:
  * Drop tags
  * Only set adev->psp.securedisplay_context.context on PSPv12 Renoir and
PSP v10 which matches previous behavior.  If it should match for Cezanne
and PSPv11 too we can undo this part of the check.
v5->v6:
  * Rebase on earlier patches
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 123 ++--
  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c  |  64 +---
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  |  80 ++-
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c  |  66 ++---
  4 files changed, 115 insertions(+), 218 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7a2fc920739b..bdc2bf87a286 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3272,41 +3272,76 @@ static int parse_ta_bin_descriptor(struct psp_context 
*psp,
return 0;
  }
  
-int psp_init_ta_microcode(struct psp_context *psp,

- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
  {
+   const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
-   char fw_name[PSP_FW_NAME_LEN];
-   const struct ta_firmware_header_v2_0 *ta_hdr;
-   int err = 0;
-   int ta_index = 0;
  
-	if (!chip_name) {

-   dev_err(adev->dev, "invalid chip name for ta microcode\n");
+   ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data;
+
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
-   }
  
-	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);

-   err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   adev->psp.xgmi_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->xgmi.fw_version);
+   adev->psp.xgmi_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->xgmi.size_bytes);
+   adev->psp.xgmi_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+   adev->psp.ras_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->ras.fw_version);
+   adev->psp.ras_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->ras.size_bytes);
+   adev->psp.ras_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->ras.offset_bytes);
+
+   adev->psp.hdcp_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->hdcp.fw_version);
+   adev->psp.hdcp_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->hdcp.size_bytes);
+   adev->psp.hdcp_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+   adev->psp.dtm_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->dtm.fw_version);
+   adev->psp.dtm_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->dtm.size_bytes);
+   adev->psp.dtm_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+   adev->psp.securedisplay_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->securedisplay.fw_version);
+   adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+   adev->psp.securedisplay_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
  
-	err = amdgpu_ucode_validate(adev->psp.ta_fw);

-   if (err)
-   goto out;
+   return 0;
+}
+
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+   const struct ta_firmware_header_v2_0 *ta_hdr;
+   struct amdgpu_device *adev = psp->adev;
+   int err = 0;
+   int ta_index = 0;
  
  	ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
  
-	if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) {

-   dev_err(adev->dev, "unsupported TA header version\n");
-   err = -EINVAL;
-   goto out;
-   }
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
+   return -EINVAL;
  
  	if (le32_to_cpu(ta_hdr->ta_fw_bin_coun

Re: [PATCH v6 20/45] drm/amd: Parse both v1 and v2 TA microcode headers using same function

2023-01-05 Thread Lazar, Lijo
[AMD Official Use Only - General]

In general, one file carries multiple binaries. Maybe the check is there because the 
same binary file is used in a derivative of Renoir or in PSP v12 based SOCs.

Keeping the size of the binary as 0 prevents the load (I think there's a check).

Apart from this one, rest of the series is

Reviewed-by: Lijo Lazar mailto:lijo.la...@amd.com>>


Thanks,
Lijo

From: Limonciello, Mario 
Sent: Thursday, January 5, 2023 9:34:49 PM
To: Lazar, Lijo ; Deucher, Alexander 
; linux-ker...@vger.kernel.org 

Cc: Javier Martinez Canillas ; Carlos Soriano Sanchez 
; amd-...@lists.freedesktop.org 
; dri-devel@lists.freedesktop.org 
; David Airlie ; Daniel 
Vetter ; Koenig, Christian ; Pan, 
Xinhui 
Subject: RE: [PATCH v6 20/45] drm/amd: Parse both v1 and v2 TA microcode 
headers using same function

[AMD Official Use Only - General]



> -Original Message-
> From: Lazar, Lijo 
> Sent: Thursday, January 5, 2023 07:22
> To: Limonciello, Mario ; Deucher, Alexander
> ; linux-ker...@vger.kernel.org
> Cc: Javier Martinez Canillas ; Carlos Soriano Sanchez
> ; amd-...@lists.freedesktop.org; dri-
> de...@lists.freedesktop.org; David Airlie ; Daniel Vetter
> ; Koenig, Christian ; Pan,
> Xinhui 
> Subject: Re: [PATCH v6 20/45] drm/amd: Parse both v1 and v2 TA microcode
> headers using same function
>
>
>
> On 1/5/2023 9:12 AM, Mario Limonciello wrote:
> > Several IP versions duplicate code and can't use the common helpers.
> > Move this code into a single function so that the helpers can be used.
> >
> > Reviewed-by: Alex Deucher 
> > Signed-off-by: Mario Limonciello 
> > ---
> > v5->v6:
> >   * Rebase on earlier patches
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 120
> ++--
> >   drivers/gpu/drm/amd/amdgpu/psp_v10_0.c  |  64 +
> >   drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  |  77 ++-
> >   drivers/gpu/drm/amd/amdgpu/psp_v12_0.c  |  62 +---
> >   4 files changed, 109 insertions(+), 214 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > index 7a2fc920739b..d971e3785eaf 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > @@ -3272,41 +3272,75 @@ static int parse_ta_bin_descriptor(struct
> psp_context *psp,
> >  return 0;
> >   }
> >
> > -int psp_init_ta_microcode(struct psp_context *psp,
> > - const char *chip_name)
> > +static int parse_ta_v1_microcode(struct psp_context *psp)
> >   {
> > +   const struct ta_firmware_header_v1_0 *ta_hdr;
> >  struct amdgpu_device *adev = psp->adev;
> > -   char fw_name[PSP_FW_NAME_LEN];
> > -   const struct ta_firmware_header_v2_0 *ta_hdr;
> > -   int err = 0;
> > -   int ta_index = 0;
> >
> > -   if (!chip_name) {
> > -   dev_err(adev->dev, "invalid chip name for ta microcode\n");
> > +   ta_hdr = (const struct ta_firmware_header_v1_0 *)
> > +adev->psp.ta_fw->data;
> > +
> > +   if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
> >  return -EINVAL;
> > +
> > +   adev->psp.xgmi_context.context.bin_desc.fw_version =
> > +   le32_to_cpu(ta_hdr->xgmi.fw_version);
> > +   adev->psp.xgmi_context.context.bin_desc.size_bytes =
> > +   le32_to_cpu(ta_hdr->xgmi.size_bytes);
> > +   adev->psp.xgmi_context.context.bin_desc.start_addr =
> > +   (uint8_t *)ta_hdr +
> > +   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
> > +   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr-
> >header.ucode_version);
> > +   adev->psp.ras_context.context.bin_desc.fw_version =
> > +   le32_to_cpu(ta_hdr->ras.fw_version);
> > +   adev->psp.ras_context.context.bin_desc.size_bytes =
> > +   le32_to_cpu(ta_hdr->ras.size_bytes);
> > +   adev->psp.ras_context.context.bin_desc.start_addr =
> > +   (uint8_t *)adev-
> >psp.xgmi_context.context.bin_desc.start_addr +
> > +   le32_to_cpu(ta_hdr->ras.offset_bytes);
> > +   adev->psp.hdcp_context.context.bin_desc.fw_version =
> > +   le32_to_cpu(ta_hdr->hdcp.fw_version);
> > +   adev->psp.hdcp_context.context.bin_desc.size_bytes =
> > +   le32_to_cpu(ta_hdr->hdcp.size_bytes);
> > +   adev->psp.hdcp_context.context.bin_desc.start_addr =
> > +   (uint8_t *)ta_hdr +
> > +   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes

Re: [PATCH v6 20/45] drm/amd: Parse both v1 and v2 TA microcode headers using same function

2023-01-05 Thread Lazar, Lijo




On 1/5/2023 9:12 AM, Mario Limonciello wrote:

Several IP versions duplicate code and can't use the common helpers.
Move this code into a single function so that the helpers can be used.

Reviewed-by: Alex Deucher 
Signed-off-by: Mario Limonciello 
---
v5->v6:
  * Rebase on earlier patches
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 120 ++--
  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c  |  64 +
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  |  77 ++-
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c  |  62 +---
  4 files changed, 109 insertions(+), 214 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7a2fc920739b..d971e3785eaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3272,41 +3272,75 @@ static int parse_ta_bin_descriptor(struct psp_context 
*psp,
return 0;
  }
  
-int psp_init_ta_microcode(struct psp_context *psp,

- const char *chip_name)
+static int parse_ta_v1_microcode(struct psp_context *psp)
  {
+   const struct ta_firmware_header_v1_0 *ta_hdr;
struct amdgpu_device *adev = psp->adev;
-   char fw_name[PSP_FW_NAME_LEN];
-   const struct ta_firmware_header_v2_0 *ta_hdr;
-   int err = 0;
-   int ta_index = 0;
  
-	if (!chip_name) {

-   dev_err(adev->dev, "invalid chip name for ta microcode\n");
+   ta_hdr = (const struct ta_firmware_header_v1_0 *)
+adev->psp.ta_fw->data;
+
+   if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
return -EINVAL;
+
+   adev->psp.xgmi_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->xgmi.fw_version);
+   adev->psp.xgmi_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->xgmi.size_bytes);
+   adev->psp.xgmi_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.ras_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->ras.fw_version);
+   adev->psp.ras_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->ras.size_bytes);
+   adev->psp.ras_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->ras.offset_bytes);
+   adev->psp.hdcp_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->hdcp.fw_version);
+   adev->psp.hdcp_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->hdcp.size_bytes);
+   adev->psp.hdcp_context.context.bin_desc.start_addr =
+   (uint8_t *)ta_hdr +
+   le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+   adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+   adev->psp.dtm_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->dtm.fw_version);
+   adev->psp.dtm_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->dtm.size_bytes);
+   adev->psp.dtm_context.context.bin_desc.start_addr =
+   (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->dtm.offset_bytes);
+   if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+   adev->psp.securedisplay_context.context.bin_desc.fw_version =
+   le32_to_cpu(ta_hdr->securedisplay.fw_version);
+   adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+   le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+   adev->psp.securedisplay_context.context.bin_desc.start_addr =
+   (uint8_t 
*)adev->psp.hdcp_context.context.bin_desc.start_addr +
+   le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
}


psp_v10_0_init_microcode used to fetch securedisplay_context 
unconditionally, and now this is restricted to RENOIR following the logic 
in PSP v12. It would be better to fetch all FW details unconditionally and 
set size_bytes to 0 (just to be sure) in the specific PSP versions to 
prevent their load.
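
A sketch of that approach (illustrative only; the fields and the Renoir 
check come from this patch, the exact placement in the PSP v12 code is an 
assumption):

	/* after the common TA parsing, in the PSP v12 specific init */
	if (!(adev->apu_flags & AMD_APU_IS_RENOIR)) {
		/* size 0 prevents this TA from being loaded */
		adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
		adev->psp.securedisplay_context.context.bin_desc.start_addr = NULL;
	}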


Thanks,
Lijo

  
-	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);

-   err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
-   if (err)
-   goto out;
+   return 0;
+}
  
-	err = amdgpu_ucode_validate(adev->psp.ta_fw);

-   if (err)
-   goto out;
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+   const struct ta_firmware_header_v2_0 *ta_hdr;
+   struct amdgpu_device *adev = psp->adev;
+   int err = 0;
+   int ta_index = 0;
  
  	ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
  
-	if (le16_to_cpu(ta_hdr->header.hea

Re: [PATCH v6 05/45] drm/amd: Add a new helper for loading/validating microcode

2023-01-04 Thread Lazar, Lijo




On 1/5/2023 10:53 AM, Mario Limonciello wrote:

On 1/4/23 23:07, Lazar, Lijo wrote:



On 1/5/2023 9:12 AM, Mario Limonciello wrote:

All microcode runs a basic validation after it's been loaded. Each
IP block as part of init will run both.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v5->v6:
  * Fix argument to be ** not *
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 36 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  3 ++
  2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c

index eafcddce58d3..8ebfec12da87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,39 @@ void amdgpu_ucode_ip_version_decode(struct 
amdgpu_device *adev, int block_type,
  snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, 
rev);

  }
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and 
amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, 
remap
+ * the error code to -ENODEV, so that early_init functions will fail 
to load.

+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct 
firmware **fw,

+ const char *fw_name)
+{
+    int err = request_firmware(fw, fw_name, adev->dev);
+
+    if (err)
+    return -ENODEV;
+    err = amdgpu_ucode_validate(*fw);
+    if (err)
+    dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+


Missed this earlier. If validate fails, shouldn't this undo the 
request operation by calling release?


Actually that was original design, but there is one place in the 
codebase that expects that ucode validation can fail, and so leave the 
evaluate of error code and cleanup outside of helper.




I see. Does request_firmware assure that the fw pointer is always NULL if it 
fails? Or should that be done here if the request fails? In subsequent 
patches, I see clients calling release without checking what caused the 
failure.
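
A sketch of a defensive variant for the request-failure path (illustrative; 
whether it is needed depends on what request_firmware itself guarantees, 
and validation failures still leave the cleanup to the caller as described 
above):

int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
			 const char *fw_name)
{
	int err = request_firmware(fw, fw_name, adev->dev);

	if (err) {
		/* make a later amdgpu_ucode_release()/release_firmware() a no-op */
		*fw = NULL;
		return -ENODEV;
	}

	err = amdgpu_ucode_validate(*fw);
	if (err)
		dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);

	return err;
}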


Thanks,
Lijo



Thanks,
Lijo


+    return err;
+}
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware **fw)
+{
+    release_firmware(*fw);
+    *fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h

index 552e06929229..848579d4988b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,9 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
  void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header 
*hdr);
  void amdgpu_ucode_print_gpu_info_hdr(const struct 
common_firmware_header *hdr);

  int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct 
firmware **fw,

+ const char *fw_name);
+void amdgpu_ucode_release(const struct firmware **fw);
  bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
  uint16_t hdr_major, uint16_t hdr_minor);




Re: [PATCH v6 05/45] drm/amd: Add a new helper for loading/validating microcode

2023-01-04 Thread Lazar, Lijo




On 1/5/2023 9:12 AM, Mario Limonciello wrote:

All microcode runs a basic validation after it's been loaded. Each
IP block as part of init will run both.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v5->v6:
  * Fix argument to be ** not *
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 36 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  3 ++
  2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index eafcddce58d3..8ebfec12da87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,39 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device 
*adev, int block_type,
  
  	snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);

  }
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware 
**fw,
+const char *fw_name)
+{
+   int err = request_firmware(fw, fw_name, adev->dev);
+
+   if (err)
+   return -ENODEV;
+   err = amdgpu_ucode_validate(*fw);
+   if (err)
+   dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+


Missed this earlier. If validate fails, shouldn't this undo the request 
operation by calling release?


Thanks,
Lijo


+   return err;
+}
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware **fw)
+{
+   release_firmware(*fw);
+   *fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 552e06929229..848579d4988b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,9 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
  void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
  void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header 
*hdr);
  int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware 
**fw,
+const char *fw_name);
+void amdgpu_ucode_release(const struct firmware **fw);
  bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
  


Re: [PATCH v4 07/27] drm/amd: Convert SDMA to use `amdgpu_ucode_ip_version_decode`

2023-01-03 Thread Lazar, Lijo




On 1/4/2023 3:48 AM, Mario Limonciello wrote:

Simplifies the code so that all SDMA versions will get the firmware
name from `amdgpu_ucode_ip_version_decode`.

Signed-off-by: Mario Limonciello 
---
v3->v4:
  * Move out of IP discovery and instead simplify early_init
v2->v3:
  * Fix dGPU naming scheme
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c |  7 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 +-
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 47 +---
  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 30 +
  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 55 +---
  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 25 +--
  6 files changed, 13 insertions(+), 155 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 9e85a078d918..83e8f0dae647 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -200,15 +200,18 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device 
*adev,
  }
  
  int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,

-  char *fw_name, u32 instance,
-  bool duplicate)
+  u32 instance, bool duplicate)
  {
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
int err = 0, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
uint16_t version_major;
+   char ucode_prefix[30];
+   char fw_name[40];
  
+	amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));

+   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%s.bin", ucode_prefix, !instance ? 
"" : "1");


It is safer to keep the original logic with instance number as suffix 
rather than hardcoding to 1.
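
Roughly like this, keeping the instance number as the suffix (a sketch only,
reusing the names from the hunk above):

        if (!instance)
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin",
                         ucode_prefix);
        else
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%u.bin",
                         ucode_prefix, instance);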


Thanks,
Lijo


err = amdgpu_ucode_load(adev, &adev->sdma.instance[instance].fw, 
fw_name);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 7d99205c2e01..2d16e6d36728 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -124,8 +124,8 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device 
*adev,
  int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
  struct amdgpu_irq_src *source,
  struct amdgpu_iv_entry *entry);
-int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
-char *fw_name, u32 instance, bool duplicate);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+  bool duplicate);
  void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
  bool duplicate);
  void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4d780e4430e7..017ae298558e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -575,60 +575,17 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device 
*adev)
  // vega10 real chip need to use PSP to load firmware
  static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
  {
-   const char *chip_name;
-   char fw_name[30];
int ret, i;
  
-	DRM_DEBUG("\n");

-
-   switch (adev->ip_versions[SDMA0_HWIP][0]) {
-   case IP_VERSION(4, 0, 0):
-   chip_name = "vega10";
-   break;
-   case IP_VERSION(4, 0, 1):
-   chip_name = "vega12";
-   break;
-   case IP_VERSION(4, 2, 0):
-   chip_name = "vega20";
-   break;
-   case IP_VERSION(4, 1, 0):
-   case IP_VERSION(4, 1, 1):
-   if (adev->apu_flags & AMD_APU_IS_RAVEN2)
-   chip_name = "raven2";
-   else if (adev->apu_flags & AMD_APU_IS_PICASSO)
-   chip_name = "picasso";
-   else
-   chip_name = "raven";
-   break;
-   case IP_VERSION(4, 2, 2):
-   chip_name = "arcturus";
-   break;
-   case IP_VERSION(4, 1, 2):
-   if (adev->apu_flags & AMD_APU_IS_RENOIR)
-   chip_name = "renoir";
-   else
-   chip_name = "green_sardine";
-   break;
-   case IP_VERSION(4, 4, 0):
-   chip_name = "aldebaran";
-   break;
-   default:
-   BUG();
-   }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
-   if (i == 0)
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma.bin", chip_name);
-   else
-   snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma%d.bin", chip_name, i);
if (adev->ip_versions[SDMA0_HWIP]

Re: [PATCH v4 05/27] drm/amd: Add a new helper for loading/validating microcode

2023-01-03 Thread Lazar, Lijo




On 1/4/2023 3:48 AM, Mario Limonciello wrote:

All microcode runs a basic validation after it's been loaded. Each
IP block as part of init will run both.

Introduce a wrapper for request_firmware and amdgpu_ucode_validate.
This wrapper will also remap any error codes from request_firmware
to -ENODEV.  This is so that early_init will fail if firmware couldn't
be loaded instead of the IP block being disabled.

Signed-off-by: Mario Limonciello 
---
v3-v4:
  * New patch
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 24 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h |  1 +
  2 files changed, 25 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index eafcddce58d3..8c4a7b09e344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1312,3 +1312,27 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device 
*adev, int block_type,
  
  	snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);

  }
+
+/*
+ * amdgpu_ucode_load - Load and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @fw_name: firmware to load
+ *
+ * This is a helper that will use request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on firmware. If the load fails, remap
+ * the error code to -ENODEV, so that early_init functions will fail to load.
+ */
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name)


'load' also takes a different meaning of loading firmware to ASIC. Maybe 
keep it as 'get' and keep another corresponding common 'put' for 
release_firmware?
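
Roughly along these lines (the names are placeholders to illustrate the
get/put pairing, not a final API):

/* "get": fetch and validate, taking a reference on the firmware */
int amdgpu_ucode_get(struct amdgpu_device *adev,
                     const struct firmware **fw, const char *fw_name);

/* "put": drop the reference and clear the pointer */
static inline void amdgpu_ucode_put(const struct firmware **fw)
{
        release_firmware(*fw);
        *fw = NULL;
}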


Thanks,
Lijo


+{
+   int err = request_firmware(fw, fw_name, adev->dev);
+
+   if (err)
+   return -ENODEV;
+   err = amdgpu_ucode_validate(*fw);
+   if (err)
+   dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
+
+   return err;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 552e06929229..b9139fb44506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -544,6 +544,7 @@ void amdgpu_ucode_print_sdma_hdr(const struct 
common_firmware_header *hdr);
  void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
  void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header 
*hdr);
  int amdgpu_ucode_validate(const struct firmware *fw);
+int amdgpu_ucode_load(struct amdgpu_device *adev, const struct firmware **fw, 
char *fw_name);
  bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
  


Re: [PATCH v2 00/11] Recover from failure to probe GPU

2023-01-03 Thread Lazar, Lijo




On 12/28/2022 10:00 PM, Mario Limonciello wrote:

One of the first thing that KMS drivers do during initialization is
destroy the system firmware framebuffer by means of
`drm_aperture_remove_conflicting_pci_framebuffers`

This means that if for any reason the GPU failed to probe the user
will be stuck with at best a screen frozen at the last thing that
was shown before the KMS driver continued its probe.

The problem is most pronounced when new GPU support is introduced
because users will need to have a recent linux-firmware snapshot
on their system when they boot a kernel with matching support.

However the problem is further exacerbated in the case of amdgpu because
it has migrated to "IP discovery" where amdgpu will attempt to load
on "ALL" AMD GPUs even if the driver is missing support for IP blocks
contained in that GPU.

IP discovery requires some probing and isn't run until after the
framebuffer has been destroyed.

This means a situation can occur where a user purchases a new GPU not
yet supported by a distribution and when booting the installer it will
"freeze" even if the distribution doesn't have the matching kernel support
for those IP blocks.

The perfect example of this is Ubuntu 22.10 and the new dGPUs just
launched by AMD.  The installation media ships with kernel 5.19 (which
has IP discovery) but the amdgpu support for those IP blocks landed in
kernel 6.0. The matching linux-firmware was released after 22.10's launch.
The screen will freeze without nomodeset. Even if a user manages to install
and then upgrades to kernel 6.0 after install they'll still have the
problem of missing firmware, and the same experience.

This is quite jarring for users, particularly if they don't know
that they have to use "nomodeset" to install.

To help the situation make changes to GPU discovery:
1) Delay releasing the firmware framebuffer until after IP discovery has
completed.  This will help the situation of an older kernel that doesn't
yet support the IP blocks probing a new GPU.
2) Request loading all PSP, VCN, SDMA, MES and GC microcode into memory
during IP discovery. This will help the situation of new enough kernel for
the IP discovery phase to otherwise pass but missing microcode from
linux-firmware.git.

Not all requested firmware will be loaded during IP discovery as some of it
will require larger driver architecture changes. For example SMU firmware
isn't loaded on certain products, but that's not known until later on when
the early_init phase of the SMU load occurs.

v1->v2:
  * Take the suggestion from the v1 thread to delay the framebuffer release until
ip discovery is done. This patch is CC'd to stable so that older stable
kernels with IP discovery won't try to probe unknown IP.
  * Drop changes to drm aperture.
  * Fetch SDMA, VCN, MES, GC and PSP microcode during IP discovery.



What is the gain here in just checking if firmware files are available? 
It can fail anywhere during sw_init and it's the same situation.


Restricting IP FWs to IP specific files looks better to me than 
centralizing and creating interdependencies.


Thanks,
Lijo


Mario Limonciello (11):
   drm/amd: Delay removal of the firmware framebuffer
   drm/amd: Add a legacy mapping to "amdgpu_ucode_ip_version_decode"
   drm/amd: Convert SMUv11 microcode init to use
 `amdgpu_ucode_ip_version_decode`
   drm/amd: Convert SMU v13 to use `amdgpu_ucode_ip_version_decode`
   drm/amd: Request SDMA microcode during IP discovery
   drm/amd: Request VCN microcode during IP discovery
   drm/amd: Request MES microcode during IP discovery
   drm/amd: Request GFX9 microcode during IP discovery
   drm/amd: Request GFX10 microcode during IP discovery
   drm/amd: Request GFX11 microcode during IP discovery
   drm/amd: Request PSP microcode during IP discovery

  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|   8 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 590 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   6 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   |   2 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c  |   9 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h  |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 208 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  85 +--
  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 180 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c|  64 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 143 +
  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c|  28 -
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  25 +-
  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c| 106 +---
  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c| 165 +
  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c| 102 +--
  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c|  82 ---
  drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c  |  36 --
  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c |  36 --
  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c|  61 +-
  drivers/gpu/drm/am

Re: [PATCH] drm/amdgpu: add mb for si

2022-11-24 Thread Lazar, Lijo



On 11/25/2022 7:43 AM, Quan, Evan wrote:





-Original Message-
From: Lazar, Lijo 
Sent: Thursday, November 24, 2022 6:49 PM
To: Quan, Evan ; 李真能 ;
Michel Dänzer ; Koenig, Christian
; Deucher, Alexander

Cc: amd-...@lists.freedesktop.org; Pan, Xinhui ;
linux-ker...@vger.kernel.org; dri-devel@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: add mb for si



On 11/24/2022 4:11 PM, Lazar, Lijo wrote:


On 11/24/2022 3:34 PM, Quan, Evan wrote:


Could the attached patch help?

Evan

-Original Message-
From: amd-gfx  On Behalf

Of ???

Sent: Friday, November 18, 2022 5:25 PM
To: Michel Dänzer ; Koenig, Christian
; Deucher, Alexander

Cc: amd-...@lists.freedesktop.org; Pan, Xinhui ;
linux-ker...@vger.kernel.org; dri-devel@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: add mb for si


在 2022/11/18 17:18, Michel Dänzer 写道:

On 11/18/22 09:01, Christian König wrote:

Am 18.11.22 um 08:48 schrieb Zhenneng Li:

During reboot tests on an arm64 platform, it may fail to boot, so
add this mb in smc.

The error messages are as follows:
[    6.996395][ 7] [  T295] [drm:amdgpu_device_ip_late_init
[amdgpu]] *ERROR*
   late_init of IP block  failed -22 [
7.006919][ 7] [  T295] amdgpu :04:00.0:

The issue is happening in late_init() which eventually does

  ret = si_thermal_enable_alert(adev, false);

Just before this, si_thermal_start_thermal_controller is called in
hw_init and that enables thermal alert.

Maybe the issue is with enable/disable of thermal alerts in quick
succession. Adding a delay inside si_thermal_start_thermal_controller
might help.


On a second look, temperature range is already set as part of
si_thermal_start_thermal_controller in hw_init
https://elixir.bootlin.com/linux/v6.1-
rc6/source/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c#L6780

There is no need to set it again here -

https://elixir.bootlin.com/linux/v6.1-
rc6/source/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c#L7635

I think it is safe to remove the call from late_init altogether. Alex/Evan?


[Quan, Evan] Yes, it makes sense to me. But I'm not sure whether that's related 
to the issue here.
Per my understanding, if the issue were caused by double-calling the 
thermal_alert enablement, it would fail every time.
That cannot explain why adding some delay or an mb() call can help.


The side effect of the patch is just some random delay introduced for 
every SMC message


The issue happens in late_init(). Between late_init() and dpm 
enablement, there are many smc messages sent which don't have this 
issue. So I think the issue is not with FW not running.


Thus the only case I see is enable/disable of thermal alert in random 
succession.
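
As a rough illustration only (hypothetical code, not the actual si_dpm.c):
since si_thermal_start_thermal_controller() already programs the temperature
range during hw_init, late_init could simply skip the redundant sequence and
avoid toggling the thermal alerts twice in quick succession:

static int si_dpm_late_init_sketch(struct amdgpu_device *adev)
{
        if (!adev->pm.dpm_enabled)
                return 0;

        /* The temperature range and thermal alerts were already set up by
         * si_thermal_start_thermal_controller() in hw_init, so do not
         * disable/re-enable the alerts again here. */
        return 0;
}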


Thanks,

Lijo


BR
Evan

Thanks,
Lijo


Thanks,
Lijo


amdgpu_device_ip_late_init failed [    7.014224][ 7] [  T295] amdgpu
:04:00.0: Fatal error during GPU init

Memory barriers are not supposed to be sprinkled around like this, you
need to give a detailed explanation why this is necessary.

Regards,
Christian.


Signed-off-by: Zhenneng Li 
---
     drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c | 2 ++
     1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
index 8f994ffa9cd1..c7656f22278d 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
@@ -155,6 +155,8 @@ bool amdgpu_si_is_smc_running(struct
amdgpu_device *adev)
     u32 rst = RREG32_SMC(SMC_SYSCON_RESET_CNTL);
     u32 clk = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
     +    mb();
+
     if (!(rst & RST_REG) && !(clk & CK_DISABLE))
     return true;

In particular, it makes no sense in this specific place, since it
cannot directly affect the values of rst & clk.

I think so too.

But when I do reboot tests using nine desktop machines, this error may be
reported on one or two machines after hundreds or thousands of reboot
iterations. At the beginning I used msleep() instead of mb(); both
methods work, but I don't know what the root cause is.

I used this method on another vendor's Oland card, and this error message was
reported again.

What could be the root reason?

test environmen:

graphics card: OLAND 0x1002:0x6611 0x1642:0x1869 0x87

driver: amdgpu

os: ubuntu 2004

platform: arm64

kernel: 5.4.18



Re: [PATCH] drm/amdgpu: add mb for si

2022-11-24 Thread Lazar, Lijo




On 11/24/2022 4:11 PM, Lazar, Lijo wrote:



On 11/24/2022 3:34 PM, Quan, Evan wrote:


Could the attached patch help?

Evan

-Original Message-
From: amd-gfx  On Behalf Of ???
Sent: Friday, November 18, 2022 5:25 PM
To: Michel Dänzer ; Koenig, Christian
; Deucher, Alexander

Cc: amd-...@lists.freedesktop.org; Pan, Xinhui ;
linux-ker...@vger.kernel.org; dri-devel@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: add mb for si


在 2022/11/18 17:18, Michel Dänzer 写道:

On 11/18/22 09:01, Christian König wrote:

Am 18.11.22 um 08:48 schrieb Zhenneng Li:

During reboot tests on an arm64 platform, it may fail to boot, so add
this mb in smc.

The error messages are as follows:
[    6.996395][ 7] [  T295] [drm:amdgpu_device_ip_late_init
[amdgpu]] *ERROR*
  late_init of IP block  failed -22 [
7.006919][ 7] [  T295] amdgpu :04:00.0:


The issue is happening in late_init() which eventually does

 ret = si_thermal_enable_alert(adev, false);

Just before this, si_thermal_start_thermal_controller is called in 
hw_init and that enables thermal alert.


Maybe the issue is with enable/disable of thermal alerts in quick 
succession. Adding a delay inside si_thermal_start_thermal_controller 
might help.




On a second look, temperature range is already set as part of 
si_thermal_start_thermal_controller in hw_init

https://elixir.bootlin.com/linux/v6.1-rc6/source/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c#L6780

There is no need to set it again here -

https://elixir.bootlin.com/linux/v6.1-rc6/source/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c#L7635

I think it is safe to remove the call from late_init altogether. Alex/Evan?

Thanks,
Lijo


Thanks,
Lijo


amdgpu_device_ip_late_init failed [    7.014224][ 7] [  T295] amdgpu
:04:00.0: Fatal error during GPU init

Memory barriers are not supposed to be sprinkled around like this, you
need to give a detailed explanation why this is necessary.


Regards,
Christian.


Signed-off-by: Zhenneng Li 
---
    drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c | 2 ++
    1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
index 8f994ffa9cd1..c7656f22278d 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
@@ -155,6 +155,8 @@ bool amdgpu_si_is_smc_running(struct
amdgpu_device *adev)
    u32 rst = RREG32_SMC(SMC_SYSCON_RESET_CNTL);
    u32 clk = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
    +    mb();
+
    if (!(rst & RST_REG) && !(clk & CK_DISABLE))
    return true;
In particular, it makes no sense in this specific place, since it
cannot directly affect the values of rst & clk.

I think so too.

But when I do reboot tests using nine desktop machines, this error may be
reported on one or two machines after hundreds or thousands of reboot
iterations. At the beginning I used msleep() instead of mb(); both
methods work, but I don't know what the root cause is.

I used this method on another vendor's Oland card, and this error message was
reported again.

What could be the root reason?

test environmen:

graphics card: OLAND 0x1002:0x6611 0x1642:0x1869 0x87

driver: amdgpu

os: ubuntu 2004

platform: arm64

kernel: 5.4.18





Re: [PATCH] drm/amdgpu: add mb for si

2022-11-24 Thread Lazar, Lijo




On 11/24/2022 3:34 PM, Quan, Evan wrote:


Could the attached patch help?

Evan

-Original Message-
From: amd-gfx  On Behalf Of ???
Sent: Friday, November 18, 2022 5:25 PM
To: Michel Dänzer ; Koenig, Christian
; Deucher, Alexander

Cc: amd-...@lists.freedesktop.org; Pan, Xinhui ;
linux-ker...@vger.kernel.org; dri-devel@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: add mb for si


在 2022/11/18 17:18, Michel Dänzer 写道:

On 11/18/22 09:01, Christian König wrote:

Am 18.11.22 um 08:48 schrieb Zhenneng Li:

During reboot tests on an arm64 platform, it may fail to boot, so add
this mb in smc.

The error messages are as follows:
[6.996395][ 7] [  T295] [drm:amdgpu_device_ip_late_init
[amdgpu]] *ERROR*
  late_init of IP block  failed -22 [
7.006919][ 7] [  T295] amdgpu :04:00.0:


The issue is happening in late_init() which eventually does

ret = si_thermal_enable_alert(adev, false);

Just before this, si_thermal_start_thermal_controller is called in 
hw_init and that enables thermal alert.


Maybe the issue is with enable/disable of thermal alerts in quick 
succession. Adding a delay inside si_thermal_start_thermal_controller 
might help.


Thanks,
Lijo


amdgpu_device_ip_late_init failed [7.014224][ 7] [  T295] amdgpu
:04:00.0: Fatal error during GPU init

Memory barriers are not supposed to be sprinkled around like this, you
need to give a detailed explanation why this is necessary.


Regards,
Christian.


Signed-off-by: Zhenneng Li 
---
drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
index 8f994ffa9cd1..c7656f22278d 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c
@@ -155,6 +155,8 @@ bool amdgpu_si_is_smc_running(struct
amdgpu_device *adev)
u32 rst = RREG32_SMC(SMC_SYSCON_RESET_CNTL);
u32 clk = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+mb();
+
if (!(rst & RST_REG) && !(clk & CK_DISABLE))
return true;

In particular, it makes no sense in this specific place, since it cannot
directly affect the values of rst & clk.

I think so too.

But when I do reboot tests using nine desktop machines, this error may be
reported on one or two machines after hundreds or thousands of reboot
iterations. At the beginning I used msleep() instead of mb(); both
methods work, but I don't know what the root cause is.

I used this method on another vendor's Oland card, and this error message was
reported again.

What could be the root reason?

test environmen:

graphics card: OLAND 0x1002:0x6611 0x1642:0x1869 0x87

driver: amdgpu

os: ubuntu 2004

platform: arm64

kernel: 5.4.18





Re: [PATCH 1/2] drm/radeon: Fix PCI device refcount leak in radeon_atrm_get_bios()

2022-11-22 Thread Lazar, Lijo

When only the second GPU has a valid ATRM handle, the loop keeps iterating,
and the next call to pci_get_class() passes the first GPU's pdev as the
"from" parameter. At that point pci_get_class() drops the reference count of
that "from" device.
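
An illustrative sketch of that refcount pattern (simplified, not the exact
radeon_atrm_get_bios() code; has_valid_atrm_handle() is a made-up stand-in
for the ATRM lookup):

struct pci_dev *pdev = NULL;
bool found = false;

while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) {
        /* pci_get_class() drops the reference on the previous "from"
         * device and returns the next one with its refcount raised. */
        if (has_valid_atrm_handle(pdev)) {
                found = true;
                break;          /* we still hold a reference on pdev */
        }
}

if (found)
        pci_dev_put(pdev);      /* balance the reference taken in the loop */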

Thanks,
Lijo

From: Alex Deucher 
Sent: Tuesday, November 22, 2022 9:55:33 PM
To: Lazar, Lijo 
Cc: Xiongfeng Wang ; Deucher, Alexander 
; Koenig, Christian ; Pan, 
Xinhui ; airl...@gmail.com ; 
dan...@ffwll.ch ; Zhang, Hawking ; 
dri-devel@lists.freedesktop.org ; 
amd-...@lists.freedesktop.org ; 
yangyingli...@huawei.com 
Subject: Re: [PATCH 1/2] drm/radeon: Fix PCI device refcount leak in 
radeon_atrm_get_bios()

On Tue, Nov 22, 2022 at 9:59 AM Lazar, Lijo  wrote:
>
>
>
> On 11/22/2022 8:19 PM, Alex Deucher wrote:
> > On Tue, Nov 22, 2022 at 6:12 AM Xiongfeng Wang
> >  wrote:
> >>
> >> As comment of pci_get_class() says, it returns a pci_device with its
> >> refcount increased and decreased the refcount for the input parameter
> >> @from if it is not NULL.
> >>
> >> If we break the loop in radeon_atrm_get_bios() with 'pdev' not NULL, we
> >> need to call pci_dev_put() to decrease the refcount. Add the missing
> >> pci_dev_put() to avoid refcount leak.
> >
> > For both patches, I think pci_dev_put() needs to go into the loops.
> > There are 2 or more GPUs on the systems where this is relevant.
> >
>
> As per the logic, the loop breaks when it finds a valid ATRM handle. So
> dev_put is required only for that device.

Sure, but what if the handle is on the second DISPLAY_VGA or
DISPLAY_OTHER class PCI device on the system?  We've already called
pci_get_class() for the first PCI device that matched.

Alex

>
> When inside the loop this happens -  "decreased the refcount for the
> input parameter @from if it is not NULL"
>
> Thanks,
> Lijo
>
> > Alex
> >
> >>
> >> Fixes: d8ade3526b2a ("drm/radeon: handle non-VGA class pci devices with 
> >> ATRM")
> >> Fixes: c61e2775873f ("drm/radeon: split ATRM support out from the ATPX 
> >> handler (v3)")
> >> Signed-off-by: Xiongfeng Wang 
> >> ---
> >>   drivers/gpu/drm/radeon/radeon_bios.c | 1 +
> >>   1 file changed, 1 insertion(+)
> >>
> >> diff --git a/drivers/gpu/drm/radeon/radeon_bios.c 
> >> b/drivers/gpu/drm/radeon/radeon_bios.c
> >> index 33121655d50b..2df6ce3e32cb 100644
> >> --- a/drivers/gpu/drm/radeon/radeon_bios.c
> >> +++ b/drivers/gpu/drm/radeon/radeon_bios.c
> >> @@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device 
> >> *rdev)
> >>
> >>  if (!found)
> >>  return false;
> >> +   pci_dev_put(pdev);
> >>
> >>  rdev->bios = kmalloc(size, GFP_KERNEL);
> >>  if (!rdev->bios) {
> >> --
> >> 2.20.1
> >>

Re: [PATCH 1/2] drm/radeon: Fix PCI device refcount leak in radeon_atrm_get_bios()

2022-11-22 Thread Lazar, Lijo




On 11/22/2022 8:19 PM, Alex Deucher wrote:

On Tue, Nov 22, 2022 at 6:12 AM Xiongfeng Wang
 wrote:


As comment of pci_get_class() says, it returns a pci_device with its
refcount increased and decreased the refcount for the input parameter
@from if it is not NULL.

If we break the loop in radeon_atrm_get_bios() with 'pdev' not NULL, we
need to call pci_dev_put() to decrease the refcount. Add the missing
pci_dev_put() to avoid refcount leak.


For both patches, I think pci_dev_put() needs to go into the loops.
There are 2 or more GPUs on the systems where this is relevant.



As per the logic, the loop breaks when it finds a valid ATRM handle. So 
dev_put is required only for that device.


When inside the loop this happens -  "decreased the refcount for the 
input parameter @from if it is not NULL"


Thanks,
Lijo


Alex



Fixes: d8ade3526b2a ("drm/radeon: handle non-VGA class pci devices with ATRM")
Fixes: c61e2775873f ("drm/radeon: split ATRM support out from the ATPX handler 
(v3)")
Signed-off-by: Xiongfeng Wang 
---
  drivers/gpu/drm/radeon/radeon_bios.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/radeon_bios.c 
b/drivers/gpu/drm/radeon/radeon_bios.c
index 33121655d50b..2df6ce3e32cb 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)

 if (!found)
 return false;
+   pci_dev_put(pdev);

 rdev->bios = kmalloc(size, GFP_KERNEL);
 if (!rdev->bios) {
--
2.20.1



Re: [PATCH v1] drivers/amd/kv_dpm: check the return value of amdgpu_kv_smc_bapm_enable

2022-09-22 Thread Lazar, Lijo




On 9/23/2022 1:36 AM, Li Zhong wrote:

Check the return value of amdgpu_kv_smc_bapm_enable() and log the error
when it fails.

Signed-off-by: Li Zhong 
---
  drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c 
b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 8fd0782a2b20..d392256effe2 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -1384,13 +1384,16 @@ static int kv_dpm_enable(struct amdgpu_device *adev)
  static void kv_dpm_disable(struct amdgpu_device *adev)
  {
struct kv_power_info *pi = kv_get_pi(adev);
+   int err;
  
  	amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,

   AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
   AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
  
-	amdgpu_kv_smc_bapm_enable(adev, false);

+   err = amdgpu_kv_smc_bapm_enable(adev, false);
+   if (ret)
+   DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n");


The return code is captured in 'err' but the check is on the 'ret' variable.

BTW, does this code compile?
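
For reference, a corrected hunk would check the same variable it assigns
(a minimal sketch, not a tested patch):

        err = amdgpu_kv_smc_bapm_enable(adev, false);
        if (err)
                DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n");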

Thanks,
Lijo

  
  	if (adev->asic_type == CHIP_MULLINS)

kv_enable_nb_dpm(adev, false);



Re: [PATCH] drm/ttm: fix ttm tt init fail when size exceeds kmalloc limit

2022-04-20 Thread Lazar, Lijo




On 4/20/2022 6:26 PM, Christian König wrote:

Am 20.04.22 um 14:54 schrieb Wang, Yang(Kevin):




Hi Chris,

1) Change the test case to use something larger than 1TiB.
Sure, we can increase the size of the BO and make the test pass,
but if a user really wants to allocate a 1TB GTT BO, do we have any reason to
let it fail?


No, the reason is the underlying core kernel doesn't allow kvmalloc 
allocations with GFP_ZERO which are large enough to hold the array of 
allocated pages for this.


We are working on top of the core Linux kernel and should *NEVER* ever 
add workarounds like what was suggested here.


AFAIU, for the purpose of ttm use, fallback to vmalloc is fine.

 * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
 * fall back to vmalloc.
 *

Actually the current implementation documents this behavior, but it is buried 
deep enough inside the implementation that it is easy to miss - at least it is 
not obvious while using kvmalloc_array.
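
For illustration, the allocation pattern under discussion looks roughly like
this (a simplified sketch; num_pages stands in for the BO's page count). On
kernels before the 5.18-era change mentioned above, the __GFP_ZERO flag kept
kvmalloc_array() from falling back to vmalloc, so a huge zeroed array could
fail even with plenty of free memory:

dma_addr_t *dma_addrs;

dma_addrs = kvmalloc_array(num_pages, sizeof(*dma_addrs),
                           GFP_KERNEL | __GFP_ZERO);
if (!dma_addrs)
        return -ENOMEM;

/* ... fill and use the array ... */

kvfree(dma_addrs);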


Thanks,
Lijo


Regards,
Christian.


the system has about 2TB of available memory, but it will still fail.

2) Change kvmalloc to allow GFP_ZERO allocations even in the vmalloc 
fallback path.

    the 5.18 kernel adds this patch to fix the issue.

Best Regards,
Kevin

*From:* Koenig, Christian 
*Sent:* Wednesday, April 20, 2022 8:42 PM
*To:* Wang, Yang(Kevin) ; Christian König 
; dri-devel@lists.freedesktop.org 
; amd-...@lists.freedesktop.org 

*Subject:* Re: [PATCH] drm/ttm: fix ttm tt init fail when size exceeds 
kmalloc limit

Hi Kevin,

yes and that is perfectly valid and expected behavior. There is 
absolutely no need to change anything in TTM here.


What we could do is:
1) Change the test case to use something larger than 1TiB.
2) Change kvmalloc to allow GFP_ZERO allocations even in the vmalloc 
fallback path.


Regards,
Christian.

Am 20.04.22 um 14:39 schrieb Wang, Yang(Kevin):




Hi Chirs,

Yes, right, the amdgpu driver will use the amdgpu_bo_validate_size() 
function to verify the BO size,
but when the attempt to allocate a VRAM-domain BO fails, the amdgpu 
driver will fall back to allocating a domain = (GTT | VRAM) BO.
Please check the following code: it causes the second allocation attempt to 
fail while allocating the 256MB buffer used to store DMA addresses (via 
kvmalloc()).


initial_domain = (u32)(0x & args->in.domains);
retry:
        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
                   initial_domain,
                   flags, ttm_bo_type_device, resv, &gobj);
        if (r && r != -ERESTARTSYS) {
                if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
      flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
      goto retry;
                }

                if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
      initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
      goto retry;
                }
DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
              size, initial_domain, args->in.alignment, r);
        }

Best Regards,
Kevin


*From:* Christian König  


*Sent:* Wednesday, April 20, 2022 7:55 PM
*To:* Wang, Yang(Kevin)  
; Koenig, Christian 
 ; 
dri-devel@lists.freedesktop.org 
 
 
; 
amd-...@lists.freedesktop.org  
 
*Subject:* Re: [PATCH] drm/ttm: fix ttm tt init fail when size 
exceeds kmalloc limit

Hi Kevin,

no, the test case should already fail in amdgpu_bo_validate_size().

If we have a system with 2TiB of memory where the test case could 
succeed then we should increase the requested size to something larger.


And if the underlying core Linux kernel functions don't allow 
allocations as large as the requested one we should *NEVER* ever add 
workarounds like this.


It is perfectly expected that this test case is not able to fulfill 
the desired allocation. That it fails during kvmalloc is unfortunate, 
but not a show stopper.


Regards,
Christian.


Am 20.04.22 um 13:32 schrieb Wang, Yang(Kevin):




Hi Chris,

You misunderstood the background of this case.

Although we expect this test case to fail, it should fail at the 
location where the BO's actual memory is allocated. Right now the 
code logic instead makes it fail while allocating the memory used to store 
the DMA addresses.


e.g.: the case fails on a machine with 2TB of system RAM, where it should 
succeed, but it fails.


When allocating a 1TB BO, TTM should allocate a 1TB/4K * 8 byte buffer to store 
the allocation results (page addresses); this should not usually fail.


There is a similar fix in upstream kernel 5.18; before this fix 
landed, the problem existed in TTM.


kernel/git/tor

Re: [PATCHv4] drm/amdgpu: disable ASPM on Intel Alder Lake based systems

2022-04-12 Thread Lazar, Lijo




On 4/13/2022 3:20 AM, Richard Gong wrote:

Active State Power Management (ASPM) feature is enabled since kernel 5.14.
There are some AMD GFX cards (such as WX3200 and RX640) that won't work
with ASPM-enabled Intel Alder Lake based systems. Using these GFX cards as
video/display output, Intel Alder Lake based systems will hang during
suspend/resume.

The issue was initially reported on one system (Dell Precision 3660 with
BIOS version 0.14.81), but was later confirmed to affect at least 4 Alder
Lake based systems.

Add extra check to disable ASPM on Intel Alder Lake based systems.

Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
Reported-by: kernel test robot 
Signed-off-by: Richard Gong 


Reviewed-by: Lijo Lazar 

Thanks,
Lijo


---
v4: s/CONFIG_X86_64/CONFIG_X86
 enhanced check logic
v3: s/intel_core_asom_chk/aspm_support_quirk_check
 correct build error with W=1 option
v2: correct commit description
 move the check from chip family to problematic platform
---
  drivers/gpu/drm/amd/amdgpu/vi.c | 17 -
  1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..b33e0a9bee65 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,6 +81,10 @@
  #include "mxgpu_vi.h"
  #include "amdgpu_dm.h"
  
+#if IS_ENABLED(CONFIG_X86)

+#include 
+#endif
+
  #define ixPCIE_LC_L1_PM_SUBSTATE  0x100100C6
  #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK  
0x0001L
  #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK  0x0002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
WREG32_PCIE(ixPCIE_LC_CNTL, data);
  }
  
+static bool aspm_support_quirk_check(void)

+{
+   if (IS_ENABLED(CONFIG_X86)) {
+   struct cpuinfo_x86 *c = &cpu_data(0);
+
+   return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+   }
+
+   return true;
+}
+
  static void vi_program_aspm(struct amdgpu_device *adev)
  {
u32 data, data1, orig;
bool bL1SS = false;
bool bClkReqSupport = true;
  
-	if (!amdgpu_device_should_use_aspm(adev))

+   if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
return;
  
  	if (adev->flags & AMD_IS_APU ||




Re: [PATCHv2] drm/amdgpu: disable ASPM on Intel AlderLake based systems

2022-04-10 Thread Lazar, Lijo




On 4/9/2022 12:35 AM, Richard Gong wrote:

Active State Power Management (ASPM) feature is enabled since kernel 5.14.
There are some AMD GFX cards (such as WX3200 and RX640) that cannot be
used with Intel AlderLake based systems to enable ASPM. Using these GFX
cards as video/display output, Intel Alder Lake based systems will hang
during suspend/resume.

Add extra check to disable ASPM on Intel AlderLake based systems.

Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
Signed-off-by: Richard Gong 
---
v2: correct commit description
 move the check from chip family to problematic platform
---
  drivers/gpu/drm/amd/amdgpu/vi.c | 17 -
  1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..8b4eaf54b23e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,6 +81,10 @@
  #include "mxgpu_vi.h"
  #include "amdgpu_dm.h"
  
+#if IS_ENABLED(CONFIG_X86_64)

+#include 
+#endif
+
  #define ixPCIE_LC_L1_PM_SUBSTATE  0x100100C6
  #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK  
0x0001L
  #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK  0x0002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
WREG32_PCIE(ixPCIE_LC_CNTL, data);
  }
  
+static bool intel_core_apsm_chk(void)


If this is only for Dell systems, use DMI_SYS_VENDOR/DMI_PRODUCT_NAME to 
identify the platform information from SMBIOS.


Better to rename to aspm_support_quirk_check() or similar, and return 
false on is_alderlake() or is_dell_xyz();


Thanks,
Lijo


+{
+#if IS_ENABLED(CONFIG_X86_64)
+   struct cpuinfo_x86 *c = &cpu_data(0);
+
+   return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+   return false;
+#endif
+}
+
  static void vi_program_aspm(struct amdgpu_device *adev)
  {
u32 data, data1, orig;
bool bL1SS = false;
bool bClkReqSupport = true;
  
-	if (!amdgpu_device_should_use_aspm(adev))

+   if (!amdgpu_device_should_use_aspm(adev) || intel_core_apsm_chk())
return;
  
  	if (adev->flags & AMD_IS_APU ||




Re: [RFC v4] drm/amdgpu: Rework reset domain to be refcounted.

2022-02-08 Thread Lazar, Lijo




On 2/2/2022 10:56 PM, Andrey Grodzovsky wrote:

The reset domain now contains the register access semaphore
and so needs to be present as long as each device
in a hive needs it; therefore it cannot be bound to the XGMI
hive life cycle.
Address this by making the reset domain refcounted and pointed
to by each member of the hive and by the hive itself.

v4:
Fix crash on boot with XGMI hive by adding a type to reset_domain.
XGMI will only create a new reset_domain if the previous one was of single
device type, meaning it's the first boot. Otherwise it will take a
refcount to the existing reset_domain from the amdgpu device.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  | 38 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  | 18 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 29 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 +-
  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  4 +-
  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  4 +-
  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c  |  4 +-
  9 files changed, 118 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8e96b9a14452..f2ba460bfd59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -813,9 +813,7 @@ struct amd_powerplay {
  #define AMDGPU_RESET_MAGIC_NUM 64
  #define AMDGPU_MAX_DF_PERFMONS 4
  
-struct amdgpu_reset_domain {

-   struct workqueue_struct *wq;
-};
+struct amdgpu_reset_domain;
  
  struct amdgpu_device {

struct device   *dev;
@@ -1102,7 +1100,7 @@ struct amdgpu_device {
struct amdgpu_reset_control *reset_cntl;
uint32_t
ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE];
  
-	struct amdgpu_reset_domain	reset_domain;

+   struct amdgpu_reset_domain  *reset_domain;
  };
  
  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fef952ca8db5..cd1b7af69c35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2313,7 +2313,7 @@ static int amdgpu_device_init_schedulers(struct 
amdgpu_device *adev)
  
  		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,

   ring->num_hw_submission, 
amdgpu_job_hang_limit,
-  timeout, adev->reset_domain.wq, 
ring->sched_score, ring->name);
+  timeout, adev->reset_domain->wq, 
ring->sched_score, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
  ring->name);
@@ -2432,24 +2432,22 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
if (r)
goto init_failed;
  
+	/**

+* In case of XGMI grab extra reference for reset domain for this device
+*/
if (adev->gmc.xgmi.num_physical_nodes > 1) {
-   struct amdgpu_hive_info *hive;
-
-   amdgpu_xgmi_add_device(adev);
+   if (amdgpu_xgmi_add_device(adev) == 0) {
+   struct amdgpu_hive_info *hive = 
amdgpu_get_xgmi_hive(adev);
  
-		hive = amdgpu_get_xgmi_hive(adev);

-   if (!hive || !hive->reset_domain.wq) {
-   DRM_ERROR("Failed to obtain reset domain info for XGMI 
hive:%llx", hive->hive_id);
-   r = -EINVAL;
-   goto init_failed;
-   }
+   if (!hive->reset_domain ||
+   
!kref_get_unless_zero(&hive->reset_domain->refcount)) {
+   r = -ENOENT;
+   goto init_failed;
+   }
  
-		adev->reset_domain.wq = hive->reset_domain.wq;

-   } else {
-   adev->reset_domain.wq = 
alloc_ordered_workqueue("amdgpu-reset-dev", 0);
-   if (!adev->reset_domain.wq) {
-   r = -ENOMEM;
-   goto init_failed;
+   /* Drop the early temporary reset domain we created for 
device */
+   kref_put(&adev->reset_domain->refcount, 
amdgpu_reset_destroy_reset_domain);
+   adev->reset_domain = hive->reset_domain;
}
}
  
@@ -3599,6 +3597,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,

return r;
}
  
+	/*

+* Reset domain needs to be present early, before XGMI hive discovered
+* (if any) and intitialized to use reset sem and in_gpu reset flag
+* early on during init.
+*/
+   adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE 
,"amdgpu-reset-dev");
+

Re: [RFC v3 10/12] drm/amdgpu: Move in_gpu_reset into reset_domain

2022-02-08 Thread Lazar, Lijo




On 1/26/2022 4:07 AM, Andrey Grodzovsky wrote:

We should have a single instance per entire reset domain.

Signed-off-by: Andrey Grodzovsky 
Suggested-by: Lijo Lazar 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  7 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c  |  4 ++--
  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c  |  4 ++--
  6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f021cd3c9d34..087796e389ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1056,7 +1056,6 @@ struct amdgpu_device {
boolin_s4;
boolin_s0ix;
  
-	atomic_t 			in_gpu_reset;

enum pp_mp1_state   mp1_state;
struct amdgpu_doorbell_index doorbell_index;
  
@@ -1461,8 +1460,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)

 return adev->gmc.tmz_enabled;
  }
  
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)

-{
-   return atomic_read(&adev->in_gpu_reset);
-}
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6991ab4a8191..aa43af443ebe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3511,7 +3511,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
-   atomic_set(&adev->in_gpu_reset, 0);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
  
@@ -4775,7 +4774,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,

  static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
  {
-   atomic_set(&adev->in_gpu_reset, 1);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 1);
  
  	if (hive) {

down_write_nest_lock(&adev->reset_domain->sem, 
&hive->hive_lock);
@@ -4800,7 +4799,7 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
  {
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
-   atomic_set(&adev->in_gpu_reset, 0);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
  }
  
@@ -5643,3 +5642,8 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
  
  	amdgpu_asic_invalidate_hdp(adev, ring);

  }
+
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+   return atomic_read(&adev->reset_domain->in_gpu_reset);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 011585e330f6..e9b804a89b34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -127,6 +127,7 @@ struct amdgpu_reset_domain 
*amdgpu_reset_create_reset_domain(char *wq_name)
  
  	}
  
+	atomic_set(&reset_domain->in_gpu_reset, 0);

init_rwsem(&reset_domain->sem);
  
  	return reset_domain;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 7451089b0c06..413982f4e1ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -74,6 +74,7 @@ struct amdgpu_reset_domain {
struct kref refcount;
struct workqueue_struct *wq;
struct rw_semaphore sem;
+   atomic_t in_gpu_reset;


Maybe 'active' (independent of gpu) just to indicate that a reset is 
ongoing in the domain?
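
i.e., roughly (a sketch of the suggested rename only):

struct amdgpu_reset_domain {
        struct kref refcount;
        struct workqueue_struct *wq;
        struct rw_semaphore sem;
        atomic_t active;        /* a reset is ongoing in this domain */
};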


Thanks,
Lijo


  };
  
  
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

index 5dab06fce26a..6c79746d18db 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -258,7 +258,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
return;
  
  	amdgpu_virt_fini_data_exchange(adev);

-   atomic_set(&adev->in_gpu_reset, 1);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 1);
  
  	xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
  
@@ -271,7 +271,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)

} while (timeout > 1);
  
  flr_done:

-   atomic_set(&adev->in_gpu_reset, 0);
+   atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
  
  	/* Trigger recovery for world switch failure if no TDR */

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 868144fff16a..39f7e1e9ab81 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/

Re: [RFC v3 06/12] drm/amdgpu: Drop hive->in_reset

2022-02-07 Thread Lazar, Lijo




On 1/26/2022 4:07 AM, Andrey Grodzovsky wrote:

Since we serialize all resets, there is no need to protect against concurrent
resets.

Signed-off-by: Andrey Grodzovsky 
Reviewed-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  1 -
  3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 258ec3c0b2af..107a393ebbfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5013,25 +5013,9 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
dev_info(adev->dev, "GPU %s begin!\n",
need_emergency_restart ? "jobs stop":"reset");
  
-	/*

-* Here we trylock to avoid chain of resets executing from
-* either trigger by jobs on different adevs in XGMI hive or jobs on
-* different schedulers for same device while this TO handler is 
running.
-* We always reset all schedulers for device and all devices for XGMI
-* hive so that should take care of them too.
-*/
hive = amdgpu_get_xgmi_hive(adev);
-   if (hive) {
-   if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
-   DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
-   job ? job->base.id : -1, hive->hive_id);
-   amdgpu_put_xgmi_hive(hive);
-   if (job && job->vm)
-   drm_sched_increase_karma(&job->base);
-   return 0;
-   }


This function in general will reset all devices in a hive.

In a situation where GPU0 in hive0 gets to this function first and GPU1 
in hive0 also hangs shortly after (before GPU0's recovery process starts 
resetting the other devices in the hive), we don't want to execute the work 
queued as part of GPU1's recovery as well. Both the GPU0 and GPU1 recovery 
processes will try to reset all the devices in the hive.


In short - if a reset domain is already active, we probably don't need 
to queue another work item to the domain, since all devices in the domain are 
expected to get reset shortly.
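
Something along these lines, purely as a sketch of the idea (the field name
and message are illustrative, not final code):

/* in amdgpu_device_gpu_recover(), before queueing the recovery work */
if (atomic_read(&adev->reset_domain->in_gpu_reset)) {
        dev_info(adev->dev,
                 "reset already active in this domain, skipping duplicate recovery\n");
        return 0;
}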


Thanks,
Lijo


+   if (hive)
mutex_lock(&hive->hive_lock);
-   }
  
  	reset_context.method = AMD_RESET_METHOD_NONE;

reset_context.reset_req_dev = adev;
@@ -5227,7 +5211,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device 
*adev,
  
  skip_recovery:

if (hive) {
-   atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a858e3457c5c..9ad742039ac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -404,7 +404,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct 
amdgpu_device *adev)
INIT_LIST_HEAD(&hive->device_list);
INIT_LIST_HEAD(&hive->node);
mutex_init(&hive->hive_lock);
-   atomic_set(&hive->in_reset, 0);
atomic_set(&hive->number_devices, 0);
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6121aaa292cb..2f2ce53645a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -33,7 +33,6 @@ struct amdgpu_hive_info {
struct list_head node;
atomic_t number_devices;
struct mutex hive_lock;
-   atomic_t in_reset;
int hi_req_count;
struct amdgpu_device *hi_req_gpu;
struct task_barrier tb;



Re: [PATCH] drm/amdgpu: initialize reg_access_ctrl

2022-01-30 Thread Lazar, Lijo

Thanks, the patch is already submitted.

https://www.spinics.net/lists/amd-gfx/msg73613.html

Thanks,
Lijo


Re: [PATCH] drm/amd/pm: set min,max to 0 if there is no get_dpm_ultimate_freq function

2022-01-24 Thread Lazar, Lijo




On 1/24/2022 7:22 PM, t...@redhat.com wrote:

From: Tom Rix 

clang static analysis reports this representative problem
amdgpu_smu.c:144:18: warning: The left operand of '*' is a garbage value
 return clk_freq * 100;
 ^

If there is no get_dpm_ultimate_freq function,
smu_get_dpm_freq_range returns success without setting the
output min,max parameters.  Because this is an extern function,
set the min,max to 0 when there is no get_dpm_ultimate_freq.

Fixes: e5ef784b1e17 ("drm/amd/powerplay: revise calling chain on retrieving 
frequency range")
Signed-off-by: Tom Rix 
---
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 5ace30434e603..35fbe51f52eaa 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -121,11 +121,17 @@ int smu_get_dpm_freq_range(struct smu_context *smu,
if (!min && !max)
return -EINVAL;
  
-	if (smu->ppt_funcs->get_dpm_ultimate_freq)

+   if (smu->ppt_funcs->get_dpm_ultimate_freq) {
ret = smu->ppt_funcs->get_dpm_ultimate_freq(smu,
clk_type,
min,
max);
+   } else {


return -ENOTSUPP; would be more appropriate.
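
i.e., roughly (a sketch only):

        if (!smu->ppt_funcs->get_dpm_ultimate_freq)
                return -ENOTSUPP;

        return smu->ppt_funcs->get_dpm_ultimate_freq(smu, clk_type, min, max);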

Thanks,
Lijo


+   if (min)
+   *min = 0;
+   if (max)
+   *max = 0;
+   }
  
  	return ret;

  }



Re: [PATCH] drm/amdgpu: Add missing pm_runtime_put_autosuspend

2022-01-18 Thread Lazar, Lijo




On 1/18/2022 5:31 PM, Yongzhi Liu wrote:

pm_runtime_get_sync() increments the runtime PM usage counter even
when it returns an error code, thus a matching decrement is needed
on the error handling path to keep the counter balanced.

Signed-off-by: Yongzhi Liu 


Thanks!

Reviewed-by: Lijo Lazar 
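
For context, the general pattern being fixed looks like this (a generic
sketch with a plain struct device *dev, not amdgpu-specific;
pm_runtime_resume_and_get() is the newer helper that drops the usage count
itself on failure):

r = pm_runtime_get_sync(dev);
if (r < 0) {
        pm_runtime_put_autosuspend(dev);        /* balance the usage counter */
        return r;
}

/* or, avoiding the pitfall altogether: */
r = pm_runtime_resume_and_get(dev);
if (r < 0)
        return r;       /* usage counter already balanced on failure */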


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 9aea1cc..4b950de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1120,8 +1120,10 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file 
*f, char __user *buf,
return -EINVAL;
  
  	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);

-   if (r < 0)
+   if (r < 0) {
+   pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
+   }
  
  	while (size) {

uint32_t value;



Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs

2022-01-05 Thread Lazar, Lijo




On 1/5/2022 6:45 PM, Christian König wrote:

Am 05.01.22 um 14:11 schrieb Lazar, Lijo:

On 1/5/2022 6:01 PM, Christian König wrote:

Am 05.01.22 um 10:54 schrieb Lazar, Lijo:

On 12/23/2021 3:35 AM, Andrey Grodzovsky wrote:

Use reset domain wq also for non TDR gpu recovery triggers
such as sysfs and RAS. We must serialize all possible
GPU recoveries to guarantee no concurrency there.
For TDR call the original recovery function directly since
it's already executed from within the wq. For others just
use a wrapper to queue work and wait on it to finish.

v2: Rename to amdgpu_recover_work_struct

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 
+-

  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  2 +-
  3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index b5ff76aae7e0..8e96b9a14452 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct 
amdgpu_device *adev);

  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
    struct amdgpu_job* job);
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+  struct amdgpu_job *job);
  void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
  int amdgpu_device_pci_reset(struct amdgpu_device *adev);
  bool amdgpu_device_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 7c063fd37389..258ec3c0b2af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
   * Returns 0 for success or an error on failure.
   */
  -int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
    struct amdgpu_job *job)
  {
  struct list_head device_list, *device_list_handle = NULL;
@@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct 
amdgpu_device *adev,

  return r;
  }
  +struct amdgpu_recover_work_struct {
+    struct work_struct base;
+    struct amdgpu_device *adev;
+    struct amdgpu_job *job;
+    int ret;
+};
+
+static void amdgpu_device_queue_gpu_recover_work(struct 
work_struct *work)

+{
+    struct amdgpu_recover_work_struct *recover_work = 
container_of(work, struct amdgpu_recover_work_struct, base);

+
+    recover_work->ret = 
amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);

+}
+/*
+ * Serialize gpu recover into reset domain single threaded wq
+ */
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+    struct amdgpu_job *job)
+{
+    struct amdgpu_recover_work_struct work = {.adev = adev, .job = 
job};

+
+    INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
+
+    if (!queue_work(adev->reset_domain.wq, &work.base))
+    return -EAGAIN;
+


The decision to schedule a reset is made at this point. Subsequent 
accesses to hardware may not be reliable. So should the flag 
in_reset be set here itself rather than waiting for the work to 
start execution?


No, when we race and lose the VM is completely lost and probably 
restarted by the hypervisor.


And when we race and win we properly set the flag before signaling 
the hypervisor that it can continue with the reset.




I was talking about the baremetal case. When this was synchronous, the
in_reset flag is set as one of the first things and amdgpu_in_reset is
checked to prevent further hardware accesses. This design only changes
the recovery part and doesn't change the hardware perspective.


Potential accesses from other processes need to be blocked as soon as 
we determine a reset is required.


That's an incorrect assumption.

Accessing the hardware is perfectly ok as long as the reset hasn't 
started yet. In other words even when the hardware is locked up you can 
still happily read/write registers or access the VRAM BAR.




Not sure if that is 100% correct, e.g. for a recovery triggered by a RAS
error (depends on the access done).


Thanks,
Lijo

Only when the hardware is currently performing a reset, then we can't 
touch it or there might be unfortunate consequences (usually complete 
system lockup).


Regards,
Christian.


Are we expecting the work to be immediately executed and set the flags?

Thanks,
Lijo

Also, what about having the reset_active or in_reset flag in the 
reset_domain itself?


Offhand that sounds like a good idea.

Regards,
Christian.



Thanks,
Lijo


+    flush_work(&work.base);
+
+    return work.ret;
+}
+
  /**
   * amdgpu_device_get_pcie_info - fence pcie info about th
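
To illustrate the idea discussed in this thread (an in_reset/reset_active
flag owned by the reset domain and set before the work is queued), a rough
sketch - the in_reset field and the error handling below are hypothetical
and not part of the posted patches:

	struct amdgpu_reset_domain {
		struct workqueue_struct *wq;
		atomic_t in_reset;	/* set as soon as a reset is decided */
	};

	int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
				      struct amdgpu_job *job)
	{
		struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};

		/* block further hardware access before the work actually runs */
		if (atomic_cmpxchg(&adev->reset_domain.in_reset, 0, 1) != 0)
			return -EAGAIN;

		INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
		if (!queue_work(adev->reset_domain.wq, &work.base)) {
			atomic_set(&adev->reset_domain.in_reset, 0);
			return -EAGAIN;
		}

		flush_work(&work.base);
		return work.ret;
	}

Clearing in_reset once recovery completes is omitted here; the point is
only that amdgpu_in_reset()-style checks would start failing as soon as the
reset is decided, not when the queued work eventually runs.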

Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs

2022-01-05 Thread Lazar, Lijo




On 1/5/2022 6:01 PM, Christian König wrote:

Am 05.01.22 um 10:54 schrieb Lazar, Lijo:

On 12/23/2021 3:35 AM, Andrey Grodzovsky wrote:

Use reset domain wq also for non TDR gpu recovery triggers
such as sysfs and RAS. We must serialize all possible
GPU recoveries to guarantee no concurrency there.
For TDR call the original recovery function directly since
it's already executed from within the wq. For others just
use a wrapper to queue work and wait on it to finish.

v2: Rename to amdgpu_recover_work_struct

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  2 +-
  3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index b5ff76aae7e0..8e96b9a14452 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct 
amdgpu_device *adev);

  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
    struct amdgpu_job* job);
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+  struct amdgpu_job *job);
  void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
  int amdgpu_device_pci_reset(struct amdgpu_device *adev);
  bool amdgpu_device_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 7c063fd37389..258ec3c0b2af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
   * Returns 0 for success or an error on failure.
   */
  -int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
    struct amdgpu_job *job)
  {
  struct list_head device_list, *device_list_handle =  NULL;
@@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct 
amdgpu_device *adev,

  return r;
  }
  +struct amdgpu_recover_work_struct {
+    struct work_struct base;
+    struct amdgpu_device *adev;
+    struct amdgpu_job *job;
+    int ret;
+};
+
+static void amdgpu_device_queue_gpu_recover_work(struct work_struct 
*work)

+{
+    struct amdgpu_recover_work_struct *recover_work = 
container_of(work, struct amdgpu_recover_work_struct, base);

+
+    recover_work->ret = 
amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);

+}
+/*
+ * Serialize gpu recover into reset domain single threaded wq
+ */
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+    struct amdgpu_job *job)
+{
+    struct amdgpu_recover_work_struct work = {.adev = adev, .job = 
job};

+
+    INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
+
+    if (!queue_work(adev->reset_domain.wq, &work.base))
+    return -EAGAIN;
+


The decision to schedule a reset is made at this point. Subsequent 
accesses to hardware may not be reliable. So should the flag in_reset 
be set here itself rather than waiting for the work to start execution?


No, when we race and lose the VM is completely lost and probably 
restarted by the hypervisor.


And when we race and win we properly set the flag before signaling the 
hypervisor that it can continue with the reset.




I was talking about the baremetal case. When this was synchronous, the
in_reset flag is set as one of the first things and amdgpu_in_reset is
checked to prevent further hardware accesses. This design only changes the
recovery part and doesn't change the hardware perspective. Potential
accesses from other processes need to be blocked as soon as we determine a
reset is required. Are we expecting the work to be executed immediately and
set the flags?


Thanks,
Lijo

Also, what about having the reset_active or in_reset flag in the 
reset_domain itself?


Offhand that sounds like a good idea.

Regards,
Christian.



Thanks,
Lijo


+    flush_work(&work.base);
+
+    return work.ret;
+}
+
  /**
   * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index bfc47bea23db..38c9fd7b7ad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat 
amdgpu_job_timedout(struct drm_sched_job *s_job)

    ti.process_name, ti.tgid, ti.task_name, ti.pid);
    if (amdgpu_device_should_recover_gpu(ring->adev)) {
-    amdgpu_device_gpu_recover(ring->adev, job);
+    amdgpu_device_gpu_recover_imp(ring->adev, job);
  } else {
  drm_sched_suspend_timeout(&ring->sched);
  if (amdgpu_sriov_vf(adev))





Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs

2022-01-05 Thread Lazar, Lijo




On 12/23/2021 3:35 AM, Andrey Grodzovsky wrote:

Use reset domain wq also for non TDR gpu recovery triggers
such as sysfs and RAS. We must serialize all possible
GPU recoveries to guarantee no concurrency there.
For TDR call the original recovery function directly since
it's already executed from within the wq. For others just
use a wrapper to queue work and wait on it to finish.

v2: Rename to amdgpu_recover_work_struct

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  2 +-
  3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b5ff76aae7e0..8e96b9a14452 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct amdgpu_device 
*adev);
  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
  struct amdgpu_job* job);
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+ struct amdgpu_job *job);
  void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
  int amdgpu_device_pci_reset(struct amdgpu_device *adev);
  bool amdgpu_device_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7c063fd37389..258ec3c0b2af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
   * Returns 0 for success or an error on failure.
   */
  
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,

+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
  struct amdgpu_job *job)
  {
struct list_head device_list, *device_list_handle =  NULL;
@@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return r;
  }
  
+struct amdgpu_recover_work_struct {

+   struct work_struct base;
+   struct amdgpu_device *adev;
+   struct amdgpu_job *job;
+   int ret;
+};
+
+static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
+{
+   struct amdgpu_recover_work_struct *recover_work = container_of(work, 
struct amdgpu_recover_work_struct, base);
+
+   recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, 
recover_work->job);
+}
+/*
+ * Serialize gpu recover into reset domain single threaded wq
+ */
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+   struct amdgpu_job *job)
+{
+   struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
+
+   INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
+
+   if (!queue_work(adev->reset_domain.wq, &work.base))
+   return -EAGAIN;
+


The decision to schedule a reset is made at this point. Subsequent 
accesses to hardware may not be reliable. So should the flag in_reset be 
set here itself rather than waiting for the work to start execution?


Also, what about having the reset_active or in_reset flag in the 
reset_domain itself?


Thanks,
Lijo


+   flush_work(&work.base);
+
+   return work.ret;
+}
+
  /**
   * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index bfc47bea23db..38c9fd7b7ad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
  ti.process_name, ti.tgid, ti.task_name, ti.pid);
  
  	if (amdgpu_device_should_recover_gpu(ring->adev)) {

-   amdgpu_device_gpu_recover(ring->adev, job);
+   amdgpu_device_gpu_recover_imp(ring->adev, job);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))



Re: [PATCH 1/1] drm/amdgpu: ignore -EPERM error from debugfs

2021-10-05 Thread Lazar, Lijo




On 10/6/2021 12:05 PM, Christian König wrote:

Am 06.10.21 um 08:32 schrieb Lazar, Lijo:



On 10/6/2021 11:49 AM, Christian König wrote:

Am 06.10.21 um 06:51 schrieb Lazar, Lijo:



On 10/5/2021 10:15 PM, Christian König wrote:

Am 05.10.21 um 15:49 schrieb Das, Nirmoy:


On 10/5/2021 3:22 PM, Christian König wrote:



Am 05.10.21 um 15:11 schrieb Nirmoy Das:

Debugfs core APIs will throw -EPERM when user disables debugfs
using CONFIG_DEBUG_FS_ALLOW_NONE or with kernel param. We shouldn't
see that as an error. Also validate drm root dentry before creating
amdgpu debugfs files.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index 6611b3c7c149..d786072e918b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1617,6 +1617,16 @@ int amdgpu_debugfs_init(struct 
amdgpu_device *adev)

  struct dentry *ent;
  int r, i;
  +    if (IS_ERR(root)) {
+    /* When debugfs is disabled we get -EPERM which is not an
+ * error as this is user controllable.
+ */


Well setting primary->debugfs_root to an error code is probably 
not a good idea to begin with.


When debugfs is disabled that should most likely be NULL.



If we set primary->debugfs_root to NULL then we need to add a bunch
of NULL checks everywhere before creating any debugfs files, because
debugfs_create_{file|dir}() with a NULL root is still valid. I am
assuming a hypothetical case where debugfs_root dir creation fails
even with debugfs enabled but further calls are successful. This
won't be a problem if we propagate the error code.
propagate the error code.


Yeah, but an error code in members is ugly like hell and 
potentially causes crashes instead.


I strongly suggest to fix this so that root is NULL when debugfs 
isn't available and we add proper checks for that instead.


This shouldn't be done. A NULL is a valid parent for debugfs API. An 
invalid parent is always checked like this

  if (IS_ERR(parent))
    return parent;

Instead of adding redundant work like NULL checks, let the API do
its work and don't break the API contract. For an example of client
usage, you may look at the drm usage; it does the same.


Yeah, but that is horrible API design and should be avoided.

ERR_PTR(), PTR_ERR(), IS_ERR() and similar are supposed to be used as 
alternative to signaling errors as return values from functions and 
should *never* ever be used to signal errors in pointer members.




One escape route may be - add another export from debugfs like 
debugfs_is_valid_node() which adheres to the current logic in debugfs 
API and use that in client code. Whenever debugfs changes to a 
different logic from IS_ERR, let that be changed.


Well, that would then rather be drm_is_debugfs_enabled(), because the
fact that we separate debugfs handling into a drm core part and
individual drivers is drm specific.




Had one more look and looks like this will do the job. In other cases, 
API usage is allowed.


if (!debugfs_initialized())
return;

Thanks,
Lijo


Christian.



Thanks,
Lijo


Regards,
Christian.



Thanks,
Lijo



Regards,
Christian.




Regards,

Nirmoy



Regards,
Christian.


+    if (PTR_ERR(root) == -EPERM)
+    return 0;
+
+    return PTR_ERR(ent);
+    }
+
  ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, 
adev,

    &fops_ib_preempt);
  if (IS_ERR(ent)) {
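
For reference, a minimal sketch of the debugfs_initialized() check pointed
at above, applied at the top of amdgpu_debugfs_init() (illustrative only;
the surrounding code is as in the quoted patch):

	int amdgpu_debugfs_init(struct amdgpu_device *adev)
	{
		struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
		struct dentry *ent;
		int r, i;

		/* debugfs core is compiled out or disabled on the kernel command line */
		if (!debugfs_initialized())
			return 0;

		ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
					  &fops_ib_preempt);
		...
	}

debugfs_initialized() is exported by the debugfs core, so the driver never
has to look at the IS_ERR() encoding of primary->debugfs_root at all.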










Re: [PATCH 1/1] drm/amdgpu: ignore -EPERM error from debugfs

2021-10-05 Thread Lazar, Lijo




On 10/6/2021 11:49 AM, Christian König wrote:

Am 06.10.21 um 06:51 schrieb Lazar, Lijo:



On 10/5/2021 10:15 PM, Christian König wrote:

Am 05.10.21 um 15:49 schrieb Das, Nirmoy:


On 10/5/2021 3:22 PM, Christian König wrote:



Am 05.10.21 um 15:11 schrieb Nirmoy Das:

Debugfs core APIs will throw -EPERM when user disables debugfs
using CONFIG_DEBUG_FS_ALLOW_NONE or with kernel param. We shouldn't
see that as an error. Also validate drm root dentry before creating
amdgpu debugfs files.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index 6611b3c7c149..d786072e918b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1617,6 +1617,16 @@ int amdgpu_debugfs_init(struct 
amdgpu_device *adev)

  struct dentry *ent;
  int r, i;
  +    if (IS_ERR(root)) {
+    /* When debugfs is disabled we get -EPERM which is not an
+ * error as this is user controllable.
+ */


Well setting primary->debugfs_root to an error code is probably not 
a good idea to begin with.


When debugfs is disabled that should most likely be NULL.



If we set primary->debugfs_root to NULL then we need to add a bunch
of NULL checks everywhere before creating any debugfs files, because
debugfs_create_{file|dir}() with a NULL root is still valid. I am
assuming a hypothetical case where debugfs_root dir creation fails
even with debugfs enabled but further calls are successful. This
won't be a problem if we propagate the error code.
propagate the error code.


Yeah, but an error code in members is ugly like hell and potentially 
causes crashes instead.


I strongly suggest to fix this so that root is NULL when debugfs 
isn't available and we add proper checks for that instead.


This shouldn't be done. A NULL is a valid parent for debugfs API. An 
invalid parent is always checked like this

  if (IS_ERR(parent))
    return parent;

Instead of adding redundant work like NULL checks, let the API do its
work and don't break the API contract. For an example of client usage,
you may look at the drm usage; it does the same.


Yeah, but that is horrible API design and should be avoided.

ERR_PTR(), PTR_ERR(), IS_ERR() and similar are supposed to be used as 
alternative to signaling errors as return values from functions and 
should *never* ever be used to signal errors in pointer members.




One escape route may be - add another export from debugfs like 
debugfs_is_valid_node() which adheres to the current logic in debugfs 
API and use that in client code. Whenever debugfs changes to a different 
logic from IS_ERR, let that be changed.


Thanks,
Lijo


Regards,
Christian.



Thanks,
Lijo



Regards,
Christian.




Regards,

Nirmoy



Regards,
Christian.


+    if (PTR_ERR(root) == -EPERM)
+    return 0;
+
+    return PTR_ERR(ent);
+    }
+
  ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, 
adev,

    &fops_ib_preempt);
  if (IS_ERR(ent)) {
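
For completeness, a sketch of the debugfs_is_valid_node() escape route
mentioned above. Such a helper does not exist in debugfs today; this is
only what the export might look like:

	/* hypothetical export from fs/debugfs/inode.c:
	 * NULL is a legal parent, only ERR_PTR() values are rejected
	 */
	bool debugfs_is_valid_node(struct dentry *dentry)
	{
		return !IS_ERR(dentry);
	}

	/* client side, e.g. in amdgpu_debugfs_init() */
	if (!debugfs_is_valid_node(root))
		return 0;

Whatever internal encoding debugfs uses for "disabled" then stays hidden
behind the helper instead of being open-coded as IS_ERR() in drivers.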








Re: [PATCH 1/1] drm/amdgpu: ignore -EPERM error from debugfs

2021-10-05 Thread Lazar, Lijo




On 10/5/2021 10:15 PM, Christian König wrote:

Am 05.10.21 um 15:49 schrieb Das, Nirmoy:


On 10/5/2021 3:22 PM, Christian König wrote:



Am 05.10.21 um 15:11 schrieb Nirmoy Das:

Debugfs core APIs will throw -EPERM when user disables debugfs
using CONFIG_DEBUG_FS_ALLOW_NONE or with kernel param. We shouldn't
see that as an error. Also validate drm root dentry before creating
amdgpu debugfs files.

Signed-off-by: Nirmoy Das 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index 6611b3c7c149..d786072e918b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1617,6 +1617,16 @@ int amdgpu_debugfs_init(struct amdgpu_device 
*adev)

  struct dentry *ent;
  int r, i;
  +    if (IS_ERR(root)) {
+    /* When debugfs is disabled we get -EPERM which is not an
+ * error as this is user controllable.
+ */


Well setting primary->debugfs_root to an error code is probably not a 
good idea to begin with.


When debugfs is disabled that should most likely be NULL.



If we set primary->debugfs_root to NULL then we need to add a bunch of
NULL checks everywhere before creating any debugfs files, because
debugfs_create_{file|dir}() with a NULL root is still valid. I am
assuming a hypothetical case where debugfs_root dir creation fails
even with debugfs enabled but further calls are successful. This
won't be a problem if we propagate the error code.
propagate the error code.


Yeah, but an error code in members is ugly like hell and potentially 
causes crashes instead.


I strongly suggest to fix this so that root is NULL when debugfs isn't 
available and we add proper checks for that instead.


This shouldn't be done. A NULL is a valid parent for debugfs API. An 
invalid parent is always checked like this

  if (IS_ERR(parent))
return parent;

Instead of adding redundant work like NULL checks, let the API do its
work and don't break the API contract. For an example of client usage,
you may look at the drm usage; it does the same.


Thanks,
Lijo



Regards,
Christian.




Regards,

Nirmoy



Regards,
Christian.


+    if (PTR_ERR(root) == -EPERM)
+    return 0;
+
+    return PTR_ERR(ent);
+    }
+
  ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
    &fops_ib_preempt);
  if (IS_ERR(ent)) {






Re: [PATCH] drm/amd/pm: Add destination bounds checking to struct copy

2021-08-22 Thread Lazar, Lijo

Thanks Kees!

Reviewed-by: Lijo Lazar 

Thanks,
Lijo

On 8/20/2021 1:44 AM, Kees Cook wrote:

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

The "Board Parameters" members of the structs:
struct atom_smc_dpm_info_v4_5
struct atom_smc_dpm_info_v4_6
struct atom_smc_dpm_info_v4_7
struct atom_smc_dpm_info_v4_10
are written to the corresponding members of the corresponding PPTable_t
variables, but they lack destination size bounds checking, which means
the compiler cannot verify at compile time that this is an intended and
safe memcpy().

Since the header files are effectively immutable[1] and a struct_group()
cannot be used, nor a common struct referenced by both sides of the
memcpy() arguments, add a new helper, memcpy_trailing(), to perform the
bounds checking at compile time. Replace the open-coded memcpy()s with
memcpy_trailing() which includes enough context for the bounds checking.

"objdump -d" shows no object code changes.

[1] 
https://lore.kernel.org/lkml/e56aad3c-a06f-da07-f491-a894a570d78f@amd.com

Cc: Lijo Lazar 
Cc: "Christian König" 
Cc: "Pan, Xinhui" 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Hawking Zhang 
Cc: Feifei Xu 
Cc: Likun Gao 
Cc: Jiawei Gu 
Cc: Evan Quan 
Cc: amd-...@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Kees Cook 
Link: 
https://lore.kernel.org/lkml/CADnq5_Npb8uYvd+R4UHgf-w8-cQj3JoODjviJR_Y9w9wqJ71mQ@mail.gmail.com
---
Alex, I dropped your prior Acked-by, since the implementation is very
different. If you're still happy with it, I can add it back. :)
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 25 +++
  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c |  6 ++---
  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  8 +++---
  .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c|  5 ++--
  4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 96e895d6be35..4605934a4fb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1446,4 +1446,29 @@ static inline int amdgpu_in_reset(struct amdgpu_device 
*adev)
  {
return atomic_read(&adev->in_gpu_reset);
  }
+
+/**
+ * memcpy_trailing - Copy the end of one structure into the middle of another
+ *
+ * @dst: Pointer to destination struct
+ * @first_dst_member: The member name in @dst where the overwrite begins
+ * @last_dst_member: The member name in @dst where the overwrite ends after
+ * @src: Pointer to the source struct
+ * @first_src_member: The member name in @src where the copy begins
+ *
+ */
+#define memcpy_trailing(dst, first_dst_member, last_dst_member,
   \
+   src, first_src_member) \
+({\
+   size_t __src_offset = offsetof(typeof(*(src)), first_src_member);  \
+   size_t __src_size = sizeof(*(src)) - __src_offset; \
+   size_t __dst_offset = offsetof(typeof(*(dst)), first_dst_member);  \
+   size_t __dst_size = offsetofend(typeof(*(dst)), last_dst_member) - \
+   __dst_offset;  \
+   BUILD_BUG_ON(__src_size != __dst_size);\
+   __builtin_memcpy((u8 *)(dst) + __dst_offset,   \
+(u8 *)(src) + __src_offset,   \
+__dst_size);  \
+})
+
  #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 8ab58781ae13..1918e6232319 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -465,10 +465,8 @@ static int arcturus_append_powerplay_table(struct 
smu_context *smu)
  
  	if ((smc_dpm_table->table_header.format_revision == 4) &&

(smc_dpm_table->table_header.content_revision == 6))
-   memcpy(&smc_pptable->MaxVoltageStepGfx,
-
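
For context, an illustrative call site for the new helper, using the member
names quoted in this thread (the exact replacement in the applied patch may
differ slightly):

	if ((smc_dpm_table->table_header.format_revision == 4) &&
	    (smc_dpm_table->table_header.content_revision == 6))
		/* copy everything from MaxVoltageStepGfx up to and including
		 * BoardReserved from the trailing part of smc_dpm_table
		 */
		memcpy_trailing(smc_pptable, MaxVoltageStepGfx, BoardReserved,
				smc_dpm_table, maxvoltagestepgfx);

The BUILD_BUG_ON() inside the macro then verifies at compile time that the
source and destination regions have the same size.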

Re: [PATCH v2 18/63] drm/amd/pm: Use struct_group() for memcpy() region

2021-08-18 Thread Lazar, Lijo




On 8/19/2021 5:29 AM, Kees Cook wrote:

On Wed, Aug 18, 2021 at 05:12:28PM +0530, Lazar, Lijo wrote:


On 8/18/2021 11:34 AM, Kees Cook wrote:

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

Use struct_group() in structs:
struct atom_smc_dpm_info_v4_5
struct atom_smc_dpm_info_v4_6
struct atom_smc_dpm_info_v4_7
struct atom_smc_dpm_info_v4_10
PPTable_t
so the grouped members can be referenced together. This will allow
memcpy() and sizeof() to more easily reason about sizes, improve
readability, and avoid future warnings about writing beyond the end of
the first member.

"pahole" shows no size nor member offset changes to any structs.
"objdump -d" shows no object code changes.

Cc: "Christian König" 
Cc: "Pan, Xinhui" 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Hawking Zhang 
Cc: Feifei Xu 
Cc: Lijo Lazar 
Cc: Likun Gao 
Cc: Jiawei Gu 
Cc: Evan Quan 
Cc: amd-...@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Kees Cook 
Acked-by: Alex Deucher 
Link: 
https://lore.kernel.org/lkml/CADnq5_Npb8uYvd+R4UHgf-w8-cQj3JoODjviJR_Y9w9wqJ71mQ@mail.gmail.com
---
   drivers/gpu/drm/amd/include/atomfirmware.h   |  9 -
   .../gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h|  3 ++-
   drivers/gpu/drm/amd/pm/inc/smu11_driver_if_navi10.h  |  3 ++-
   .../gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h   |  3 ++-


Hi Kees,


Hi! Thanks for looking into this.


The headers which define these structs are firmware/VBIOS interfaces and are
picked directly from those components. There are difficulties in grouping
them to structs at the original source as that involves other component
changes.


So, can you help me understand this a bit more? It sounds like these are
generated headers, yes? I'd like to understand your constraints and
weight them against various benefits that could be achieved here.

The groupings I made do appear to be roughly documented already,
for example:

struct   atom_common_table_header  table_header;
  // SECTION: BOARD PARAMETERS
+  struct_group(dpm_info,

Something emitted the "BOARD PARAMETERS" section heading as a comment,
so it likely also would know where it ends, yes? The good news here is
that for the dpm_info groups, they all end at the end of the existing
structs, see:
struct atom_smc_dpm_info_v4_5
struct atom_smc_dpm_info_v4_6
struct atom_smc_dpm_info_v4_7
struct atom_smc_dpm_info_v4_10

The matching regions in the PPTable_t structs are similarly marked with a
"BOARD PARAMETERS" section heading comment:

--- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h
@@ -643,6 +643,7 @@ typedef struct {
// SECTION: BOARD PARAMETERS
  
// SVI2 Board Parameters

+  struct_group(v4_6,
uint16_t MaxVoltageStepGfx; // In mV(Q2) Max voltage step that SMU will 
request. Multiple steps are taken if voltage change exceeds this value.
uint16_t MaxVoltageStepSoc; // In mV(Q2) Max voltage step that SMU will 
request. Multiple steps are taken if voltage change exceeds this value.
  
@@ -728,10 +729,10 @@ typedef struct {

uint32_t BoardVoltageCoeffB;// decode by /1000
  
uint32_t BoardReserved[7];

+  );
  
// Padding for MMHUB - do not modify this

uint32_t MmHubPadding[8]; // SMU internal use
-
  } PPTable_t;

Where they end seems known as well (the padding switches from a "Board"
to "MmHub" prefix at exactly the matching size).

So, given that these regions are already known by the export tool, how
about just updating the export tool to emit a struct there? I imagine
the problem with this would be the identifier churn needed, but that's
entirely mechanical.

However, I'm curious about another aspect of these regions: they are,
by definition, the same. Why isn't there a single struct describing
them already, given the existing redundancy? For example, look at the
member names: maxvoltagestepgfx vs MaxVoltageStepGfx. Why aren't these
the same? And then why aren't they described separately?

Fixing that would cut down on the redundancy here, and in the renaming,
you can fix the identifiers as well. It should be straightforward to
write a Coccinelle script to do this renaming for you after extracting
the structure.
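
A sketch of the "single struct describing them" idea raised here, purely
illustrative - the struct and member names below are hypothetical and do
not exist in the headers:

	/* one definition shared by the VBIOS-side and SMU-side tables */
	struct smu_board_params {
		uint16_t max_voltage_step_gfx;
		uint16_t max_voltage_step_soc;
		/* ... remaining "BOARD PARAMETERS" members ... */
		uint32_t board_reserved[10];
	};

	struct atom_smc_dpm_info_v4_6 {
		struct atom_common_table_header table_header;
		uint32_t i2c_padding[3];
		struct smu_board_params board;	/* instead of flattened members */
	};

	typedef struct {
		/* ... SMU state ... */
		struct smu_board_params board;	/* instead of flattened members */
		uint32_t MmHubPadding[8];
	} PPTable_t;

The copy in the driver would then collapse to a plain struct assignment,
smc_pptable->board = smc_dpm_table->board, which FORTIFY_SOURCE can check
trivially.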


The driver_if_* files updates are frequent and

Re: [PATCH v2 18/63] drm/amd/pm: Use struct_group() for memcpy() region

2021-08-18 Thread Lazar, Lijo



On 8/18/2021 11:34 AM, Kees Cook wrote:

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

Use struct_group() in structs:
struct atom_smc_dpm_info_v4_5
struct atom_smc_dpm_info_v4_6
struct atom_smc_dpm_info_v4_7
struct atom_smc_dpm_info_v4_10
PPTable_t
so the grouped members can be referenced together. This will allow
memcpy() and sizeof() to more easily reason about sizes, improve
readability, and avoid future warnings about writing beyond the end of
the first member.

"pahole" shows no size nor member offset changes to any structs.
"objdump -d" shows no object code changes.

Cc: "Christian König" 
Cc: "Pan, Xinhui" 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Hawking Zhang 
Cc: Feifei Xu 
Cc: Lijo Lazar 
Cc: Likun Gao 
Cc: Jiawei Gu 
Cc: Evan Quan 
Cc: amd-...@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Kees Cook 
Acked-by: Alex Deucher 
Link: 
https://lore.kernel.org/lkml/CADnq5_Npb8uYvd+R4UHgf-w8-cQj3JoODjviJR_Y9w9wqJ71mQ@mail.gmail.com
---
  drivers/gpu/drm/amd/include/atomfirmware.h   |  9 -
  .../gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h|  3 ++-
  drivers/gpu/drm/amd/pm/inc/smu11_driver_if_navi10.h  |  3 ++-
  .../gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h   |  3 ++-


Hi Kees,

The headers which define these structs are firmware/VBIOS interfaces and 
are picked directly from those components. There are difficulties in 
grouping them to structs at the original source as that involves other 
component changes.


Updates to the driver_if_* files are frequent and it is error prone to
manually group them each time we pick them up for an update. Our usage of
memcpy in this way is restricted to only a very few places.


As another option - is it possible to have a helper function/macro like
memcpy_fortify() which takes the extra arguments and does the extra
compile-time checks? We will use the helper wherever we have this kind
of usage.


Thanks,
Lijo


  drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c|  6 +++---
  drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c  | 12 
  drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c   |  6 +++---
  7 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h 
b/drivers/gpu/drm/amd/include/atomfirmware.h
index 44955458fe38..7bf3edf15410 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -2081,6 +2081,7 @@ struct atom_smc_dpm_info_v4_5
  {
struct   atom_common_table_header  table_header;
  // SECTION: BOARD PARAMETERS
+  struct_group(dpm_info,
  // I2C Control
struct smudpm_i2c_controller_config_v2  I2cControllers[8];
  
@@ -2159,7 +2160,7 @@ struct atom_smc_dpm_info_v4_5

uint32_t MvddRatio; // This is used for MVDD Vid workaround. It has 16 
fractional bits (Q16.16)

uint32_t BoardReserved[9];

-
+  );
  };
  
  struct atom_smc_dpm_info_v4_6

@@ -2168,6 +2169,7 @@ struct atom_smc_dpm_info_v4_6
// section: board parameters
uint32_t i2c_padding[3];   // old i2c control are moved to new area
  
+  struct_group(dpm_info,

uint16_t maxvoltagestepgfx; // in mv(q2) max voltage step that smu will 
request. multiple steps are taken if voltage change exceeds this value.
uint16_t maxvoltagestepsoc; // in mv(q2) max voltage step that smu will 
request. multiple steps are taken if voltage change exceeds this value.
  
@@ -2246,12 +2248,14 @@ struct atom_smc_dpm_info_v4_6
  
// reserved

uint32_t   boardreserved[10];
+  );
  };
  
  struct atom_smc_dpm_info_v4_7

  {
struct   atom_common_table_header  table_header;
  // SECTION: BOARD PARAMETERS
+  struct_group(dpm_info,
  // I2C Control
struct smudpm_i2c_controller_config_v2  I2cControllers[8];
  
@@ -2348,6 +2352,7 @@ struct atom_smc_dpm_info_v4_7

uint8_t  Padding8_Psi2;
  
uint32_t BoardReserved[5];

+  );
  };
  
  struct smudpm_i2c_controller_config_v3

@@ -2478,6 +2483,7 @@ struct atom_smc_dpm_info_v4_10
struct   atom_common_table_header  table_header;
  
// SECTION: BOARD PARAMETERS

+  struct_group(dpm_info,
// Telemetry Settings
uint16_t GfxMaxCurrent; // in Amps
uint8_t   GfxOffset; // in Amps
@@ -2524,6 +2530,7 @@ struct atom_smc_dpm_info_v4_10
uint16_t spare5;
  
uint32_t reserved[16];

+  );
  };
  
  /*

diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_arcturus.h 
b/d

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-17 Thread Lazar, Lijo




On 8/17/2021 5:19 PM, Lazar, Lijo wrote:



On 8/17/2021 4:36 PM, Michel Dänzer wrote:

On 2021-08-17 12:37 p.m., Lazar, Lijo wrote:



On 8/17/2021 3:29 PM, Michel Dänzer wrote:

On 2021-08-17 11:37 a.m., Lazar, Lijo wrote:



On 8/17/2021 2:56 PM, Michel Dänzer wrote:

On 2021-08-17 11:12 a.m., Lazar, Lijo wrote:



On 8/17/2021 1:53 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if 
GFXOFF

was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This 
makes
sure the delayed work doesn't run at unexpected times, and 
allows it to

be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
  mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian 
König)

v4:
* Fix race condition between amdgpu_gfx_off_ctrl incrementing
  adev->gfx.gfx_off_req_count and 
amdgpu_device_delay_enable_gfx_off

  checking for it to be 0 (Evan Quan)

Cc: sta...@vger.kernel.org
Reviewed-by: Lijo Lazar  # v3
Acked-by: Christian König  # v3
Signed-off-by: Michel Dänzer 
---

Alex, probably best to wait a bit longer before picking this up. :)

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 36 
+++---

 2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void 
amdgpu_device_delay_enable_gfx_off(struct work_struct *work)

 struct amdgpu_device *adev =
 container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);

 -    mutex_lock(&adev->gfx.gfx_off_mutex);
-    if (!adev->gfx.gfx_off_state && 
!adev->gfx.gfx_off_req_count) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, true))

-    adev->gfx.gfx_off_state = true;
-    }
-    mutex_unlock(&adev->gfx.gfx_off_mutex);
+    WARN_ON_ONCE(adev->gfx.gfx_off_state);
+    WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+    if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, true))

+    adev->gfx.gfx_off_state = true;
 }
   /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct 
amdgpu_device *adev, bool enable)

   mutex_lock(&adev->gfx.gfx_off_mutex);
 -    if (!enable)
-    adev->gfx.gfx_off_req_count++;
-    else if (adev->gfx.gfx_off_req_count > 0)
+    if (enable) {
+    /* If the count is already 0, it means there's an 
imbalance bug somewhere.
+ * Note that the bug may be in a different caller than 
the one which triggers the

+ * WARN_ON_ONCE.
+ */
+    if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+    goto unlock;
+
 adev->gfx.gfx_off_req_count--;
 -    if (enable && !adev->gfx.gfx_off_state && 
!adev->gfx.gfx_off_req_count) {
-    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);

-    } else if (!enable && adev->gfx.gfx_off_state) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {

-    adev->gfx.gfx_off_state = false;
+    if (adev->gfx.gfx_off_req_count == 0 && 
!adev->gfx.gfx_off_state)
+
schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);

+    } else {
+    if (adev->gfx.gfx_off_req_count == 0) {
+
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

+
+    if (adev->gfx.gfx_off_state &&


More of a question which I didn't check last time - Is this 
expected to be true when the disable call comes in first?


My assumption is that cancel_delayed_work_sync guarantees 
amdgpu_device_delay_enable_gfx_off's assignment is visible here.




To clarify - when nothing is scheduled. If enable() is called when 
the count is 0, it goes to unlock. Now the expectation is someone 
to 

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-17 Thread Lazar, Lijo




On 8/17/2021 4:36 PM, Michel Dänzer wrote:

On 2021-08-17 12:37 p.m., Lazar, Lijo wrote:



On 8/17/2021 3:29 PM, Michel Dänzer wrote:

On 2021-08-17 11:37 a.m., Lazar, Lijo wrote:



On 8/17/2021 2:56 PM, Michel Dänzer wrote:

On 2021-08-17 11:12 a.m., Lazar, Lijo wrote:



On 8/17/2021 1:53 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This makes
sure the delayed work doesn't run at unexpected times, and allows it to
be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
  mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)
v4:
* Fix race condition between amdgpu_gfx_off_ctrl incrementing
  adev->gfx.gfx_off_req_count and amdgpu_device_delay_enable_gfx_off
  checking for it to be 0 (Evan Quan)

Cc: sta...@vger.kernel.org
Reviewed-by: Lijo Lazar  # v3
Acked-by: Christian König  # v3
Signed-off-by: Michel Dänzer 
---

Alex, probably best to wait a bit longer before picking this up. :)

     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
     drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 36 +++---
     2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
     struct amdgpu_device *adev =
     container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
     -    mutex_lock(&adev->gfx.gfx_off_mutex);
-    if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
true))
-    adev->gfx.gfx_off_state = true;
-    }
-    mutex_unlock(&adev->gfx.gfx_off_mutex);
+    WARN_ON_ONCE(adev->gfx.gfx_off_state);
+    WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+    adev->gfx.gfx_off_state = true;
     }
       /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
       mutex_lock(&adev->gfx.gfx_off_mutex);
     -    if (!enable)
-    adev->gfx.gfx_off_req_count++;
-    else if (adev->gfx.gfx_off_req_count > 0)
+    if (enable) {
+    /* If the count is already 0, it means there's an imbalance bug 
somewhere.
+ * Note that the bug may be in a different caller than the one which 
triggers the
+ * WARN_ON_ONCE.
+ */
+    if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+    goto unlock;
+
     adev->gfx.gfx_off_req_count--;
     -    if (enable && !adev->gfx.gfx_off_state && 
!adev->gfx.gfx_off_req_count) {
-    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-    } else if (!enable && adev->gfx.gfx_off_state) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
false)) {
-    adev->gfx.gfx_off_state = false;
+    if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
+    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
+    } else {
+    if (adev->gfx.gfx_off_req_count == 0) {
+    cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+    if (adev->gfx.gfx_off_state &&


More of a question which I didn't check last time - Is this expected to be true 
when the disable call comes in first?


My assumption is that cancel_delayed_work_sync guarantees 
amdgpu_device_delay_enable_gfx_off's assignment is visible here.



To clarify - when nothing is scheduled. If enable() is called when the count is
0, it goes to unlock. Now the expectation is for someone to call Disable() first.


Yes, the very first amdgpu_gfx_off_ctrl call must p

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-17 Thread Lazar, Lijo




On 8/17/2021 3:29 PM, Michel Dänzer wrote:

On 2021-08-17 11:37 a.m., Lazar, Lijo wrote:



On 8/17/2021 2:56 PM, Michel Dänzer wrote:

On 2021-08-17 11:12 a.m., Lazar, Lijo wrote:



On 8/17/2021 1:53 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This makes
sure the delayed work doesn't run at unexpected times, and allows it to
be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
     mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)
v4:
* Fix race condition between amdgpu_gfx_off_ctrl incrementing
     adev->gfx.gfx_off_req_count and amdgpu_device_delay_enable_gfx_off
     checking for it to be 0 (Evan Quan)

Cc: sta...@vger.kernel.org
Reviewed-by: Lijo Lazar  # v3
Acked-by: Christian König  # v3
Signed-off-by: Michel Dänzer 
---

Alex, probably best to wait a bit longer before picking this up. :)

    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 36 +++---
    2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
    struct amdgpu_device *adev =
    container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
    -    mutex_lock(&adev->gfx.gfx_off_mutex);
-    if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
true))
-    adev->gfx.gfx_off_state = true;
-    }
-    mutex_unlock(&adev->gfx.gfx_off_mutex);
+    WARN_ON_ONCE(adev->gfx.gfx_off_state);
+    WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+    adev->gfx.gfx_off_state = true;
    }
      /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
      mutex_lock(&adev->gfx.gfx_off_mutex);
    -    if (!enable)
-    adev->gfx.gfx_off_req_count++;
-    else if (adev->gfx.gfx_off_req_count > 0)
+    if (enable) {
+    /* If the count is already 0, it means there's an imbalance bug 
somewhere.
+ * Note that the bug may be in a different caller than the one which 
triggers the
+ * WARN_ON_ONCE.
+ */
+    if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+    goto unlock;
+
    adev->gfx.gfx_off_req_count--;
    -    if (enable && !adev->gfx.gfx_off_state && 
!adev->gfx.gfx_off_req_count) {
-    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-    } else if (!enable && adev->gfx.gfx_off_state) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
false)) {
-    adev->gfx.gfx_off_state = false;
+    if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
+    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
+    } else {
+    if (adev->gfx.gfx_off_req_count == 0) {
+    cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+    if (adev->gfx.gfx_off_state &&


More of a question which I didn't check last time - Is this expected to be true 
when the disable call comes in first?


My assumption is that cancel_delayed_work_sync guarantees 
amdgpu_device_delay_enable_gfx_off's assignment is visible here.



To clarify - when nothing is scheduled. If enable() is called when the count is
0, it goes to unlock. Now the expectation is for someone to call Disable() first.


Yes, the very first amdgpu_gfx_off_ctrl call must pass enable=false, or it's a 
bug, which

 if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-17 Thread Lazar, Lijo




On 8/17/2021 2:56 PM, Michel Dänzer wrote:

On 2021-08-17 11:12 a.m., Lazar, Lijo wrote:



On 8/17/2021 1:53 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This makes
sure the delayed work doesn't run at unexpected times, and allows it to
be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
    mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)
v4:
* Fix race condition between amdgpu_gfx_off_ctrl incrementing
    adev->gfx.gfx_off_req_count and amdgpu_device_delay_enable_gfx_off
    checking for it to be 0 (Evan Quan)

Cc: sta...@vger.kernel.org
Reviewed-by: Lijo Lazar  # v3
Acked-by: Christian König  # v3
Signed-off-by: Michel Dänzer 
---

Alex, probably best to wait a bit longer before picking this up. :)

   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 36 +++---
   2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
   struct amdgpu_device *adev =
   container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
   -    mutex_lock(&adev->gfx.gfx_off_mutex);
-    if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
true))
-    adev->gfx.gfx_off_state = true;
-    }
-    mutex_unlock(&adev->gfx.gfx_off_mutex);
+    WARN_ON_ONCE(adev->gfx.gfx_off_state);
+    WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+    adev->gfx.gfx_off_state = true;
   }
     /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
     mutex_lock(&adev->gfx.gfx_off_mutex);
   -    if (!enable)
-    adev->gfx.gfx_off_req_count++;
-    else if (adev->gfx.gfx_off_req_count > 0)
+    if (enable) {
+    /* If the count is already 0, it means there's an imbalance bug 
somewhere.
+ * Note that the bug may be in a different caller than the one which 
triggers the
+ * WARN_ON_ONCE.
+ */
+    if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+    goto unlock;
+
   adev->gfx.gfx_off_req_count--;
   -    if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) 
{
-    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-    } else if (!enable && adev->gfx.gfx_off_state) {
-    if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
false)) {
-    adev->gfx.gfx_off_state = false;
+    if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
+    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
+    } else {
+    if (adev->gfx.gfx_off_req_count == 0) {
+    cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+    if (adev->gfx.gfx_off_state &&


More of a question which I didn't check last time - Is this expected to be true 
when the disable call comes in first?


My assumption is that cancel_delayed_work_sync guarantees 
amdgpu_device_delay_enable_gfx_off's assignment is visible here.



To clarify - when nothing is scheduled. If enable() is called when the
count is 0, it goes to unlock. Now the expectation is for someone to call
Disable() first. Let's say Disable() is called first; then the variable
will be false, right?


Thanks,
Lijo
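
As background for the commit message quoted in this thread, the workqueue
semantics it relies on (illustrative, not part of the patch):

	/* does nothing if the work is already pending - the original expiry
	 * time is kept, so back-to-back calls do not push it out
	 */
	schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);

	/* would instead re-arm the timer to now + delay on every call;
	 * per the changelog above, this is what an earlier revision used and
	 * what v2 of the patch replaced
	 */
	mod_delayed_work(system_wq, &adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);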


Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-17 Thread Lazar, Lijo




On 8/17/2021 1:53 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This makes
sure the delayed work doesn't run at unexpected times, and allows it to
be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
   mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)
v4:
* Fix race condition between amdgpu_gfx_off_ctrl incrementing
   adev->gfx.gfx_off_req_count and amdgpu_device_delay_enable_gfx_off
   checking for it to be 0 (Evan Quan)

Cc: sta...@vger.kernel.org
Reviewed-by: Lijo Lazar  # v3
Acked-by: Christian König  # v3
Signed-off-by: Michel Dänzer 
---

Alex, probably best to wait a bit longer before picking this up. :)

  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 36 +++---
  2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
  
-	mutex_lock(&adev->gfx.gfx_off_mutex);

-   if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, true))
-   adev->gfx.gfx_off_state = true;
-   }
-   mutex_unlock(&adev->gfx.gfx_off_mutex);
+   WARN_ON_ONCE(adev->gfx.gfx_off_state);
+   WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+   if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
true))
+   adev->gfx.gfx_off_state = true;
  }
  
  /**

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
  
  	mutex_lock(&adev->gfx.gfx_off_mutex);
  
-	if (!enable)

-   adev->gfx.gfx_off_req_count++;
-   else if (adev->gfx.gfx_off_req_count > 0)
+   if (enable) {
+   /* If the count is already 0, it means there's an imbalance bug 
somewhere.
+* Note that the bug may be in a different caller than the one 
which triggers the
+* WARN_ON_ONCE.
+*/
+   if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+   goto unlock;
+
adev->gfx.gfx_off_req_count--;
  
-	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {

-   schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-   } else if (!enable && adev->gfx.gfx_off_state) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {
-   adev->gfx.gfx_off_state = false;
+   if (adev->gfx.gfx_off_req_count == 0 && 
!adev->gfx.gfx_off_state)
+   schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
+   } else {
+   if (adev->gfx.gfx_off_req_count == 0) {
+   cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+   if (adev->gfx.gfx_off_state &&


More of a question which I didn't check last time - Is this expected to 
be true when the disable call comes in first?


Thanks,
Lijo


+   !amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {
+   adev->gfx.gfx_off_state = false;
  
-			if (adev->gfx.funcs->init_spm_golden) {

-   dev_dbg(adev->dev, "GFXOFF is disabled, re-init SPM 
golden settings\n");
-   amdgpu_gfx_init_spm_golden(adev);
+   if (adev->gfx.funcs->init_spm_golden) {
+   dev_dbg(adev->dev,
+   "GFXOFF is disabled, re-init SPM 
golden settings\n");
+ 

Re: [PATCH v3] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-17 Thread Lazar, Lijo




On 8/17/2021 1:21 PM, Quan, Evan wrote:

[AMD Official Use Only]




-Original Message-
From: amd-gfx  On Behalf Of
Michel Dänzer
Sent: Monday, August 16, 2021 6:35 PM
To: Deucher, Alexander ; Koenig, Christian

Cc: Liu, Leo ; Zhu, James ; amd-
g...@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: [PATCH v3] drm/amdgpu: Cancel delayed work when GFXOFF is
disabled

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This makes
sure the delayed work doesn't run at unexpected times, and allows it to
be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
   mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)

Cc: sta...@vger.kernel.org
Signed-off-by: Michel Dänzer 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 22 +-

  2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void
amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device,
gfx.gfx_off_delay_work.work);

-   mutex_lock(&adev->gfx.gfx_off_mutex);
-   if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GFX, true))
-   adev->gfx.gfx_off_state = true;
-   }
-   mutex_unlock(&adev->gfx.gfx_off_mutex);
+   WARN_ON_ONCE(adev->gfx.gfx_off_state);
+   WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+   if (!amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GFX, true))
+   adev->gfx.gfx_off_state = true;
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..ca91aafcb32b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,15 +563,26 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
*adev, bool enable)

mutex_lock(&adev->gfx.gfx_off_mutex);

-   if (!enable)
-   adev->gfx.gfx_off_req_count++;
-   else if (adev->gfx.gfx_off_req_count > 0)
+   if (enable) {
+   /* If the count is already 0, it means there's an imbalance bug
somewhere.
+* Note that the bug may be in a different caller than the one
which triggers the
+* WARN_ON_ONCE.
+*/
+   if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+   goto unlock;
+
adev->gfx.gfx_off_req_count--;
+   } else {
+   adev->gfx.gfx_off_req_count++;
+   }

if (enable && !adev->gfx.gfx_off_state && !adev-

gfx.gfx_off_req_count) {

schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
GFX_OFF_DELAY_ENABLE);
-   } else if (!enable && adev->gfx.gfx_off_state) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GFX, false)) {
+   } else if (!enable && adev->gfx.gfx_off_req_count == 1) {

[Quan, Evan] It seems this will leave a small time window for a race condition. If 
amdgpu_device_delay_enable_gfx_off() happens to run here, it will hit 
"WARN_ON_ONCE(adev->gfx.gfx_off_req_count);". How about something like below?
@@ -573,13 +573,11 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
 goto unlock;

 adev->gfx.gfx_off_req_count--;
-   } else {
-   adev->gfx.gfx_off_req_count++;
 }

 if (enable && !adev->gfx.gfx_off_state && 
!adev->gfx.gfx_off_req_count) {
 schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-   } else if (!enable && adev->gfx.gfx_off_req_count == 1) {
+   } else if (!enable && adev->gfx.gfx_off_req_count == 0) {
 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

 if (adev->gfx.gfx_off_state &&
@@ -593,6 +591,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
 }
 }

+  

Re: [PATCH v3] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-16 Thread Lazar, Lijo




On 8/16/2021 4:05 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This makes
sure the delayed work doesn't run at unexpected times, and allows it to
be lock-free.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
   mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König)

Cc: sta...@vger.kernel.org
Signed-off-by: Michel Dänzer 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 22 +-
  2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
  
-	mutex_lock(&adev->gfx.gfx_off_mutex);

-   if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, true))
-   adev->gfx.gfx_off_state = true;
-   }
-   mutex_unlock(&adev->gfx.gfx_off_mutex);
+   WARN_ON_ONCE(adev->gfx.gfx_off_state);


Don't see any case for this. It's not expected to be scheduled in this 
case, right?



+   WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+


Thinking about ON_ONCE here - this may happen more than once if it's 
completed as part of cancel_ call. Is the warning needed?


Anyway,
Reviewed-by: Lijo Lazar 


+   if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, 
true))
+   adev->gfx.gfx_off_state = true;
  }
  
  /**

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..ca91aafcb32b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,15 +563,26 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
  
  	mutex_lock(&adev->gfx.gfx_off_mutex);
  
-	if (!enable)

-   adev->gfx.gfx_off_req_count++;
-   else if (adev->gfx.gfx_off_req_count > 0)
+   if (enable) {
+   /* If the count is already 0, it means there's an imbalance bug 
somewhere.
+* Note that the bug may be in a different caller than the one 
which triggers the
+* WARN_ON_ONCE.
+*/
+   if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+   goto unlock;
+
adev->gfx.gfx_off_req_count--;
+   } else {
+   adev->gfx.gfx_off_req_count++;
+   }
  
  	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {

schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-   } else if (!enable && adev->gfx.gfx_off_state) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {
+   } else if (!enable && adev->gfx.gfx_off_req_count == 1) {
+   cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+   if (adev->gfx.gfx_off_state &&
+   !amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {
adev->gfx.gfx_off_state = false;
  
  			if (adev->gfx.funcs->init_spm_golden) {

@@ -581,6 +592,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
}
}
  
+unlock:

mutex_unlock(&adev->gfx.gfx_off_mutex);
  }
  



Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-15 Thread Lazar, Lijo




On 8/13/2021 9:30 PM, Michel Dänzer wrote:

On 2021-08-13 5:07 p.m., Lazar, Lijo wrote:



On 8/13/2021 8:10 PM, Michel Dänzer wrote:

On 2021-08-13 4:14 p.m., Lazar, Lijo wrote:

On 8/13/2021 7:04 PM, Michel Dänzer wrote:

On 2021-08-13 1:50 p.m., Lazar, Lijo wrote:

On 8/13/2021 3:59 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when GFXOFF transitions from
enabled to disabled. This makes sure the delayed work will be scheduled
as intended in the reverse case.

In order to avoid a deadlock, amdgpu_device_delay_enable_gfx_off needs
to use mutex_trylock instead of mutex_lock.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
  mod_delayed_work.

Signed-off-by: Michel Dänzer 
---
     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
     drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 13 +++--
     drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  3 +++
     3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..8b025f70706c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,7 +2777,16 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
     struct amdgpu_device *adev =
     container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
     -    mutex_lock(&adev->gfx.gfx_off_mutex);
+    /* mutex_lock could deadlock with cancel_delayed_work_sync in 
amdgpu_gfx_off_ctrl. */
+    if (!mutex_trylock(&adev->gfx.gfx_off_mutex)) {
+    /* If there's a bug which causes amdgpu_gfx_off_ctrl to be called with 
enable=true
+ * when adev->gfx.gfx_off_req_count is already 0, we might race with 
that.
+ * Re-schedule to make sure gfx off will be re-enabled in the HW 
eventually.
+ */
+    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
AMDGPU_GFX_OFF_DELAY_ENABLE);
+    return;


This is not needed and is just creating another thread to contend for mutex.


Still not sure what you mean by that. What other thread?


Sorry, I meant it schedules another workitem and delays GFXOFF enablement 
further. For ex: if it was another function like gfx_off_status holding the 
lock at the time of check.




The checks below take care of enabling gfxoff correctly. If it's already in 
gfx_off state, it doesn't do anything. So I don't see why this change is needed.


mutex_trylock is needed to prevent the deadlock discussed before and below.

schedule_delayed_work is needed due to this scenario hinted at by the comment:

1. amdgpu_gfx_off_ctrl locks mutex, calls schedule_delayed_work
2. amdgpu_device_delay_enable_gfx_off runs, calls mutex_trylock, which fails

GFXOFF would never get re-enabled in HW in this case (until amdgpu_gfx_off_ctrl 
calls schedule_delayed_work again).

(cancel_delayed_work_sync guarantees there's no pending delayed work when it 
returns, even if amdgpu_device_delay_enable_gfx_off calls schedule_delayed_work)



I think we need to explain based on the original code before. There is an 
assumption here that the only other contention of this mutex is with the 
gfx_off_ctrl function.


Not really.



As far as I understand if the work has already started running when 
schedule_delayed_work is called, it will insert another in the work queue after 
delay. Based on that understanding I didn't find a problem with the original 
code.


Original code as in without this patch or the mod_delayed_work patch? If so, 
the problem is not when the work has already started running. It's that when it 
hasn't started running yet, schedule_delayed_work doesn't change the timeout 
for the already scheduled work, so it ends up enabling GFXOFF earlier than 
intended (and thus at all in scenarios when it's not supposed to).
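
For reference, a self-contained module sketch (purely illustrative, not taken 
from any of these patches) of the semantics in question: a second 
schedule_delayed_work() on an already-pending item is a no-op and leaves the 
original expiry in place, while mod_delayed_work() re-arms the timer.

#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static void demo_fn(struct work_struct *work)
{
	pr_info("delayed work ran at %lu\n", jiffies);
}

static DECLARE_DELAYED_WORK(demo_work, demo_fn);

static int __init demo_init(void)
{
	/* Arms the timer: demo_fn will run ~100 ms from now. */
	schedule_delayed_work(&demo_work, msecs_to_jiffies(100));

	/* Already pending: returns false, the expiry is NOT pushed back. */
	if (!schedule_delayed_work(&demo_work, msecs_to_jiffies(100)))
		pr_info("second schedule_delayed_work was a no-op\n");

	/* mod_delayed_work() does re-arm the timer relative to now. */
	mod_delayed_work(system_wq, &demo_work, msecs_to_jiffies(100));

	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

That is the symptom in the commit message: with schedule_delayed_work the 
delayed enable fires ~100 ms after the first re-enable, not the last one.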



I meant the original implementation of amdgpu_device_delay_enable_gfx_off().


If you indeed want to use _sync, there is a small problem with this 
implementation also which is roughly equivalent to the original problem you 
faced.

amdgpu_gfx_off_ctrl(disable) locks mutex
calls cancel_delayed_work_sync
amdgpu_device_delay_enable_gfx_off already started running
 mutex_trylock fails and schedules another one
amdgpu_gfx_off_ctrl(enable)
	schedule_delayed_work() - Delay is not extended, it's the same as when 
it's rearmed from work item.

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-13 Thread Lazar, Lijo




On 8/13/2021 8:10 PM, Michel Dänzer wrote:

On 2021-08-13 4:14 p.m., Lazar, Lijo wrote:

On 8/13/2021 7:04 PM, Michel Dänzer wrote:

On 2021-08-13 1:50 p.m., Lazar, Lijo wrote:

On 8/13/2021 3:59 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when GFXOFF transitions from
enabled to disabled. This makes sure the delayed work will be scheduled
as intended in the reverse case.

In order to avoid a deadlock, amdgpu_device_delay_enable_gfx_off needs
to use mutex_trylock instead of mutex_lock.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
     mod_delayed_work.

Signed-off-by: Michel Dänzer 
---
    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 13 +++--
    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  3 +++
    3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..8b025f70706c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,7 +2777,16 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
    struct amdgpu_device *adev =
    container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
    -    mutex_lock(&adev->gfx.gfx_off_mutex);
+    /* mutex_lock could deadlock with cancel_delayed_work_sync in 
amdgpu_gfx_off_ctrl. */
+    if (!mutex_trylock(&adev->gfx.gfx_off_mutex)) {
+    /* If there's a bug which causes amdgpu_gfx_off_ctrl to be called with 
enable=true
+ * when adev->gfx.gfx_off_req_count is already 0, we might race with 
that.
+ * Re-schedule to make sure gfx off will be re-enabled in the HW 
eventually.
+ */
+    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
AMDGPU_GFX_OFF_DELAY_ENABLE);
+    return;


This is not needed and is just creating another thread to contend for mutex.


Still not sure what you mean by that. What other thread?


Sorry, I meant it schedules another workitem and delays GFXOFF enablement 
further. For ex: if it was another function like gfx_off_status holding the 
lock at the time of check.




The checks below take care of enabling gfxoff correctly. If it's already in 
gfx_off state, it doesn't do anything. So I don't see why this change is needed.


mutex_trylock is needed to prevent the deadlock discussed before and below.

schedule_delayed_work is needed due to this scenario hinted at by the comment:

1. amdgpu_gfx_off_ctrl locks mutex, calls schedule_delayed_work
2. amdgpu_device_delay_enable_gfx_off runs, calls mutex_trylock, which fails

GFXOFF would never get re-enabled in HW in this case (until amdgpu_gfx_off_ctrl 
calls schedule_delayed_work again).

(cancel_delayed_work_sync guarantees there's no pending delayed work when it 
returns, even if amdgpu_device_delay_enable_gfx_off calls schedule_delayed_work)



I think we need to explain based on the original code before. There is an 
assumption here that the only other contention of this mutex is with the 
gfx_off_ctrl function.


Not really.



As far as I understand if the work has already started running when 
schedule_delayed_work is called, it will insert another in the work queue after 
delay. Based on that understanding I didn't find a problem with the original 
code.


Original code as in without this patch or the mod_delayed_work patch? If so, 
the problem is not when the work has already started running. It's that when it 
hasn't started running yet, schedule_delayed_work doesn't change the timeout 
for the already scheduled work, so it ends up enabling GFXOFF earlier than 
intended (and thus at all in scenarios when it's not supposed to).



I meant the original implementation of 
amdgpu_device_delay_enable_gfx_off().



If you indeed want to use _sync, there is a small problem with this 
implementation also which is roughly equivalent to the original problem 
you faced.


amdgpu_gfx_off_ctrl(disable) locks mutex
calls cancel_delayed_work_sync
amdgpu_device_delay_enable_gfx_off already started running
mutex_trylock fails and schedules another one
amdgpu_gfx_off_ctrl(enable)
	schedule_delayed_work() - Delay is not extended, it's the same as when 
it's rearmed from work item.


Probably, overthinking about the solution. Looking back, mod_ version is 
simpler :).

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-13 Thread Lazar, Lijo




On 8/13/2021 7:04 PM, Michel Dänzer wrote:

On 2021-08-13 1:50 p.m., Lazar, Lijo wrote:



On 8/13/2021 3:59 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when GFXOFF transitions from
enabled to disabled. This makes sure the delayed work will be scheduled
as intended in the reverse case.

In order to avoid a deadlock, amdgpu_device_delay_enable_gfx_off needs
to use mutex_trylock instead of mutex_lock.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
    mod_delayed_work.

Signed-off-by: Michel Dänzer 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 13 +++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |  3 +++
   3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..8b025f70706c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,7 +2777,16 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
   struct amdgpu_device *adev =
   container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
   -    mutex_lock(&adev->gfx.gfx_off_mutex);
+    /* mutex_lock could deadlock with cancel_delayed_work_sync in 
amdgpu_gfx_off_ctrl. */
+    if (!mutex_trylock(&adev->gfx.gfx_off_mutex)) {
+    /* If there's a bug which causes amdgpu_gfx_off_ctrl to be called with 
enable=true
+ * when adev->gfx.gfx_off_req_count is already 0, we might race with 
that.
+ * Re-schedule to make sure gfx off will be re-enabled in the HW 
eventually.
+ */
+    schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
AMDGPU_GFX_OFF_DELAY_ENABLE);
+    return;


This is not needed and is just creating another thread to contend for mutex.


Still not sure what you mean by that. What other thread?


Sorry, I meant it schedules another workitem and delays GFXOFF 
enablement further. For ex: if it was another function like 
gfx_off_status holding the lock at the time of check.





The checks below take care of enabling gfxoff correctly. If it's already in 
gfx_off state, it doesn't do anything. So I don't see why this change is needed.


mutex_trylock is needed to prevent the deadlock discussed before and below.

schedule_delayed_work is needed due to this scenario hinted at by the comment:

1. amdgpu_gfx_off_ctrl locks mutex, calls schedule_delayed_work
2. amdgpu_device_delay_enable_gfx_off runs, calls mutex_trylock, which fails

GFXOFF would never get re-enabled in HW in this case (until amdgpu_gfx_off_ctrl 
calls schedule_delayed_work again).

(cancel_delayed_work_sync guarantees there's no pending delayed work when it 
returns, even if amdgpu_device_delay_enable_gfx_off calls schedule_delayed_work)



I think we need to explain based on the original code before. There is 
an assumption here that the only other contention of this mutex is with 
the gfx_off_ctrl function. That is not true, so this is not the only 
case where mutex_trylock can fail. It could be because gfx_off_status is 
holding the lock.


As far as I understand if the work has already started running when 
schedule_delayed_work is called, it will insert another in the work 
queue after delay. Based on that understanding I didn't find a problem 
with the original code. Maybe, mutex_trylock is added to call _sync to 
make sure work is cancelled or not running but that breaks other 
assumptions.



The other problem is amdgpu_get_gfx_off_status() also uses the same mutex.


Not sure what for TBH. AFAICT there's only one implementation of this for 
Renoir, which just reads a register. (It's only called from debugfs)



I'm not sure either :) But as long as there are other functions that 
contend for the same lock, it's not good to implement based on 
assumptions only about a particular scenario.



So it won't be knowing which thread it would be contending against and blindly 
creates more work items.


There is only ever at most one instance of the delayed work at any time. 
amdgpu_device_delay_enable_gfx_off doesn't care whether amdgpu_gfx_off_ctrl or 
amdgpu_get_gfx_off_status is holding the mutex, it just keeps re-scheduling 
itself 100 ms later until it succeeds.



Yes, that is the problem, there could be cases where it could have gone 
to gfxoff

Re: [PATCH] drm/amdgpu: Cancel delayed work when GFXOFF is disabled

2021-08-13 Thread Lazar, Lijo




On 8/13/2021 3:59 PM, Michel Dänzer wrote:

From: Michel Dänzer 

schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if GFXOFF
was disabled and re-enabled again during those 100 ms.

This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).

To fix this, call cancel_delayed_work_sync when GFXOFF transitions from
enabled to disabled. This makes sure the delayed work will be scheduled
as intended in the reverse case.

In order to avoid a deadlock, amdgpu_device_delay_enable_gfx_off needs
to use mutex_trylock instead of mutex_lock.

v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
   mod_delayed_work.

Signed-off-by: Michel Dänzer 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c| 13 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h|  3 +++
  3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..8b025f70706c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,7 +2777,16 @@ static void amdgpu_device_delay_enable_gfx_off(struct 
work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, 
gfx.gfx_off_delay_work.work);
  
-	mutex_lock(&adev->gfx.gfx_off_mutex);

+   /* mutex_lock could deadlock with cancel_delayed_work_sync in 
amdgpu_gfx_off_ctrl. */
+   if (!mutex_trylock(&adev->gfx.gfx_off_mutex)) {
+   /* If there's a bug which causes amdgpu_gfx_off_ctrl to be 
called with enable=true
+* when adev->gfx.gfx_off_req_count is already 0, we might race 
with that.
+* Re-schedule to make sure gfx off will be re-enabled in the 
HW eventually.
+*/
+   schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
AMDGPU_GFX_OFF_DELAY_ENABLE);
+   return;


This is not needed and is just creating another thread to contend for 
mutex. The checks below take care of enabling gfxoff correctly. If it's 
already in gfx_off state, it doesn't do anything. So I don't see why 
this change is needed.


The other problem is amdgpu_get_gfx_off_status() also uses the same 
mutex. So it won't be knowing which thread it would be contending 
against and blindly creates more work items.



+   }
+
if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, true))
adev->gfx.gfx_off_state = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..da4c46db3093 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -28,9 +28,6 @@
  #include "amdgpu_rlc.h"
  #include "amdgpu_ras.h"
  
-/* delay 0.1 second to enable gfx off feature */

-#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
-
  /*
   * GPU GFX IP block helpers function.
   */
@@ -569,9 +566,13 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool 
enable)
adev->gfx.gfx_off_req_count--;
  
  	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {

-   schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
GFX_OFF_DELAY_ENABLE);
-   } else if (!enable && adev->gfx.gfx_off_state) {
-   if (!amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {
+   schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 
AMDGPU_GFX_OFF_DELAY_ENABLE);
+   } else if (!enable) {
+   if (adev->gfx.gfx_off_req_count == 1 && 
!adev->gfx.gfx_off_state)
+   cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);


This has the deadlock problem as discussed in the other thread.

Thanks,
Lijo


+   if (adev->gfx.gfx_off_state &&
+   !amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_GFX, false)) {
adev->gfx.gfx_off_state = false;
  
  			if (adev->gfx.funcs->init_spm_golden) {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index d43fe2ed8116..dcdb505bb7f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -32,6 +32,9 @@
  #include "amdgpu_rlc.h"
  #include "soc15.h"
  
+/* delay 0.1 second to enable gfx off feature */

+#define AMDGPU_GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
+
  /* GFX current status */
  #define AMDGPU_GFX_NORMAL_MODE0xL
  #define AMDGPU_GFX_SAFE_MODE 

Re: [PATCH 2/2] drm/amdgpu: Use mod_delayed_work in JPEG/UVD/VCE/VCN ring_end_use hooks

2021-08-13 Thread Lazar, Lijo




On 8/13/2021 4:01 PM, Michel Dänzer wrote:

On 2021-08-13 6:23 a.m., Lazar, Lijo wrote:



On 8/12/2021 10:24 PM, Michel Dänzer wrote:

On 2021-08-12 1:33 p.m., Lazar, Lijo wrote:

On 8/12/2021 1:41 PM, Michel Dänzer wrote:

On 2021-08-12 7:55 a.m., Koenig, Christian wrote:

Hi James,

Evan seems to have understood how this all works together.

See while any begin/end use critical section is active the work should not be 
active.

When you handle only one ring you can just call cancel in begin use and 
schedule in end use. But when you have more than one ring you need a lock or 
counter to prevent concurrent work items to be started.

Michel's idea to use mod_delayed_work is a bad one because it assumes that 
the delayed work is still running.


It merely assumes that the work may already have been scheduled before.

Admittedly, I missed the cancel_delayed_work_sync calls for patch 2. While I 
think it can still have some effect when there's a single work item for 
multiple rings, as described by James, it's probably negligible, since 
presumably the time intervals between ring_begin_use and ring_end_use are 
normally much shorter than a second.

So, while patch 2 is at worst a no-op (since mod_delayed_work is the same as 
schedule_delayed_work if the work hasn't been scheduled yet), I'm fine with 
dropping it.



Something similar applies to the first patch I think,


There are no cancel work calls in that case, so the commit log is accurate 
TTBOMK.


Curious -

For patch 1, does it make a difference if any delayed work scheduled is 
cancelled in the else part before proceeding?

} else if (!enable && adev->gfx.gfx_off_state) {
cancel_delayed_work(&adev->gfx.gfx_off_delay_work);


I tried the patch below.

While this does seem to fix the problem as well, I see a potential issue:

1. amdgpu_gfx_off_ctrl locks adev->gfx.gfx_off_mutex
2. amdgpu_device_delay_enable_gfx_off runs, blocks in mutex_lock
3. amdgpu_gfx_off_ctrl calls cancel_delayed_work_sync

I'm afraid this would deadlock? (CONFIG_PROVE_LOCKING doesn't complain though)


Should use the cancel_delayed_work instead of the _sync version.


The thing is, it's not clear to me from cancel_delayed_work's description that 
it's guaranteed not to wait for amdgpu_device_delay_enable_gfx_off to finish if 
it's already running. If that's not guaranteed, it's prone to the same deadlock.


From what I understood from the description, cancel initiates a cancel. If 
the work has already started, it returns false saying it couldn't succeed; 
otherwise it cancels the scheduled work and returns true. In the note below, 
it asks to specifically use the _sync version if we need to wait for an 
already started work, and that definitely has the deadlock problem you 
mentioned above.


 * Note:
 * The work callback function may still be running on return, unless
 * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
 * use cancel_delayed_work_sync() to wait on it.
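
In other words, the deadlock the _sync variant can run into has this shape 
(kernel-style sketch, all names hypothetical):

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(demo_mutex);

static void demo_fn(struct work_struct *work)
{
	mutex_lock(&demo_mutex);	/* (2) blocks while demo_ctrl() holds the mutex */
	/* ... re-enable something in HW ... */
	mutex_unlock(&demo_mutex);
}

static DECLARE_DELAYED_WORK(demo_work, demo_fn);

static void demo_ctrl(void)
{
	mutex_lock(&demo_mutex);		/* (1) */
	/* (3) if demo_fn has already started and is blocked at (2), _sync
	 * waits for it to finish while holding the mutex it needs -> deadlock.
	 * cancel_delayed_work() only drops a not-yet-running item and returns.
	 */
	cancel_delayed_work_sync(&demo_work);
	mutex_unlock(&demo_mutex);
}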





As you mentioned - at best work is not scheduled yet and cancelled 
successfully, or at worst it's waiting for the mutex. In the worst case, if 
amdgpu_device_delay_enable_gfx_off gets the mutex after amdgpu_gfx_off_ctrl 
unlocks it, there is an extra check as below.

if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count)

The count wouldn't be 0 and hence it won't enable GFXOFF.


I'm not sure, but it might also be possible for amdgpu_device_delay_enable_gfx_off 
to get the mutex only after amdgpu_gfx_off_ctrl was called again and set 
adev->gfx.gfx_off_req_count back to 0.



Yes, this is a case we can't avoid either way. If the work has already 
started, then mod_delayed_ also doesn't have any impact. Another case is 
when the work thread has already got the mutex and a disable request comes 
in just at that time. It has to wait till the mutex is released by the work, 
which could mean GFXOFF gets enabled and then immediately disabled.
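
Spelled out, the interleaving being described (timing labels are illustrative only):

/*  t0: delayed work starts and takes gfx_off_mutex
 *  t1: amdgpu_gfx_off_ctrl(adev, false) is called and blocks on the mutex
 *  t2: delayed work enables GFXOFF in HW and releases the mutex
 *  t3: amdgpu_gfx_off_ctrl gets the mutex and disables GFXOFF again
 *
 * i.e. an enable immediately followed by a disable, which neither
 * cancel_delayed_work() nor mod_delayed_work() can prevent once the work
 * item has already taken the lock.
 */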





Maybe it's possible to fix it with cancel_delayed_work_sync somehow, but I'm 
not sure how offhand. (With cancel_delayed_work instead, I'm worried 
amdgpu_device_delay_enable_gfx_off might still enable GFXOFF in the HW 
immediately after amdgpu_gfx_off_ctrl unlocks the mutex. Then again, that might 
happen with mod_delayed_work as well...)


As mentioned earlier, cancel_delayed_work won't cause this issue.

In the mod_delayed_ patch, mod_ version is called only when req_count is 0. 
While that is a good thing, it keeps alive one more contender for the mutex.


Not sure what you mean. It leaves the possibility of 
amdgpu_device_delay_enable_gfx_off running just after amdgpu_gfx_off_ctrl tried 
to postpone it. As discussed above, something similar might be possible with 
cancel_delayed_work as well.



The mod_delayed_ is called only when req_count gets back to 0. If another 
disable request comes after that, it does

Re: [PATCH 2/2] drm/amdgpu: Use mod_delayed_work in JPEG/UVD/VCE/VCN ring_end_use hooks

2021-08-12 Thread Lazar, Lijo




On 8/12/2021 10:24 PM, Michel Dänzer wrote:

On 2021-08-12 1:33 p.m., Lazar, Lijo wrote:

On 8/12/2021 1:41 PM, Michel Dänzer wrote:

On 2021-08-12 7:55 a.m., Koenig, Christian wrote:

Hi James,

Evan seems to have understood how this all works together.

See while any begin/end use critical section is active the work should not be 
active.

When you handle only one ring you can just call cancel in begin use and 
schedule in end use. But when you have more than one ring you need a lock or 
counter to prevent concurrent work items to be started.

Michel's idea to use mod_delayed_work is a bad one because it assumes that 
the delayed work is still running.


It merely assumes that the work may already have been scheduled before.

Admittedly, I missed the cancel_delayed_work_sync calls for patch 2. While I 
think it can still have some effect when there's a single work item for 
multiple rings, as described by James, it's probably negligible, since 
presumably the time intervals between ring_begin_use and ring_end_use are 
normally much shorter than a second.

So, while patch 2 is at worst a no-op (since mod_delayed_work is the same as 
schedule_delayed_work if the work hasn't been scheduled yet), I'm fine with 
dropping it.



Something similar applies to the first patch I think,


There are no cancel work calls in that case, so the commit log is accurate 
TTBOMK.


Curious -

For patch 1, does it make a difference if any delayed work scheduled is 
cancelled in the else part before proceeding?

} else if (!enable && adev->gfx.gfx_off_state) {
cancel_delayed_work(&adev->gfx.gfx_off_delay_work);


I tried the patch below.

While this does seem to fix the problem as well, I see a potential issue:

1. amdgpu_gfx_off_ctrl locks adev->gfx.gfx_off_mutex
2. amdgpu_device_delay_enable_gfx_off runs, blocks in mutex_lock
3. amdgpu_gfx_off_ctrl calls cancel_delayed_work_sync

I'm afraid this would deadlock? (CONFIG_PROVE_LOCKING doesn't complain though)



Should use the cancel_delayed_work instead of the _sync version. As you 
mentioned - at best work is not scheduled yet and cancelled 
successfully, or at worst it's waiting for the mutex. In the worst case, 
if amdgpu_device_delay_enable_gfx_off gets the mutex after 
amdgpu_gfx_off_ctrl unlocks it, there is an extra check as below.


if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count)

The count wouldn't be 0 and hence it won't enable GFXOFF.



Maybe it's possible to fix it with cancel_delayed_work_sync somehow, but I'm 
not sure how offhand. (With cancel_delayed_work instead, I'm worried 
amdgpu_device_delay_enable_gfx_off might still enable GFXOFF in the HW 
immediately after amdgpu_gfx_off_ctrl unlocks the mutex. Then again, that might 
happen with mod_delayed_work as well...)


As mentioned earlier, cancel_delayed_work won't cause this issue.

In the mod_delayed_ patch, mod_ version is called only when req_count is 
0. While that is a good thing, it keeps alive one more contender for the 
mutex.


The cancel_ version eliminates that contender if it happens to be called at 
the right time (more likely if there are multiple requests to disable 
gfxoff). On the other hand, I don't know how costly it is to call cancel_ 
every time in the else part (or maybe call it only once, when the count 
increments to 1?).
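
A rough sketch of that variant (illustrative only; it combines the non-_sync 
cancel discussed above with the existing req_count bookkeeping and elides the 
rest of the disable path):

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (!enable)
		adev->gfx.gfx_off_req_count++;
	else if (adev->gfx.gfx_off_req_count > 0)
		adev->gfx.gfx_off_req_count--;

	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
	} else if (!enable) {
		/* Only on the first 0 -> 1 transition of the disable count: drop a
		 * pending delayed enable instead of cancelling on every request.
		 */
		if (adev->gfx.gfx_off_req_count == 1)
			cancel_delayed_work(&adev->gfx.gfx_off_delay_work);

		/* ... existing "disable GFXOFF in HW" handling ... */
	}

	mutex_unlock(&adev->gfx.gfx_off_mutex);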


Thanks,
Lijo




diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..3e4585ffb9af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -570,8 +570,11 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 
 	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
 		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
-	} else if (!enable && adev->gfx.gfx_off_state) {
-		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+	} else if (!enable) {
+		cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+		if (adev->gfx.gfx_off_state &&
+		    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
 			adev->gfx.gfx_off_state = false;
 
 			if (adev->gfx.funcs->init_spm_golden) {





Re: [PATCH 2/2] drm/amdgpu: Use mod_delayed_work in JPEG/UVD/VCE/VCN ring_end_use hooks

2021-08-12 Thread Lazar, Lijo




On 8/12/2021 1:41 PM, Michel Dänzer wrote:

On 2021-08-12 7:55 a.m., Koenig, Christian wrote:

Hi James,

Evan seems to have understood how this all works together.

See while any begin/end use critical section is active the work should not be 
active.

When you handle only one ring you can just call cancel in begin use and 
schedule in end use. But when you have more than one ring you need a lock or 
counter to prevent concurrent work items to be started.

Michel's idea to use mod_delayed_work is a bad one because it assumes that 
the delayed work is still running.


It merely assumes that the work may already have been scheduled before.

Admittedly, I missed the cancel_delayed_work_sync calls for patch 2. While I 
think it can still have some effect when there's a single work item for 
multiple rings, as described by James, it's probably negligible, since 
presumably the time intervals between ring_begin_use and ring_end_use are 
normally much shorter than a second.

So, while patch 2 is at worst a no-op (since mod_delayed_work is the same as 
schedule_delayed_work if the work hasn't been scheduled yet), I'm fine with 
dropping it.



Something similar applies to the first patch I think,


There are no cancel work calls in that case, so the commit log is accurate 
TTBOMK.


Curious -

For patch 1, does it make a difference if any delayed work scheduled is 
cancelled in the else part before proceeding?


} else if (!enable && adev->gfx.gfx_off_state) {
cancel_delayed_work(&adev->gfx.gfx_off_delay_work);


Thanks,
Lijo



I noticed this because current mutter Git main wasn't able to sustain 60 fps on 
Navi 14 with a simple glxgears -fullscreen. mutter was dropping frames because 
its CPU work for a frame update occasionally took up to 3 ms, instead of the 
normal 2-300 microseconds. sysprof showed a lot of cycles spent in the 
functions which enable/disable GFXOFF in the HW.



so when this makes a difference it is actually a bug.


There was certainly a bug though, which patch 1 fixes. :)




Re: [PATCH v5 03/27] drm/amdgpu: Split amdgpu_device_fini into early and late

2021-04-29 Thread Lazar, Lijo
[AMD Official Use Only - Internal Distribution Only]

That looks better to me :) As more things get added, I don't know how long you 
will be able to keep the sw/hw cleanup separate, and the name could become 
confusing eventually.

Thanks,
Lijo


Re: [PATCH v5 03/27] drm/amdgpu: Split amdgpu_device_fini into early and late

2021-04-29 Thread Lazar, Lijo
[AMD Official Use Only - Internal Distribution Only]

sysfs cleanup is a sw cleanup to me, but it is done inside hw fini. The sw/hw 
separation is not strictly followed; maybe name the stages something like 
stage1/stage2 fini instead.
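
For illustration, the teardown flow the split aims at (a rough sketch; 
everything apart from the fini_hw/fini_sw pair and amdgpu_driver_release_kms 
is simplified):

static void amdgpu_pci_remove(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	drm_dev_unplug(dev);
	/* stage 1 / "hw": must happen right away on pci_remove */
	amdgpu_device_fini_hw(drm_to_adev(dev));
	drm_dev_put(dev);
	/* stage 2 / "sw": runs later, when the last reference is dropped:
	 * drm_driver.release -> amdgpu_driver_release_kms() ->
	 * amdgpu_device_fini_sw()
	 */
}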

Thanks,
Lijo

From: amd-gfx  on behalf of Andrey 
Grodzovsky 
Sent: Wednesday, April 28, 2021 8:41:43 PM
To: dri-devel@lists.freedesktop.org ; 
amd-...@lists.freedesktop.org ; 
linux-...@vger.kernel.org ; 
ckoenig.leichtzumer...@gmail.com ; 
daniel.vet...@ffwll.ch ; Wentland, Harry 

Cc: Grodzovsky, Andrey ; gre...@linuxfoundation.org 
; Kuehling, Felix ; 
ppaala...@gmail.com ; helg...@kernel.org 
; Deucher, Alexander 
Subject: [PATCH v5 03/27] drm/amdgpu: Split amdgpu_device_fini into early and 
late

Some of the stuff in amdgpu_device_fini such as HW interrupts
disable and pending fences finalization must be done right away on
pci_remove while most of the stuff which relates to finalizing and
releasing driver data structures can be kept until
drm_driver.release hook is called, i.e. when the last device
reference is dropped.

v4: Change functions prefix early->hw and late->sw

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  6 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  7 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 15 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c| 26 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h|  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  3 ++-
 drivers/gpu/drm/amd/amdgpu/cik_ih.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/cz_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/iceland_ih.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/si_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/tonga_ih.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/vega20_ih.c |  2 +-
 17 files changed, 79 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1af2fa1591fd..fddb82897e5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1073,7 +1073,9 @@ static inline struct amdgpu_device 
*amdgpu_ttm_adev(struct ttm_device *bdev)

 int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags);
-void amdgpu_device_fini(struct amdgpu_device *adev);
+void amdgpu_device_fini_hw(struct amdgpu_device *adev);
+void amdgpu_device_fini_sw(struct amdgpu_device *adev);
+
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);

 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
@@ -1289,6 +1291,8 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
  struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
 int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6447cd6ca5a8..8d22b79fc1cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3590,14 +3590,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  * Tear down the driver info (all asics).
  * Called at driver shutdown.
  */
-void amdgpu_device_fini(struct amdgpu_device *adev)
+void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 {
 dev_info(adev->dev, "amdgpu: finishing device.\n");
 flush_delayed_work(&adev->delayed_init_work);
 adev->shutdown = true;

-   kfree(adev->pci_state);
-
 /* make sure IB test finished before entering exclusive mode
  * to avoid preemption on IB test
  * */
@@ -3614,11 +3612,24 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 else
 drm_atomic_helper_shutdown(adev_to_drm(adev));
 }
-   amdgpu_fence_driver_fini(adev);
+   amdgpu_fence_driver_fini_hw(adev);
+
 if (adev->pm_sysfs_en)
 amdgpu_pm_sysfs_fini(adev);
+   if (adev->ucode_sysfs_en)
+   amdgpu_ucode_sysfs_fini(adev);
+   sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+
+
 amdgpu_fbdev_fini(adev);
+
+   amdgpu_irq_fini_hw(adev);
+}
+
+void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+{
 amdgpu_device_ip_fini(adev);
+   amdgpu_fence_driver_fini_sw(adev);
 release_firmware(adev->firmware.gpu_info_fw);
 adev->fir