[PATCH] drm/amdgpu: expose num_hops and num_links xgmi info through dev attr

2023-06-08 Thread Shiwu Zhang
Add these two dev attrs for xgmi info details which is helpful for
developers checking the xgmi topology by catting the sys file directly.

Take 4 cards with xgmi connection as an example, get the num_hops for each
device or node through xmig_hive_info dir like,
cat /sys/bus/pci/devices/:41:00.0/xgmi_hive_info/node1/num_hops
will return "00 41 41 41" where "00" stands for the hops to node1 itself
and "41" is the hops in hex format to every other node in the same hive.
There are node1/node2/node3/node4 representing 4 cards in the hive.

The same for num_links dev attr.

Signed-off-by: Shiwu Zhang 
Acked-by: Le Ma 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 46 
 1 file changed, 46 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 44822dd98e5e..9c0fc23ed5b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -331,6 +331,36 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device 
*dev,
 
 }
 
+static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
+struct device_attribute *attr,
+char *buf)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   struct psp_xgmi_topology_info *top = >psp.xgmi_context.top_info;
+   int i;
+   for (i=0; i < top->num_nodes; i++) {
+   sprintf(buf + 3*i, "%02x ", top->nodes[i].num_hops);
+   }
+
+   return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
+struct device_attribute *attr,
+char *buf)
+{
+   struct drm_device *ddev = dev_get_drvdata(dev);
+   struct amdgpu_device *adev = drm_to_adev(ddev);
+   struct psp_xgmi_topology_info *top = >psp.xgmi_context.top_info;
+   int i;
+   for (i=0; i < top->num_nodes; i++) {
+   sprintf(buf + 3*i, "%02x ", top->nodes[i].num_links);
+   }
+
+   return sysfs_emit(buf, "%s\n", buf);
+}
+
 #define AMDGPU_XGMI_SET_FICAA(o)   ((o) | 0x456801)
 static ssize_t amdgpu_xgmi_show_error(struct device *dev,
  struct device_attribute *attr,
@@ -367,6 +397,8 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 
 static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
 static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
+static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
+static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
 
 static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 struct amdgpu_hive_info *hive)
@@ -386,6 +418,15 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct 
amdgpu_device *adev,
if (ret)
pr_err("failed to create xgmi_error\n");
 
+   /* Create xgmi num hops file */
+   ret = device_create_file(adev->dev, _attr_xgmi_num_hops);
+   if (ret)
+   pr_err("failed to create xgmi_num_hops\n");
+
+   /* Create xgmi num links file */
+   ret = device_create_file(adev->dev, _attr_xgmi_num_links);
+   if (ret)
+   pr_err("failed to create xgmi_num_links\n");
 
/* Create sysfs link to hive info folder on the first device */
if (hive->kobj.parent != (>dev->kobj)) {
@@ -413,6 +454,9 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct 
amdgpu_device *adev,
 
 remove_file:
device_remove_file(adev->dev, _attr_xgmi_device_id);
+   device_remove_file(adev->dev, _attr_xgmi_error);
+   device_remove_file(adev->dev, _attr_xgmi_num_hops);
+   device_remove_file(adev->dev, _attr_xgmi_num_links);
 
 success:
return ret;
@@ -426,6 +470,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct 
amdgpu_device *adev,
 
device_remove_file(adev->dev, _attr_xgmi_device_id);
device_remove_file(adev->dev, _attr_xgmi_error);
+   device_remove_file(adev->dev, _attr_xgmi_num_hops);
+   device_remove_file(adev->dev, _attr_xgmi_num_links);
 
if (hive->kobj.parent != (>dev->kobj))
sysfs_remove_link(>dev->kobj,"xgmi_hive_info");
-- 
2.17.1



Re: [PATCH v2] drm/amd/pm: enable more Pstates profile levels for yellow_carp

2023-06-08 Thread Huang, Tim
[Public]

Hi Shikai,


From: Guo, Shikai 
Sent: Thursday, June 8, 2023 11:27 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Liang, Prike ; Liu, Aaron ; Huang, 
Tim ; Deucher, Alexander ; Guo, 
Shikai 
Subject: [PATCH v2] drm/amd/pm: enable more Pstates profile levels for 
yellow_carp

This patch enables following UMD stable Pstates profile levels for 
power_dpm_force_performance_level interface.

- profile_peak
- profile_min_mclk
- profile_min_sclk
- profile_standard

Signed-off-by: shikaguo 
---
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 94 ++-
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h  |  5 +-
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index a92da336ecec..5c968ab2ea8d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -957,6 +957,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
 uint32_t max)
 {
 enum smu_message_type msg_set_min, msg_set_max;
+   uint32_t min_clk = min;
+   uint32_t max_clk = max;
+
 int ret = 0;

 if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type))
@@ -985,11 +988,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
 return -EINVAL;
 }

-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL);
+   if (clk_type == SMU_VCLK) {
+   min_clk = min << SMU_13_VCLK_SHIFT;
+   max_clk = max << SMU_13_VCLK_SHIFT;
+   }
+
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL);
+
 if (ret)
 goto out;

-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL);
 if (ret)
 goto out;

@@ -1107,6 +1116,50 @@ static int yellow_carp_force_clk_levels(struct 
smu_context *smu,
 return ret;
 }

+static int yellow_carp_get_dpm_profile_freq(struct smu_context *smu,
+   enum amd_dpm_forced_level level,
+   enum smu_clk_type clk_type,
+   uint32_t *min_clk,
+   uint32_t *max_clk)
+{
+   int ret = 0;
+   uint32_t clk_limit = 0;
+
+   switch (clk_type) {
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   clk_limit = YELLOW_CARP_UMD_PSTATE_GFXCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, 
_limit);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, 
_limit, NULL);
+   break;
+   case SMU_SOCCLK:
+   clk_limit = YELLOW_CARP_UMD_PSTATE_SOCCLK;
+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_SOCCLK, 
NULL, _limit);
+   break;
+   case SMU_FCLK:
+   clk_limit = YELLOW_CARP_UMD_PSTATE_FCLK;

This is shared by other APU ASICs, like the SMU v13.0.8. We should need to 
apply the different profiling standard clocks for sclk/fclk/socclk according to
the IP version checking.  Thanks.

Tim


+   if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, 
_limit);
+   else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK)
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, 
_limit, NULL);
+   break;
+   case SMU_VCLK:
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, 
_limit);
+   break;
+   case SMU_DCLK:
+   yellow_carp_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, 
_limit);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+   *min_clk = *max_clk = clk_limit;
+   return ret;
+}
+
 static int yellow_carp_set_performance_level(struct smu_context *smu,
 enum amd_dpm_forced_level 
level)
 {
@@ -1114,6 +1167,9 @@ static int yellow_carp_set_performance_level(struct 
smu_context *smu,
 uint32_t sclk_min = 0, sclk_max = 0;
 uint32_t fclk_min = 0, fclk_max = 0;
 uint32_t socclk_min = 0, socclk_max = 0;
+   uint32_t vclk_min = 0, vclk_max = 0;
+   uint32_t dclk_min = 0, dclk_max = 0;
+
 int ret = 0;

 switch (level) {
@@ -1121,28 +1177,42 @@ static int yellow_carp_set_performance_level(struct 
smu_context *smu,
 

[PATCH v3 0/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register

2023-06-08 Thread Sui Jingfeng
From: Sui Jingfeng 

Patch 1,2 and 3 do basic clean up to the vgaarb module.
Patch 4 introduce is_boot_device function callback to vga_client_register

Sui Jingfeng (4):
  PCI/VGA: tidy up the code and comment format
  PCI/VGA: Use unsigned type for the io_state variable
  PCI/VGA: only deal with VGA class devices
  PCI/VGA: introduce is_boot_device function callback to
vga_client_register

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/i915/display/intel_vga.c   |  3 +-
 drivers/gpu/drm/nouveau/nouveau_vga.c  |  2 +-
 drivers/gpu/drm/radeon/radeon_device.c |  2 +-
 drivers/pci/vgaarb.c   | 72 +-
 drivers/vfio/pci/vfio_pci_core.c   |  2 +-
 include/linux/vgaarb.h | 16 ++---
 7 files changed, 57 insertions(+), 42 deletions(-)

-- 
2.25.1



[PATCH v3 4/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register

2023-06-08 Thread Sui Jingfeng
From: Sui Jingfeng 

The vga_is_firmware_default() function is arch-dependent, which doesn't
sound right. At least, it also works on the Mips and LoongArch platforms.
Tested with the drm/amdgpu and drm/radeon drivers. However, it's difficult
to enumerate all arch-driver combinations. I'm wrong if there is only one
exception.

With the observation that device drivers typically have better knowledge
about which PCI bar contains the firmware framebuffer, which could avoid
the need to iterate all of the PCI BARs.

But as a PCI function at pci/vgaarb.c, vga_is_firmware_default() is
probably not suitable to make such an optimization for a specific device.

There are PCI display controllers that don't have a dedicated VRAM bar,
this function will lose its effectiveness in such a case. Luckily, the
device driver can provide an accurate workaround.

Therefore, this patch introduces a callback that allows the device driver
to tell the VGAARB if the device is the default boot device. This patch
only intends to introduce the mechanism, while the implementation is left
to the device driver authors. Also honor the comment: "Clients have two
callback mechanisms they can use"

Signed-off-by: Sui Jingfeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/i915/display/intel_vga.c   |  3 +--
 drivers/gpu/drm/nouveau/nouveau_vga.c  |  2 +-
 drivers/gpu/drm/radeon/radeon_device.c |  2 +-
 drivers/pci/vgaarb.c   | 22 ++
 drivers/vfio/pci/vfio_pci_core.c   |  2 +-
 include/linux/vgaarb.h |  8 +---
 7 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5c7d40873ee2..7a096f2d5c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3960,7 +3960,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* this will fail for cards that aren't VGA class devices, just
 * ignore it */
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
-   vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
+   vga_client_register(adev->pdev, amdgpu_device_vga_set_decode, 
NULL);
 
px = amdgpu_device_supports_px(ddev);
 
diff --git a/drivers/gpu/drm/i915/display/intel_vga.c 
b/drivers/gpu/drm/i915/display/intel_vga.c
index 286a0bdd28c6..98d7d4dffe9f 100644
--- a/drivers/gpu/drm/i915/display/intel_vga.c
+++ b/drivers/gpu/drm/i915/display/intel_vga.c
@@ -115,7 +115,6 @@ intel_vga_set_decode(struct pci_dev *pdev, bool 
enable_decode)
 
 int intel_vga_register(struct drm_i915_private *i915)
 {
-
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
int ret;
 
@@ -127,7 +126,7 @@ int intel_vga_register(struct drm_i915_private *i915)
 * then we do not take part in VGA arbitration and the
 * vga_client_register() fails with -ENODEV.
 */
-   ret = vga_client_register(pdev, intel_vga_set_decode);
+   ret = vga_client_register(pdev, intel_vga_set_decode, NULL);
if (ret && ret != -ENODEV)
return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c 
b/drivers/gpu/drm/nouveau/nouveau_vga.c
index f8bf0ec26844..162b4f4676c7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -92,7 +92,7 @@ nouveau_vga_init(struct nouveau_drm *drm)
return;
pdev = to_pci_dev(dev->dev);
 
-   vga_client_register(pdev, nouveau_vga_set_decode);
+   vga_client_register(pdev, nouveau_vga_set_decode, NULL);
 
/* don't register Thunderbolt eGPU with vga_switcheroo */
if (pci_is_thunderbolt_attached(pdev))
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index afbb3a80c0c6..71f2ff39d6a1 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1425,7 +1425,7 @@ int radeon_device_init(struct radeon_device *rdev,
/* if we have > 1 VGA cards, then disable the radeon VGA resources */
/* this will fail for cards that aren't VGA class devices, just
 * ignore it */
-   vga_client_register(rdev->pdev, radeon_vga_set_decode);
+   vga_client_register(rdev->pdev, radeon_vga_set_decode, NULL);
 
if (rdev->flags & RADEON_IS_PX)
runtime = true;
diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index b0bf4952a95d..d3dab61e0ef2 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -53,6 +53,7 @@ struct vga_device {
bool bridge_has_one_vga;
bool is_firmware_default;   /* device selected by firmware */
unsigned int (*set_decode)(struct pci_dev *pdev, bool decode);
+   bool (*is_boot_device)(struct pci_dev *pdev);
 };
 
 static LIST_HEAD(vga_list);
@@ -614,10 +615,17 @@ static bool vga_is_boot_device(struct vga_device *vgadev)

[PATCH v3 2/4] PCI/VGA: Use unsigned type for the io_state variable

2023-06-08 Thread Sui Jingfeng
From: Sui Jingfeng 

The io_state variable in the vga_arb_write() function is declared with
unsigned int type, while the vga_str_to_iostate() function takes int *
type. To keep them consistent, replace the third argument of
vga_str_to_iostate() function with the unsigned int * type.

Signed-off-by: Sui Jingfeng 
---
 drivers/pci/vgaarb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index bf2921e3cb06..7f0347f4f6fd 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -76,7 +76,7 @@ static const char *vga_iostate_to_str(unsigned int iostate)
return "none";
 }
 
-static int vga_str_to_iostate(char *buf, int str_size, int *io_state)
+static int vga_str_to_iostate(char *buf, int str_size, unsigned int *io_state)
 {
/* we could in theory hand out locks on IO and mem
 * separately to userspace but it can cause deadlocks
-- 
2.25.1



[PATCH v3 1/4] PCI/VGA: tidy up the code and comment format

2023-06-08 Thread Sui Jingfeng
From: Sui Jingfeng 

This patch replaces the leading space with a tab and removes the double
blank line, no functional change.

Signed-off-by: Sui Jingfeng 
---
 drivers/pci/vgaarb.c   | 26 +-
 include/linux/vgaarb.h |  8 +++-
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index 5a696078b382..bf2921e3cb06 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -61,7 +61,6 @@ static bool vga_arbiter_used;
 static DEFINE_SPINLOCK(vga_lock);
 static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue);
 
-
 static const char *vga_iostate_to_str(unsigned int iostate)
 {
/* Ignore VGA_RSRC_IO and VGA_RSRC_MEM */
@@ -80,7 +79,8 @@ static const char *vga_iostate_to_str(unsigned int iostate)
 static int vga_str_to_iostate(char *buf, int str_size, int *io_state)
 {
/* we could in theory hand out locks on IO and mem
-* separately to userspace but it can cause deadlocks */
+* separately to userspace but it can cause deadlocks
+*/
if (strncmp(buf, "none", 4) == 0) {
*io_state = VGA_RSRC_NONE;
return 1;
@@ -99,7 +99,7 @@ static int vga_str_to_iostate(char *buf, int str_size, int 
*io_state)
return 1;
 }
 
-/* this is only used a cookie - it should not be dereferenced */
+/* This is only used as cookie, it should not be dereferenced */
 static struct pci_dev *vga_default;
 
 /* Find somebody in our list */
@@ -194,13 +194,15 @@ int vga_remove_vgacon(struct pci_dev *pdev)
 EXPORT_SYMBOL(vga_remove_vgacon);
 
 /* If we don't ever use VGA arb we should avoid
-   turning off anything anywhere due to old X servers getting
-   confused about the boot device not being VGA */
+ * turning off anything anywhere due to old X servers getting
+ * confused about the boot device not being VGA
+ */
 static void vga_check_first_use(void)
 {
/* we should inform all GPUs in the system that
 * VGA arb has occurred and to try and disable resources
-* if they can */
+* if they can
+*/
if (!vga_arbiter_used) {
vga_arbiter_used = true;
vga_arbiter_notify_clients();
@@ -956,9 +958,9 @@ EXPORT_SYMBOL(vga_set_legacy_decoding);
  * @set_decode callback: If a client can disable its GPU VGA resource, it
  * will get a callback from this to set the encode/decode state.
  *
- * Rationale: we cannot disable VGA decode resources unconditionally some 
single
- * GPU laptops seem to require ACPI or BIOS access to the VGA registers to
- * control things like backlights etc.  Hopefully newer multi-GPU laptops do
+ * Rationale: we cannot disable VGA decode resources unconditionally, some
+ * single GPU laptops seem to require ACPI or BIOS access to the VGA registers
+ * to control things like backlights etc. Hopefully newer multi-GPU laptops do
  * something saner, and desktops won't have any special ACPI for this. The
  * driver will get a callback when VGA arbitration is first used by userspace
  * since some older X servers have issues.
@@ -988,7 +990,6 @@ int vga_client_register(struct pci_dev *pdev,
 bail:
spin_unlock_irqrestore(_lock, flags);
return ret;
-
 }
 EXPORT_SYMBOL(vga_client_register);
 
@@ -1079,7 +1080,6 @@ static int vga_pci_str_to_vars(char *buf, int count, 
unsigned int *domain,
int n;
unsigned int slot, func;
 
-
n = sscanf(buf, "PCI:%x:%x:%x.%x", domain, bus, , );
if (n != 4)
return 0;
@@ -1431,7 +1431,6 @@ static int vga_arb_open(struct inode *inode, struct file 
*file)
priv->cards[0].io_cnt = 0;
priv->cards[0].mem_cnt = 0;
 
-
return 0;
 }
 
@@ -1544,7 +1543,8 @@ static int __init vga_arb_device_init(void)
bus_register_notifier(_bus_type, _notifier);
 
/* We add all PCI devices satisfying VGA class in the arbiter by
-* default */
+* default
+*/
pdev = NULL;
while ((pdev =
pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index b4b9137f9792..d36225c582ee 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -23,9 +23,7 @@
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS
- * IN THE SOFTWARE.
- *
+ * DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef LINUX_VGA_H
@@ -96,7 +94,7 @@ static inline int vga_client_register(struct pci_dev *pdev,
 static inline int vga_get_interruptible(struct pci_dev *pdev,
unsigned int rsrc)
 {
-   return vga_get(pdev, rsrc, 1);
+   return vga_get(pdev, rsrc, 1);
 }
 
 /**
@@ -111,7 +109,7 @@ static inline int vga_get_interruptible(struct pci_dev 
*pdev,
 static inline int 

[PATCH v3 3/4] PCI/VGA: only deal with VGA class devices

2023-06-08 Thread Sui Jingfeng
From: Sui Jingfeng 

vgaarb only deal with the VGA devcie(pdev->class == 0x0300), so replace the
pci_get_subsys() function with pci_get_class(). Filter the non pci display
device out. There no need to process the non display PCI device.

Signed-off-by: Sui Jingfeng 
---
 drivers/pci/vgaarb.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index 7f0347f4f6fd..b0bf4952a95d 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -756,10 +756,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev 
*pdev)
struct pci_dev *bridge;
u16 cmd;
 
-   /* Only deal with VGA class devices */
-   if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
-   return false;
-
/* Allocate structure */
vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
if (vgadev == NULL) {
@@ -1499,7 +1495,9 @@ static int pci_notify(struct notifier_block *nb, unsigned 
long action,
struct pci_dev *pdev = to_pci_dev(dev);
bool notify = false;
 
-   vgaarb_dbg(dev, "%s\n", __func__);
+   /* Only deal with VGA class devices */
+   if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+   return 0;
 
/* For now we're only intereted in devices added and removed. I didn't
 * test this thing here, so someone needs to double check for the
@@ -1509,6 +1507,8 @@ static int pci_notify(struct notifier_block *nb, unsigned 
long action,
else if (action == BUS_NOTIFY_DEL_DEVICE)
notify = vga_arbiter_del_pci_device(pdev);
 
+   vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
+
if (notify)
vga_arbiter_notify_clients();
return 0;
@@ -1533,8 +1533,8 @@ static struct miscdevice vga_arb_device = {
 
 static int __init vga_arb_device_init(void)
 {
+   struct pci_dev *pdev = NULL;
int rc;
-   struct pci_dev *pdev;
 
rc = misc_register(_arb_device);
if (rc < 0)
@@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
/* We add all PCI devices satisfying VGA class in the arbiter by
 * default
 */
-   pdev = NULL;
-   while ((pdev =
-   pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
-  PCI_ANY_ID, pdev)) != NULL)
+   while (1) {
+   pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
+   if (!pdev)
+   break;
+
vga_arbiter_add_pci_device(pdev);
+   };
 
pr_info("loaded\n");
return rc;
-- 
2.25.1



Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit

2023-06-08 Thread Michel Dänzer
On 6/7/23 19:35, Pillai, Aurabindo wrote:
> 
> Do you see the issue if you force disable FAMS?
Neither hang occurs with FAMS disabled.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit

2023-06-08 Thread Pillai, Aurabindo
[Public]

Thanks Michel,

I reached out to windows driver team, and they have a monitor specific quirk to 
disable FAMS on this model. I suspect the issue is only present on certain fw 
revisions on the monitor which is why we cant see your issue.

Unfortunately, having the patches in question reverted causes hangs with 3 
monitor setups. So I will push that monitor specific quirk and bring back the 
reverted patches.

--

Regards,
Jay

From: Michel Dänzer 
Sent: Thursday, June 8, 2023 10:20 AM
To: Pillai, Aurabindo ; Zhuo, Qingqing (Lillian) 
; Chalmers, Wesley 
Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo) 
; Lakha, Bhawanpreet ; Siqueira, 
Rodrigo ; Li, Roman ; 
amd-gfx@lists.freedesktop.org ; Chiu, Solomon 
; Lin, Wayne ; Wentland, Harry 
; Gutierrez, Agustin ; 
Kotarac, Pavle 
Subject: Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit

On 6/7/23 19:35, Pillai, Aurabindo wrote:
>
> Do you see the issue if you force disable FAMS?
Neither hang occurs with FAMS disabled.


--
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



[PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new message

2023-06-08 Thread Wayne Lin
[Why]
The sequence for collecting down_reply from source perspective should
be:

Request_n->repeat (get partial reply of Request_n->clear message ready
flag to ack DPRX that the message is received) till all partial
replies for Request_n are received->new Request_n+1.

Now there is chance that drm_dp_mst_hpd_irq() will fire new down
request in the tx queue when the down reply is incomplete. Source is
restricted to generate interveleaved message transactions so we should
avoid it.

Also, while assembling partial reply packets, reading out DPCD DOWN_REP
Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag should be
wrapped up as a complete operation for reading out a reply packet.
Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might
be risky. e.g. If the reply of the new request has overwritten the
DPRX DOWN_REP Sideband MSG buffer before source writing one to clear
DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply
for the new request. Should handle the up request in the same way.

[How]
Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ
event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might
trigger drm_dp_mst_kick_tx() only when there is no on going message
transaction.

Changes since v1:
* Reworked on review comments received
-> Adjust the fix to let driver explicitly kick off new down request
when mst irq event is handled and acked
-> Adjust the commit message

Changes since v2:
* Adjust the commit message
* Adjust the naming of the divided 2 functions and add a new input
  parameter "ack".
* Adjust code flow as per review comments.

Changes since v3:
* Update the function description of drm_dp_mst_hpd_irq_handle_event

Signed-off-by: Wayne Lin 
Cc: sta...@vger.kernel.org
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++--
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 54 ---
 drivers/gpu/drm/i915/display/intel_dp.c   |  7 +--
 drivers/gpu/drm/nouveau/dispnv50/disp.c   | 12 +++--
 include/drm/display/drm_dp_mst_helper.h   |  7 ++-
 5 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d5cec03eaa8d..597c3368bcfb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3236,6 +3236,7 @@ static void dm_handle_mst_sideband_msg(struct 
amdgpu_dm_connector *aconnector)
 {
u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
u8 dret;
+   u8 ack;
bool new_irq_handled = false;
int dpcd_addr;
int dpcd_bytes_to_read;
@@ -3265,34 +3266,36 @@ static void dm_handle_mst_sideband_msg(struct 
amdgpu_dm_connector *aconnector)
process_count < max_process_count) {
u8 retry;
dret = 0;
+   ack = 0;
 
process_count++;
 
DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], 
esi[2]);
/* handle HPD short pulse irq */
if (aconnector->mst_mgr.mst_state)
-   drm_dp_mst_hpd_irq(
-   >mst_mgr,
-   esi,
-   _irq_handled);
+   drm_dp_mst_hpd_irq_handle_event(>mst_mgr,
+   esi,
+   ,
+   _irq_handled);
 
if (new_irq_handled) {
/* ACK at DPCD to notify down stream */
-   const int ack_dpcd_bytes_to_write =
-   dpcd_bytes_to_read - 1;
-
for (retry = 0; retry < 3; retry++) {
-   u8 wret;
-
-   wret = drm_dp_dpcd_write(
-   >dm_dp_aux.aux,
-   dpcd_addr + 1,
-   [1],
-   ack_dpcd_bytes_to_write);
-   if (wret == ack_dpcd_bytes_to_write)
+   ssize_t wret;
+
+   wret = 
drm_dp_dpcd_writeb(>dm_dp_aux.aux,
+ dpcd_addr + 1,
+ ack);
+   if (wret == 1)
break;
}
 
+   if (retry == 3) {
+   DRM_ERROR("Failed to ack MST event.\n");
+   return;
+   }
+
+   
drm_dp_mst_hpd_irq_send_new_request(>mst_mgr);
/* check if there is new irq to be handled */
dret = 

[PATCH] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1

2023-06-08 Thread Sonny Jiang
From: Sonny Jiang 

Only vcn0 can process AV1 codecx. In order to use both vcn0 and
vcn1 in h264/265 transcode to AV1 cases, set vcn0 sched score to 1
at initialization time.

Signed-off-by: Sonny Jiang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 8d371faaa2b3..b48bb5212488 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle)
if (adev->vcn.harvest_config & (1 << i))
continue;
 
-   atomic_set(>vcn.inst[i].sched_score, 0);
+   /* Init instance 0 sched_score to 1, so it's scheduled after 
other instances */
+   if (i == 0)
+   atomic_set(>vcn.inst[i].sched_score, 1);
+   else
+   atomic_set(>vcn.inst[i].sched_score, 0);
 
/* VCN UNIFIED TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
-- 
2.34.1



[PATCH v3] drm/amd/pm: enable more Pstates profile levels for yellow_carp

2023-06-08 Thread shikaguo
This patch enables following UMD stable Pstates profile levels for 
power_dpm_force_performance_level interface.

- profile_peak
- profile_min_mclk
- profile_min_sclk
- profile_standard

Signed-off-by: shikaguo 
---
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 140 +-
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h  |   1 -
 2 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index a92da336ecec..71566c60372f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -47,6 +47,14 @@
 #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK0x0006L
 #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT  0x1L
 
+#define SMU_13_0_8_UMD_PSTATE_GFXCLK   533
+#define SMU_13_0_8_UMD_PSTATE_SOCCLK   533
+#define SMU_13_0_8_UMD_PSTATE_FCLK 800
+
+#define SMU_13_0_1_UMD_PSTATE_GFXCLK 700
+#define SMU_13_0_1_UMD_PSTATE_SOCCLK 678
+#define SMU_13_0_1_UMD_PSTATE_FCLK   1800
+
 #define FEATURE_MASK(feature) (1ULL << feature)
 #define SMC_DPM_FEATURE ( \
FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
@@ -957,6 +965,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
uint32_t max)
 {
enum smu_message_type msg_set_min, msg_set_max;
+   uint32_t min_clk = min;
+   uint32_t max_clk = max;
+
int ret = 0;
 
if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type))
@@ -985,11 +996,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
return -EINVAL;
}
 
-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL);
+   if (clk_type == SMU_VCLK) {
+   min_clk = min << SMU_13_VCLK_SHIFT;
+   max_clk = max << SMU_13_VCLK_SHIFT;
+   }
+
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL);
+
if (ret)
goto out;
 
-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL);
if (ret)
goto out;
 
@@ -997,12 +1014,48 @@ static int 
yellow_carp_set_soft_freq_limited_range(struct smu_context *smu,
return ret;
 }
 
+static int yellow_carp_get_umd_pstate_clk_default(struct smu_context *smu,
+   enum smu_clk_type clk_type)
+{
+   uint32_t clk_limit = 0;
+   struct amdgpu_device *adev = smu->adev;
+   switch (clk_type) {
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8))
+   clk_limit = SMU_13_0_8_UMD_PSTATE_GFXCLK;
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) ||
+   (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 
3))
+   clk_limit = SMU_13_0_1_UMD_PSTATE_GFXCLK;
+   break;
+   case SMU_SOCCLK:
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8))
+   clk_limit = SMU_13_0_8_UMD_PSTATE_SOCCLK;
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) ||
+   (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 
3))
+   clk_limit = SMU_13_0_1_UMD_PSTATE_SOCCLK;
+   break;
+   case SMU_FCLK:
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8))
+   clk_limit = SMU_13_0_8_UMD_PSTATE_FCLK;
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) ||
+   (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 
3))
+   clk_limit = SMU_13_0_1_UMD_PSTATE_FCLK;
+   break;
+   default:
+   break;
+   }
+
+   return clk_limit;
+}
+
 static int yellow_carp_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
 {
int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
uint32_t min, max;
+   uint32_t clk_limit = 0;
 
smu_cmn_get_sysfs_buf(, );
 
@@ -1044,6 +1097,7 @@ static int yellow_carp_print_clk_levels(struct 
smu_context *smu,
break;
case SMU_GFXCLK:
case SMU_SCLK:
+   clk_limit = yellow_carp_get_umd_pstate_clk_default(smu, 
clk_type);
ret = yellow_carp_get_current_clk_freq(smu, clk_type, 
_value);
if (ret)
goto print_clk_out;
@@ -1058,7 +1112,7 @@ static int yellow_carp_print_clk_levels(struct 
smu_context *smu,
 

Re: [PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new message

2023-06-08 Thread Jani Nikula
On Thu, 08 Jun 2023, Wayne Lin  wrote:
> [Why]
> The sequence for collecting down_reply from source perspective should
> be:
>
> Request_n->repeat (get partial reply of Request_n->clear message ready
> flag to ack DPRX that the message is received) till all partial
> replies for Request_n are received->new Request_n+1.
>
> Now there is chance that drm_dp_mst_hpd_irq() will fire new down
> request in the tx queue when the down reply is incomplete. Source is
> restricted to generate interveleaved message transactions so we should
> avoid it.
>
> Also, while assembling partial reply packets, reading out DPCD DOWN_REP
> Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag should be
> wrapped up as a complete operation for reading out a reply packet.
> Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might
> be risky. e.g. If the reply of the new request has overwritten the
> DPRX DOWN_REP Sideband MSG buffer before source writing one to clear
> DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply
> for the new request. Should handle the up request in the same way.
>
> [How]
> Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ
> event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might
> trigger drm_dp_mst_kick_tx() only when there is no on going message
> transaction.
>
> Changes since v1:
> * Reworked on review comments received
> -> Adjust the fix to let driver explicitly kick off new down request
> when mst irq event is handled and acked
> -> Adjust the commit message
>
> Changes since v2:
> * Adjust the commit message
> * Adjust the naming of the divided 2 functions and add a new input
>   parameter "ack".
> * Adjust code flow as per review comments.
>
> Changes since v3:
> * Update the function description of drm_dp_mst_hpd_irq_handle_event
>
> Signed-off-by: Wayne Lin 
> Cc: sta...@vger.kernel.org
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++--
>  drivers/gpu/drm/display/drm_dp_mst_topology.c | 54 ---
>  drivers/gpu/drm/i915/display/intel_dp.c   |  7 +--
>  drivers/gpu/drm/nouveau/dispnv50/disp.c   | 12 +++--
>  include/drm/display/drm_dp_mst_helper.h   |  7 ++-
>  5 files changed, 82 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index d5cec03eaa8d..597c3368bcfb 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -3236,6 +3236,7 @@ static void dm_handle_mst_sideband_msg(struct 
> amdgpu_dm_connector *aconnector)
>  {
>   u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
>   u8 dret;
> + u8 ack;
>   bool new_irq_handled = false;
>   int dpcd_addr;
>   int dpcd_bytes_to_read;
> @@ -3265,34 +3266,36 @@ static void dm_handle_mst_sideband_msg(struct 
> amdgpu_dm_connector *aconnector)
>   process_count < max_process_count) {
>   u8 retry;
>   dret = 0;
> + ack = 0;
>  
>   process_count++;
>  
>   DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], 
> esi[2]);
>   /* handle HPD short pulse irq */
>   if (aconnector->mst_mgr.mst_state)
> - drm_dp_mst_hpd_irq(
> - >mst_mgr,
> - esi,
> - _irq_handled);
> + drm_dp_mst_hpd_irq_handle_event(>mst_mgr,
> + esi,
> + ,
> + _irq_handled);
>  
>   if (new_irq_handled) {
>   /* ACK at DPCD to notify down stream */
> - const int ack_dpcd_bytes_to_write =
> - dpcd_bytes_to_read - 1;
> -
>   for (retry = 0; retry < 3; retry++) {
> - u8 wret;
> -
> - wret = drm_dp_dpcd_write(
> - >dm_dp_aux.aux,
> - dpcd_addr + 1,
> - [1],
> - ack_dpcd_bytes_to_write);
> - if (wret == ack_dpcd_bytes_to_write)
> + ssize_t wret;
> +
> + wret = 
> drm_dp_dpcd_writeb(>dm_dp_aux.aux,
> +   dpcd_addr + 1,
> +   ack);
> + if (wret == 1)
>   break;
>   }
>  
> + if (retry == 3) {
> + DRM_ERROR("Failed to ack MST event.\n");
> + return;
> + }
> +
> + 

Re: [PATCH] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1

2023-06-08 Thread Liu, Leo
[Public]

Reviewed-by: Leo Liu 

From: amd-gfx  on behalf of Sonny Jiang 

Sent: June 8, 2023 10:54 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Jiang, Sonny 
Subject: [PATCH] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1

From: Sonny Jiang 

Only vcn0 can process AV1 codecx. In order to use both vcn0 and
vcn1 in h264/265 transcode to AV1 cases, set vcn0 sched score to 1
at initialization time.

Signed-off-by: Sonny Jiang 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 8d371faaa2b3..b48bb5212488 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle)
 if (adev->vcn.harvest_config & (1 << i))
 continue;

-   atomic_set(>vcn.inst[i].sched_score, 0);
+   /* Init instance 0 sched_score to 1, so it's scheduled after 
other instances */
+   if (i == 0)
+   atomic_set(>vcn.inst[i].sched_score, 1);
+   else
+   atomic_set(>vcn.inst[i].sched_score, 0);

 /* VCN UNIFIED TRAP */
 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
--
2.34.1



Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2023-06-08 Thread Lucas Stach
Hi all,

and almost 2 years later I stumbled across this exact issue still being
present in the scheduler: if the driver bails out of the timeout
handling before calling drm_sched_stop(), the timeout job will be
leaked and the TDR timer will potentially not be restarted as the job
isn't put back in the pending_list.

How do we solve this? Apply the below suggestion?

Regards,
Lucas

Am Freitag, dem 20.08.2021 um 07:12 + schrieb Liu, Monk:
> [AMD Official Use Only]
> 
> @Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian
>  
> Do you have any concern on the kthread_park() approach ?
> 
> Theoretically speaking sched_main shall run there exclusively with 
> job_timeout since they both touches jobs, and stop scheduler during 
> job_timeout won't impact performance since in that scenario
> There was already something wrong/stuck on that ring/scheduler 
> 
> Thanks 
> 
> --
> Monk Liu | Cloud-GPU Core team
> --
> 
> -Original Message-
> From: Liu, Monk 
> Sent: Thursday, August 19, 2021 6:26 PM
> To: Daniel Vetter ; Grodzovsky, Andrey 
> 
> Cc: Alex Deucher ; Chen, JingWen 
> ; Maling list - DRI developers 
> ; amd-gfx list 
> ; Koenig, Christian 
> Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."
> 
> [AMD Official Use Only]
> 
> Hi Daniel
> 
> > > Why can't we stop the scheduler thread first, so that there's guaranteed 
> > > no race? I've recently had a lot of discussions with panfrost folks about 
> > > their reset that spawns across engines, and without stopping the 
> > > scheduler thread first before you touch anything it's just plain 
> > > impossible.
> 
> Yeah we had this though as well in our mind.
> 
> Our second approach is to call ktrhead_stop() in job_timedout() routine so 
> that  the "bad" job is guaranteed to be used without scheduler's touching or 
> freeing, Check this sample patch one as well please:
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> b/drivers/gpu/drm/scheduler/sched_main.c
> index a2a9536..50a49cb 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
> *work)
> sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
>  
> /* Protects against concurrent deletion in drm_sched_get_cleanup_job 
> */
> +   kthread_park(sched->thread);
> spin_lock(>job_list_lock);
> job = list_first_entry_or_null(>pending_list,
>struct drm_sched_job, list);
>  
> if (job) {
> -   /*
> -* Remove the bad job so it cannot be freed by concurrent
> -* drm_sched_cleanup_jobs. It will be reinserted back after 
> sched->thread
> -* is parked at which point it's safe.
> -*/
> -   list_del_init(>list);
> spin_unlock(>job_list_lock);
>  
> status = job->sched->ops->timedout_job(job);
> @@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct 
> *work)
> } else {
> spin_unlock(>job_list_lock);
> }
> +   kthread_unpark(sched->thread);
>  
> if (status != DRM_GPU_SCHED_STAT_ENODEV) {
> spin_lock(>job_list_lock); @@ -393,20 +389,6 @@ void 
> drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
> kthread_park(sched->thread);
>  
> /*
> -* Reinsert back the bad job here - now it's safe as
> -* drm_sched_get_cleanup_job cannot race against us and release the
> -* bad job at this point - we parked (waited for) any in progress
> -* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
> -* now until the scheduler thread is unparked.
> -*/
> -   if (bad && bad->sched == sched)
> -   /*
> -* Add at the head of the queue to reflect it was the earliest
> -* job extracted.
> -*/
> -   list_add(>list, >pending_list);
> -
> -   /*
>  * Iterate the job list from later to  earlier one and either deactive
>  * their HW callbacks or remove them from pending list if they already
>  * signaled.
> 
> 
> Thanks 
> 
> --
> Monk Liu | Cloud-GPU Core team
> --
> 
> -Original Message-
> From: Daniel Vetter 
> Sent: Thursday, August 19, 2021 5:31 PM
> To: Grodzovsky, Andrey 
> Cc: Daniel Vetter ; Alex Deucher ; 
> Chen, JingWen ; Maling list - DRI developers 
> ; amd-gfx list 
> ; Liu, Monk ; Koenig, 
> Christian 
> Subject: Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."
> 
> On Wed, Aug 18, 2021 at 10:51:00AM -0400, Andrey Grodzovsky wrote:
> > 
> > On 2021-08-18 10:42 a.m., Daniel 

Re: [Intel-gfx] [PATCH v3 3/4] PCI/VGA: only deal with VGA class devices

2023-06-08 Thread Bjorn Helgaas
Start with verb and capitalize to match ("Deal only with ...")

On Thu, Jun 08, 2023 at 07:43:21PM +0800, Sui Jingfeng wrote:
> From: Sui Jingfeng 
> 
> vgaarb only deal with the VGA devcie(pdev->class == 0x0300), so replace the
> pci_get_subsys() function with pci_get_class(). Filter the non pci display
> device out. There no need to process the non display PCI device.

s/non pci/non-PCI/
s/non display/non-display/

This is fine, and I'll merge this, but someday I would like to get rid
of the bus_register_notifier() and call vga_arbiter_add_pci_device()
and vga_arbiter_del_pci_device() directly from the PCI core.

Or if you wanted to do that now, that would be even better :)

Bjorn

> Signed-off-by: Sui Jingfeng 
> ---
>  drivers/pci/vgaarb.c | 22 --
>  1 file changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> index 7f0347f4f6fd..b0bf4952a95d 100644
> --- a/drivers/pci/vgaarb.c
> +++ b/drivers/pci/vgaarb.c
> @@ -756,10 +756,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev 
> *pdev)
>   struct pci_dev *bridge;
>   u16 cmd;
>  
> - /* Only deal with VGA class devices */
> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
> - return false;
> -
>   /* Allocate structure */
>   vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>   if (vgadev == NULL) {
> @@ -1499,7 +1495,9 @@ static int pci_notify(struct notifier_block *nb, 
> unsigned long action,
>   struct pci_dev *pdev = to_pci_dev(dev);
>   bool notify = false;
>  
> - vgaarb_dbg(dev, "%s\n", __func__);
> + /* Only deal with VGA class devices */
> + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
> + return 0;
>  
>   /* For now we're only intereted in devices added and removed. I didn't
>* test this thing here, so someone needs to double check for the
> @@ -1509,6 +1507,8 @@ static int pci_notify(struct notifier_block *nb, 
> unsigned long action,
>   else if (action == BUS_NOTIFY_DEL_DEVICE)
>   notify = vga_arbiter_del_pci_device(pdev);
>  
> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
> +
>   if (notify)
>   vga_arbiter_notify_clients();
>   return 0;
> @@ -1533,8 +1533,8 @@ static struct miscdevice vga_arb_device = {
>  
>  static int __init vga_arb_device_init(void)
>  {
> + struct pci_dev *pdev = NULL;
>   int rc;
> - struct pci_dev *pdev;
>  
>   rc = misc_register(_arb_device);
>   if (rc < 0)
> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>   /* We add all PCI devices satisfying VGA class in the arbiter by
>* default
>*/
> - pdev = NULL;
> - while ((pdev =
> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
> -PCI_ANY_ID, pdev)) != NULL)
> + while (1) {
> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
> + if (!pdev)
> + break;
> +
>   vga_arbiter_add_pci_device(pdev);
> + };
>  
>   pr_info("loaded\n");
>   return rc;
> -- 
> 2.25.1
> 


[PATCH] drm/amdgpu: Wrap -Wunused-but-set-variable in cc-option

2023-06-08 Thread Nathan Chancellor
-Wunused-but-set-variable was only supported in clang starting with
13.0.0, so earlier versions will emit a warning, which is turned into a
hard error for the kernel to mirror GCC:

  error: unknown warning option '-Wunused-but-set-variable'; did you mean 
'-Wunused-const-variable'? [-Werror,-Wunknown-warning-option]

The minimum supported version of clang for building the kernel is
11.0.0, so match the rest of the kernel and wrap
-Wunused-but-set-variable in a cc-option call, so that it is only used
when supported by the compiler.

Closes: https://github.com/ClangBuiltLinux/linux/issues/1869
Fixes: a0fd5a5f676c ("drm/amd/amdgpu: introduce DRM_AMDGPU_WERROR")
Signed-off-by: Nathan Chancellor 
---
 drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 7ee68b1bbfed..86b833085f19 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -40,7 +40,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/amdkfd
 
 subdir-ccflags-y := -Wextra
-subdir-ccflags-y += -Wunused-but-set-variable
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
 subdir-ccflags-y += -Wno-unused-parameter
 subdir-ccflags-y += -Wno-type-limits
 subdir-ccflags-y += -Wno-sign-compare

---
base-commit: 6bd4b01e8938779b0d959bdf33949a9aa258a363
change-id: 
20230608-amdgpu-wrap-wunused-but-set-variable-in-cc-option-0be9528ac5c8

Best regards,
-- 
Nathan Chancellor 



Re: [PATCH] drm/amdgpu: add the accelerator pcie class

2023-06-08 Thread Bjorn Helgaas
s/pcie/PCIe/ in the subject.

On Tue, May 23, 2023 at 12:02:32PM +0800, Shiwu Zhang wrote:
> v2: add the base class id for accelerator (lijo)

Please include a commit log.  For PCI, the "v2: ..." stuff would go
below the "---" so it doesn't get included in the git commit.  I don't
know what the DRM convention is.

It's OK if the commit log repeats the subject.  The subject is like
the title of a story -- it's not the first sentence of the story.

Please include a spec citation for the PCI_BASE_CLASS_ACCELERATOR
values in the commit log.  I think it's something like "PCI Code and
ID Assignment, r1.9, sec 1, 1.19".

> Signed-off-by: Shiwu Zhang 
> Acked-by: Lijo Lazar 

With the above:

Acked-by: Bjorn Helgaas   # pci_ids.h

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +
>  include/linux/pci_ids.h | 3 +++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 3d91e123f9bd..5d652e6f0b1e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2042,6 +2042,11 @@ static const struct pci_device_id pciidlist[] = {
> .class_mask = 0xff,
> .driver_data = CHIP_IP_DISCOVERY },
>  
> + { PCI_DEVICE(0x1002, PCI_ANY_ID),
> +   .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
> +   .class_mask = 0xff,
> +   .driver_data = CHIP_IP_DISCOVERY },
> +
>   {0, 0, 0}
>  };
>  
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index b362d90eb9b0..4918ff26a987 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -151,6 +151,9 @@
>  #define PCI_CLASS_SP_DPIO0x1100
>  #define PCI_CLASS_SP_OTHER   0x1180
>  
> +#define PCI_BASE_CLASS_ACCELERATOR   0x12
> +#define PCI_CLASS_ACCELERATOR_PROCESSING 0x1200
> +
>  #define PCI_CLASS_OTHERS 0xff
>  
>  /* Vendors and devices.  Sort key: vendor first, device next. */
> -- 
> 2.17.1
> 


Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit

2023-06-08 Thread Michel Dänzer
On 6/8/23 16:31, Pillai, Aurabindo wrote:
> 
> Thanks Michel,
> 
> I reached out to windows driver team, and they have a monitor specific quirk 
> to disable FAMS on this model. I suspect the issue is only present on certain 
> fw revisions on the monitor which is why we cant see your issue.
> 
> Unfortunately, having the patches in question reverted causes hangs with 3 
> monitor setups. So I will push that monitor specific quirk and bring back the 
> reverted patches.

Sounds good, thanks.


-- 
Earthling Michel Dänzer|  https://redhat.com
Libre software enthusiast  | Mesa and Xwayland developer



[PATCH v3 3/5] drm/amdkfd: set activated flag true when event age unmatchs

2023-06-08 Thread James Zhu
Set waiter's activated flag true when event age unmatchs with last_event_age.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index c7689181cc22..4c6907507190 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -41,6 +41,7 @@ struct kfd_event_waiter {
wait_queue_entry_t wait;
struct kfd_event *event; /* Event to wait for */
bool activated;  /* Becomes true when event is signaled */
+   bool event_age_enabled;  /* set to true when last_event_age is non-zero 
*/
 };
 
 /*
@@ -797,9 +798,9 @@ static struct kfd_event_waiter 
*alloc_event_waiters(uint32_t num_events)
 
 static int init_event_waiter(struct kfd_process *p,
struct kfd_event_waiter *waiter,
-   uint32_t event_id)
+   struct kfd_event_data *event_data)
 {
-   struct kfd_event *ev = lookup_event_by_id(p, event_id);
+   struct kfd_event *ev = lookup_event_by_id(p, event_data->event_id);
 
if (!ev)
return -EINVAL;
@@ -808,6 +809,13 @@ static int init_event_waiter(struct kfd_process *p,
waiter->event = ev;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
+
+   /* last_event_age = 0 reserved for backward compatible */
+   waiter->event_age_enabled = 
!!event_data->signal_event_data.last_event_age;
+   if (waiter->event_age_enabled &&
+   ev->event_age != event_data->signal_event_data.last_event_age)
+   WRITE_ONCE(waiter->activated, true);
+
if (!waiter->activated)
add_wait_queue(>wq, >wait);
spin_unlock(>lock);
@@ -948,8 +956,7 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
 
-   ret = init_event_waiter(p, _waiters[i],
-   event_data.event_id);
+   ret = init_event_waiter(p, _waiters[i], _data);
if (ret)
goto out_unlock;
}
-- 
2.34.1



[PATCH v3 5/5] drm/amdkfd: bump kfd ioctl minor version for event age availability

2023-06-08 Thread James Zhu
Bump the minor version to declare event age tracking feature is now
available.

Signed-off-by: James Zhu 
---
 include/uapi/linux/kfd_ioctl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 93f1c0bc5caf..eeb2fdcbdcb7 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -39,9 +39,10 @@
  * - 1.11 - Add unified memory for ctx save/restore area
  * - 1.12 - Add DMA buf export ioctl
  * - 1.13 - Add debugger API
+ * - 1.14 - Update kfd_event_data
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 13
+#define KFD_IOCTL_MINOR_VERSION 14
 
 struct kfd_ioctl_get_version_args {
__u32 major_version;/* from KFD */
-- 
2.34.1



[PATCH v3 4/5] drm/amdkfd: update user space last_event_age

2023-06-08 Thread James Zhu
Update user space last_event_age when event age is enabled.
It is only for KFD_EVENT_TYPE_SIGNAL which is checked by user space.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 4c6907507190..9fcfec57a094 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -863,22 +863,29 @@ static int copy_signaled_event_data(uint32_t num_events,
struct kfd_event_waiter *event_waiters,
struct kfd_event_data __user *data)
 {
-   struct kfd_hsa_memory_exception_data *src;
-   struct kfd_hsa_memory_exception_data __user *dst;
+   void *src;
+   void __user *dst;
struct kfd_event_waiter *waiter;
struct kfd_event *event;
-   uint32_t i;
+   uint32_t i, size = 0;
 
for (i = 0; i < num_events; i++) {
waiter = _waiters[i];
event = waiter->event;
if (!event)
return -EINVAL; /* event was destroyed */
-   if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
-   dst = [i].memory_exception_data;
-   src = >memory_exception_data;
-   if (copy_to_user(dst, src,
-   sizeof(struct kfd_hsa_memory_exception_data)))
+   if (waiter->activated) {
+   if (event->type == KFD_EVENT_TYPE_MEMORY) {
+   dst = [i].memory_exception_data;
+   src = >memory_exception_data;
+   size = sizeof(struct 
kfd_hsa_memory_exception_data);
+   } else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
+   waiter->event_age_enabled) {
+   dst = [i].signal_event_data.last_event_age;
+   src = >event_age;
+   size = sizeof(u64);
+   }
+   if (size && copy_to_user(dst, src, size))
return -EFAULT;
}
}
-- 
2.34.1



[PATCH v3 1/5] drm/amdkfd: add event age tracking

2023-06-08 Thread James Zhu
Add event age tracking

Signed-off-by: James Zhu 
---
 include/uapi/linux/kfd_ioctl.h | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 1781e7669982..93f1c0bc5caf 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -320,12 +320,20 @@ struct kfd_hsa_hw_exception_data {
__u32 gpu_id;
 };
 
+/* hsa signal event data */
+struct kfd_hsa_signal_event_data {
+   __u64 last_event_age;   /* to and from KFD */
+};
+
 /* Event data */
 struct kfd_event_data {
union {
+   /* From KFD */
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
-   };  /* From KFD */
+   /* To and From KFD */
+   struct kfd_hsa_signal_event_data signal_event_data;
+   };
__u64 kfd_event_data_ext;   /* pointer to an extension structure
   for future exception types */
__u32 event_id; /* to KFD */
-- 
2.34.1



[PATCH v3 2/5] drm/amdkfd: add event_age tracking when receiving interrupt

2023-06-08 Thread James Zhu
Add event_age tracking when receiving interrupt.

Signed-off-by: James Zhu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 7ff5c4e1b7e2..c7689181cc22 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -431,6 +431,7 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
if (!ret) {
*event_id = ev->event_id;
*event_trigger_data = ev->event_id;
+   ev->event_age = 1;
} else {
kfree(ev);
}
@@ -629,6 +630,11 @@ static void set_event(struct kfd_event *ev)
 * updating the wait queues in kfd_wait_on_events.
 */
ev->signaled = !ev->auto_reset || !waitqueue_active(>wq);
+   if (!(++ev->event_age)) {
+   /* Never wrap back to reserved/default event age 0/1 */
+   ev->event_age = 2;
+   WARN_ONCE(1, "event_age wrap back!");
+   }
 
list_for_each_entry(waiter, >wq.head, wait.entry)
WRITE_ONCE(waiter->activated, true);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 1c62c8dd6460..52ccfd397c2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -53,6 +53,7 @@ struct signal_page;
 
 struct kfd_event {
u32 event_id;
+   u64 event_age;
 
bool signaled;
bool auto_reset;
-- 
2.34.1



[PATCH v3 0/5] new event wait support

2023-06-08 Thread James Zhu
In kernel amdgpu driver, kfd_wait_on_events is used to support user space 
signal event wait
function. For multiple threads waiting on same event scenery, race condition 
could occur
since some threads after checking signal condition, before calling 
kfd_wait_on_events, the
event interrupt could be fired and wake up other thread which are sleeping on 
this event.
Then those threads could fall into sleep without waking up again. Adding event 
age tracking
in both kernel and user mode, will help avoiding this race condition.

The changes for The user space ROCT-Thunk-Interface/ROCR-Runtime are listed 
below for
review togehter with kernel mode changes.

ROCT-Thunk-Interface:
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/efdbf6cfbc026bd68ac3c35d00dacf84370eb81e
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/910108272091d1ce61dbc48bd9519731e0e9cf52

ROCR-Runtime:
https://github.com/RadeonOpenCompute/ROCR-Runtime/compare/master...zhums:ROCR-Runtime:new_event_wait_review
https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/e1f5bdb88eb882ac798aeca2c00ea3fbb2dba459
https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/7d26afd14107b5c2a754c1a3f415d89f3aabb503

-v2: remove unnecessay link

-v3: 1. update kfd test cases (910108272091d1ce61dbc48bd9519731e0e9cf52)
 2. move event age match checking into init_event_waiter
 3. move last event age update into copy_signaled_event_data

James Zhu (5):
  drm/amdkfd: add event age tracking
  drm/amdkfd: add event_age tracking when receiving interrupt
  drm/amdkfd: set activated flag true when event age unmatchs
  drm/amdkfd: update user space last_event_age
  drm/amdkfd: bump kfd ioctl minor version for event age availability

 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 44 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_events.h |  1 +
 include/uapi/linux/kfd_ioctl.h  | 13 ++--
 3 files changed, 44 insertions(+), 14 deletions(-)

-- 
2.34.1



Re: [PATCH] drm/amdgpu: Wrap -Wunused-but-set-variable in cc-option

2023-06-08 Thread Hamza Mahfooz

On 6/8/23 13:01, Nathan Chancellor wrote:

-Wunused-but-set-variable was only supported in clang starting with
13.0.0, so earlier versions will emit a warning, which is turned into a
hard error for the kernel to mirror GCC:

   error: unknown warning option '-Wunused-but-set-variable'; did you mean 
'-Wunused-const-variable'? [-Werror,-Wunknown-warning-option]

The minimum supported version of clang for building the kernel is
11.0.0, so match the rest of the kernel and wrap
-Wunused-but-set-variable in a cc-option call, so that it is only used
when supported by the compiler.

Closes: https://github.com/ClangBuiltLinux/linux/issues/1869
Fixes: a0fd5a5f676c ("drm/amd/amdgpu: introduce DRM_AMDGPU_WERROR")
Signed-off-by: Nathan Chancellor 


Applied, thanks!


---
  drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 7ee68b1bbfed..86b833085f19 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -40,7 +40,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/amdkfd
  
  subdir-ccflags-y := -Wextra

-subdir-ccflags-y += -Wunused-but-set-variable
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
  subdir-ccflags-y += -Wno-unused-parameter
  subdir-ccflags-y += -Wno-type-limits
  subdir-ccflags-y += -Wno-sign-compare

---
base-commit: 6bd4b01e8938779b0d959bdf33949a9aa258a363
change-id: 
20230608-amdgpu-wrap-wunused-but-set-variable-in-cc-option-0be9528ac5c8

Best regards,

--
Hamza



Re: [Intel-gfx] [PATCH v3 1/4] PCI/VGA: tidy up the code and comment format

2023-06-08 Thread Bjorn Helgaas
Capitalize subject to match ("Tidy ...")

On Thu, Jun 08, 2023 at 07:43:19PM +0800, Sui Jingfeng wrote:
> From: Sui Jingfeng 
> 
> This patch replaces the leading space with a tab and removes the double
> blank line, no functional change.

Can you move this to the end of the series?  The functional changes
are more likely to be backported, and I think the backport may be a
little easier without the cleanup in the middle.

>   /* we could in theory hand out locks on IO and mem
> -  * separately to userspace but it can cause deadlocks */
> +  * separately to userspace but it can cause deadlocks
> +  */

Since you're touching this anyway, can you update it to the
conventional multi-line comment style:

  /*
   * We could in theory ...
   */

And capitalize "We", add a period at end, and rewrap to fill 78
columns or so?  Same for other comments below.

> +++ b/include/linux/vgaarb.h
> @@ -23,9 +23,7 @@
>   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> - * DEALINGS
> - * IN THE SOFTWARE.
> - *
> + * DEALINGS IN THE SOFTWARE.
>   */

Can you make a separate patch to replace this entire copyright notice
with the appropriate SPDX-License-Identifier header?
Documentation/process/license-rules.rst has details.

Bjorn


Re: [Intel-gfx] [PATCH v3 4/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register

2023-06-08 Thread Bjorn Helgaas
On Thu, Jun 08, 2023 at 07:43:22PM +0800, Sui Jingfeng wrote:
> From: Sui Jingfeng 
> 
> The vga_is_firmware_default() function is arch-dependent, which doesn't
> sound right. At least, it also works on the Mips and LoongArch platforms.
> Tested with the drm/amdgpu and drm/radeon drivers. However, it's difficult
> to enumerate all arch-driver combinations. I'm wrong if there is only one
> exception.
> 
> With the observation that device drivers typically have better knowledge
> about which PCI bar contains the firmware framebuffer, which could avoid
> the need to iterate all of the PCI BARs.
> 
> But as a PCI function at pci/vgaarb.c, vga_is_firmware_default() is
> probably not suitable to make such an optimization for a specific device.
> 
> There are PCI display controllers that don't have a dedicated VRAM bar,
> this function will lose its effectiveness in such a case. Luckily, the
> device driver can provide an accurate workaround.
> 
> Therefore, this patch introduces a callback that allows the device driver
> to tell the VGAARB if the device is the default boot device. This patch
> only intends to introduce the mechanism, while the implementation is left
> to the device driver authors. Also honor the comment: "Clients have two
> callback mechanisms they can use"

s/bar/BAR/ (several)

Nothing here uses the callback.  I don't want to merge this until we
have a user.

I'm not sure why the device driver should know whether its device is
the default boot device.

> Signed-off-by: Sui Jingfeng 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
>  drivers/gpu/drm/i915/display/intel_vga.c   |  3 +--
>  drivers/gpu/drm/nouveau/nouveau_vga.c  |  2 +-
>  drivers/gpu/drm/radeon/radeon_device.c |  2 +-
>  drivers/pci/vgaarb.c   | 22 ++
>  drivers/vfio/pci/vfio_pci_core.c   |  2 +-
>  include/linux/vgaarb.h |  8 +---
>  7 files changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5c7d40873ee2..7a096f2d5c16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3960,7 +3960,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   /* this will fail for cards that aren't VGA class devices, just
>* ignore it */
>   if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
> - vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
> + vga_client_register(adev->pdev, amdgpu_device_vga_set_decode, 
> NULL);
>  
>   px = amdgpu_device_supports_px(ddev);
>  
> diff --git a/drivers/gpu/drm/i915/display/intel_vga.c 
> b/drivers/gpu/drm/i915/display/intel_vga.c
> index 286a0bdd28c6..98d7d4dffe9f 100644
> --- a/drivers/gpu/drm/i915/display/intel_vga.c
> +++ b/drivers/gpu/drm/i915/display/intel_vga.c
> @@ -115,7 +115,6 @@ intel_vga_set_decode(struct pci_dev *pdev, bool 
> enable_decode)
>  
>  int intel_vga_register(struct drm_i915_private *i915)
>  {
> -
>   struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>   int ret;
>  
> @@ -127,7 +126,7 @@ int intel_vga_register(struct drm_i915_private *i915)
>* then we do not take part in VGA arbitration and the
>* vga_client_register() fails with -ENODEV.
>*/
> - ret = vga_client_register(pdev, intel_vga_set_decode);
> + ret = vga_client_register(pdev, intel_vga_set_decode, NULL);
>   if (ret && ret != -ENODEV)
>   return ret;
>  
> diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c 
> b/drivers/gpu/drm/nouveau/nouveau_vga.c
> index f8bf0ec26844..162b4f4676c7 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_vga.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
> @@ -92,7 +92,7 @@ nouveau_vga_init(struct nouveau_drm *drm)
>   return;
>   pdev = to_pci_dev(dev->dev);
>  
> - vga_client_register(pdev, nouveau_vga_set_decode);
> + vga_client_register(pdev, nouveau_vga_set_decode, NULL);
>  
>   /* don't register Thunderbolt eGPU with vga_switcheroo */
>   if (pci_is_thunderbolt_attached(pdev))
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
> b/drivers/gpu/drm/radeon/radeon_device.c
> index afbb3a80c0c6..71f2ff39d6a1 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1425,7 +1425,7 @@ int radeon_device_init(struct radeon_device *rdev,
>   /* if we have > 1 VGA cards, then disable the radeon VGA resources */
>   /* this will fail for cards that aren't VGA class devices, just
>* ignore it */
> - vga_client_register(rdev->pdev, radeon_vga_set_decode);
> + vga_client_register(rdev->pdev, radeon_vga_set_decode, NULL);
>  
>   if (rdev->flags & RADEON_IS_PX)
>   runtime = true;
> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> index b0bf4952a95d..d3dab61e0ef2 100644
> --- a/drivers/pci/vgaarb.c
> 

[PATCH] Align eccinfo table structure with smu v13_0_0 interface

2023-06-08 Thread Candice Li
Update eccinfo table structure according to smu v13_0_0 interface.

Signed-off-by: Candice Li 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/umc_v8_10.h   | 3 +++
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
index c6dfd433fec7bc..2cdaf746e8cd4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -31,6 +31,9 @@
 /* number of umc instance with memory map register access */
 #define UMC_V8_10_UMC_INSTANCE_NUM 2
 
+/* Max number of umc channel instances */
+#define UMC_V8_10_MAX_CHANNEL_NUM  24
+
 /* Total channel instances for all available umc nodes */
 #define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
(UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * 
(adev)->gmc.num_umc)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 413e592f0ed611..90ea15496879c4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2580,7 +2580,7 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct 
smu_context *smu,
 
ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
 
-   for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
+   for (i = 0; i < UMC_V8_10_MAX_CHANNEL_NUM; i++) {
ecc_info_per_channel = &(eccinfo->ecc[i]);
ecc_info_per_channel->ce_count_lo_chip =
ecc_table->EccInfo[i].ce_count_lo_chip;
-- 
2.25.1



Re: [Intel-gfx] [PATCH v3 1/4] PCI/VGA: tidy up the code and comment format

2023-06-08 Thread Sui Jingfeng

Hi,

On 2023/6/9 03:07, Bjorn Helgaas wrote:

Capitalize subject to match ("Tidy ...")

On Thu, Jun 08, 2023 at 07:43:19PM +0800, Sui Jingfeng wrote:

From: Sui Jingfeng 

This patch replaces the leading space with a tab and removes the double
blank line, no functional change.

Can you move this to the end of the series?  The functional changes
are more likely to be backported, and I think the backport may be a
little easier without the cleanup in the middle.


OK, acceptable.


/* we could in theory hand out locks on IO and mem
-* separately to userspace but it can cause deadlocks */
+* separately to userspace but it can cause deadlocks
+*/

Since you're touching this anyway, can you update it to the
conventional multi-line comment style:

   /*
* We could in theory ...
*/

And capitalize "We", add a period at end, and rewrap to fill 78
columns or so?  Same for other comments below.

OK, I could improve this at next version.

+++ b/include/linux/vgaarb.h
@@ -23,9 +23,7 @@
   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS
- * IN THE SOFTWARE.
- *
+ * DEALINGS IN THE SOFTWARE.
   */

Can you make a separate patch to replace this entire copyright notice
with the appropriate SPDX-License-Identifier header?
Documentation/process/license-rules.rst has details.


Wow ...


Bjorn


--
Jingfeng



[PATCH] drm/amd/pm: workaround for compute workload type on some skus

2023-06-08 Thread Kenneth Feng
On smu 13.0.0, the compute workload type cannot be set on all the skus
due to some other problems. This workaround is to make sure compute workload 
type
can also run on some specific skus.

Signed-off-by: Kenneth Feng 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 26 +++
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index e2265f50bacc..6e8acd021ee6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2179,6 +2179,32 @@ static int smu_v13_0_0_set_power_profile_mode(struct 
smu_context *smu,
}
}
 
+   if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
+   (((smu->adev->pdev->device == 0x744C) && 
(smu->adev->pdev->revision == 0xC8)) ||
+   ((smu->adev->pdev->device == 0x744C) && 
(smu->adev->pdev->revision == 0xCC {
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+  WORKLOAD_PPLIB_COMPUTE_BIT,
+  (void *)(_monitor_external),
+  false);
+   if (ret) {
+   dev_err(smu->adev->dev, "[%s] Failed to get activity 
monitor!", __func__);
+   return ret;
+   }
+
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+  WORKLOAD_PPLIB_CUSTOM_BIT,
+  (void *)(_monitor_external),
+  true);
+   if (ret) {
+   dev_err(smu->adev->dev, "[%s] Failed to set activity 
monitor!", __func__);
+   return ret;
+   }
+
+   smu->power_profile_mode = PP_SMC_POWER_PROFILE_CUSTOM;
+   }
+
/* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
workload_type = smu_cmn_to_asic_specific_index(smu,
   
CMN2ASIC_MAPPING_WORKLOAD,
-- 
2.34.1



Re: [PATCH] drm/amdgpu: Raname DRM schedulers in amdgpu TTM

2023-06-08 Thread Christian König

Am 07.06.23 um 17:26 schrieb Mukul Joshi:

Rename mman.entity to mman.high_pr to make the distinction
clearer that this is a high priority scheduler. Similarly,
rename the recently added mman.delayed to mman.low_pr to
make it clear it is a low priority scheduler.
No functional change in this patch.

Signed-off-by: Mukul Joshi 


Reviewed-by: Christian König 

Thanks,
Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 18 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  8 
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  2 +-
  4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8884c043cf76..8a4ed69a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -228,7 +228,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object 
*bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
  
-	r = amdgpu_job_alloc_with_ib(adev, >mman.entity,

+   r = amdgpu_job_alloc_with_ib(adev, >mman.high_pr,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4 + num_bytes,
 AMDGPU_IB_POOL_DELAYED, );
@@ -1460,7 +1460,7 @@ static int amdgpu_ttm_access_memory_sdma(struct 
ttm_buffer_object *bo,
memcpy(adev->mman.sdma_access_ptr, buf, len);
  
  	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);

-   r = amdgpu_job_alloc_with_ib(adev, >mman.entity,
+   r = amdgpu_job_alloc_with_ib(adev, >mman.high_pr,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4, AMDGPU_IB_POOL_DELAYED,
 );
@@ -2036,7 +2036,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
  
  		ring = adev->mman.buffer_funcs_ring;

sched = >sched;
-   r = drm_sched_entity_init(>mman.entity,
+   r = drm_sched_entity_init(>mman.high_pr,
  DRM_SCHED_PRIORITY_KERNEL, ,
  1, NULL);
if (r) {
@@ -2045,7 +2045,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
return;
}
  
-		r = drm_sched_entity_init(>mman.delayed,

+   r = drm_sched_entity_init(>mman.low_pr,
  DRM_SCHED_PRIORITY_NORMAL, ,
  1, NULL);
if (r) {
@@ -2054,8 +2054,8 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
goto error_free_entity;
}
} else {
-   drm_sched_entity_destroy(>mman.entity);
-   drm_sched_entity_destroy(>mman.delayed);
+   drm_sched_entity_destroy(>mman.high_pr);
+   drm_sched_entity_destroy(>mman.low_pr);
dma_fence_put(man->move);
man->move = NULL;
}
@@ -2071,7 +2071,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
amdgpu_device *adev, bool enable)
return;
  
  error_free_entity:

-   drm_sched_entity_destroy(>mman.entity);
+   drm_sched_entity_destroy(>mman.high_pr);
  }
  
  static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,

@@ -2086,8 +2086,8 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device 
*adev,
AMDGPU_IB_POOL_DIRECT :
AMDGPU_IB_POOL_DELAYED;
int r;
-   struct drm_sched_entity *entity = delayed ? >mman.delayed :
-   >mman.entity;
+   struct drm_sched_entity *entity = delayed ? >mman.low_pr :
+   >mman.high_pr;
r = amdgpu_job_alloc_with_ib(adev, entity,
 AMDGPU_FENCE_OWNER_UNDEFINED,
 num_dw * 4, pool, job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e82b1edee7a4..6d0d66e40db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -59,10 +59,10 @@ struct amdgpu_mman {
boolbuffer_funcs_enabled;
  
  	struct mutexgtt_window_lock;

-   /* Scheduler entity for buffer moves */
-   struct drm_sched_entity entity;
-   /* Scheduler entity for VRAM clearing */
-   struct drm_sched_entity delayed;
+   /* High priority scheduler entity for buffer moves */
+   struct drm_sched_entity high_pr;
+   /* Low priority scheduler entity for VRAM clearing */
+   

[PATCH] drm/amdgpu: Print client id for the unregistered interrupt resource

2023-06-08 Thread Ma Jun
Modify the debug information and print the clien id for these
interrupts as well.

Signed-off-by: Ma Jun 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index b0808c1be013..e6edc67ef010 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -486,7 +486,8 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
handled = true;
 
} else {
-   DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
+   DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n",
+   src_id, client_id);
}
 
/* Send it to amdkfd as well if it isn't already handled */
-- 
2.34.1



Re: [Intel-gfx] [PATCH v3 3/4] PCI/VGA: only deal with VGA class devices

2023-06-08 Thread Sui Jingfeng

Hi,

On 2023/6/9 03:12, Bjorn Helgaas wrote:

Start with verb and capitalize to match ("Deal only with ...")

On Thu, Jun 08, 2023 at 07:43:21PM +0800, Sui Jingfeng wrote:

From: Sui Jingfeng 

vgaarb only deal with the VGA devcie(pdev->class == 0x0300), so replace the
pci_get_subsys() function with pci_get_class(). Filter the non pci display
device out. There no need to process the non display PCI device.

s/non pci/non-PCI/
s/non display/non-display/


Will be fixed at next version, thanks for the strict checking,

I will try to bring this rigorous to the other patch of myself in the 
future.



This is fine, and I'll merge this, but someday I would like to get rid
of the bus_register_notifier() and call vga_arbiter_add_pci_device()
and vga_arbiter_del_pci_device() directly from the PCI core.

Or if you wanted to do that now, that would be even better :)


I think, I probably should only do what I'm good at.


The "good at" here means that It's under prepared,

already matured with thinking(or consideration) by myself.

And also including testing(on two or three architecture).


That idea is belong to you, I would like to see how does it going to 
happen only.



Bjorn


Signed-off-by: Sui Jingfeng 
---
  drivers/pci/vgaarb.c | 22 --
  1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index 7f0347f4f6fd..b0bf4952a95d 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -756,10 +756,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev 
*pdev)
struct pci_dev *bridge;
u16 cmd;
  
-	/* Only deal with VGA class devices */

-   if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
-   return false;
-
/* Allocate structure */
vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
if (vgadev == NULL) {
@@ -1499,7 +1495,9 @@ static int pci_notify(struct notifier_block *nb, unsigned 
long action,
struct pci_dev *pdev = to_pci_dev(dev);
bool notify = false;
  
-	vgaarb_dbg(dev, "%s\n", __func__);

+   /* Only deal with VGA class devices */
+   if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+   return 0;
  
  	/* For now we're only intereted in devices added and removed. I didn't

 * test this thing here, so someone needs to double check for the
@@ -1509,6 +1507,8 @@ static int pci_notify(struct notifier_block *nb, unsigned 
long action,
else if (action == BUS_NOTIFY_DEL_DEVICE)
notify = vga_arbiter_del_pci_device(pdev);
  
+	vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);

+
if (notify)
vga_arbiter_notify_clients();
return 0;
@@ -1533,8 +1533,8 @@ static struct miscdevice vga_arb_device = {
  
  static int __init vga_arb_device_init(void)

  {
+   struct pci_dev *pdev = NULL;
int rc;
-   struct pci_dev *pdev;
  
  	rc = misc_register(_arb_device);

if (rc < 0)
@@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
/* We add all PCI devices satisfying VGA class in the arbiter by
 * default
 */
-   pdev = NULL;
-   while ((pdev =
-   pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
-  PCI_ANY_ID, pdev)) != NULL)
+   while (1) {
+   pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
+   if (!pdev)
+   break;
+
vga_arbiter_add_pci_device(pdev);
+   };
  
  	pr_info("loaded\n");

return rc;
--
2.25.1


--
Jingfeng



[PATCH v2] drm/amdkfd: To enable traps for GC_11_0_4 and up

2023-06-08 Thread Ji, Ruili
From: Ruili Ji 

Flag trap_en should be enabled for trap handler.

Signed-off-by: Ruili Ji 
Signed-off-by: Aaron Liu 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d6b15493fffd..8a39a9e0ed5a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -227,7 +227,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, 
struct queue *q,
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) 
||
- KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0);
+ KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3);
queue_input.skip_process_ctx_clear = 
qpd->pqm->process->debug_trap_enabled;
 
queue_type = convert_to_mes_queue_type(q->properties.type);
@@ -1807,7 +1807,7 @@ static int create_queue_cpsch(struct device_queue_manager 
*dqm, struct queue *q,
q->properties.is_evicted = !!qpd->evicted;
q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
-   KFD_GC_VERSION(q->device) < IP_VERSION(12, 0, 0);
+   KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
 
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, >mqd, q->mqd_mem_obj, 
>gart_mqd_addr,
-- 
2.40.1



Re: [Intel-gfx] [PATCH v3 4/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register

2023-06-08 Thread Sui Jingfeng

Hi,

On 2023/6/9 03:19, Bjorn Helgaas wrote:

On Thu, Jun 08, 2023 at 07:43:22PM +0800, Sui Jingfeng wrote:

From: Sui Jingfeng 

The vga_is_firmware_default() function is arch-dependent, which doesn't
sound right. At least, it also works on the Mips and LoongArch platforms.
Tested with the drm/amdgpu and drm/radeon drivers. However, it's difficult
to enumerate all arch-driver combinations. I'm wrong if there is only one
exception.

With the observation that device drivers typically have better knowledge
about which PCI bar contains the firmware framebuffer, which could avoid
the need to iterate all of the PCI BARs.

But as a PCI function at pci/vgaarb.c, vga_is_firmware_default() is
probably not suitable to make such an optimization for a specific device.

There are PCI display controllers that don't have a dedicated VRAM bar,
this function will lose its effectiveness in such a case. Luckily, the
device driver can provide an accurate workaround.

Therefore, this patch introduces a callback that allows the device driver
to tell the VGAARB if the device is the default boot device. This patch
only intends to introduce the mechanism, while the implementation is left
to the device driver authors. Also honor the comment: "Clients have two
callback mechanisms they can use"

s/bar/BAR/ (several)

Nothing here uses the callback.  I don't want to merge this until we
have a user.


This is chicken and egg question.

If you could help get this merge first, I will show you the first user.


I'm not sure why the device driver should know whether its device is
the default boot device.


It's not that the device driver should know,

but it's about that the device driver has the right to override.

Device driver may have better approach to identify the default boot device.


Signed-off-by: Sui Jingfeng 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
  drivers/gpu/drm/i915/display/intel_vga.c   |  3 +--
  drivers/gpu/drm/nouveau/nouveau_vga.c  |  2 +-
  drivers/gpu/drm/radeon/radeon_device.c |  2 +-
  drivers/pci/vgaarb.c   | 22 ++
  drivers/vfio/pci/vfio_pci_core.c   |  2 +-
  include/linux/vgaarb.h |  8 +---
  7 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5c7d40873ee2..7a096f2d5c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3960,7 +3960,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* this will fail for cards that aren't VGA class devices, just
 * ignore it */
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
-   vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
+   vga_client_register(adev->pdev, amdgpu_device_vga_set_decode, 
NULL);
  
  	px = amdgpu_device_supports_px(ddev);
  
diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c

index 286a0bdd28c6..98d7d4dffe9f 100644
--- a/drivers/gpu/drm/i915/display/intel_vga.c
+++ b/drivers/gpu/drm/i915/display/intel_vga.c
@@ -115,7 +115,6 @@ intel_vga_set_decode(struct pci_dev *pdev, bool 
enable_decode)
  
  int intel_vga_register(struct drm_i915_private *i915)

  {
-
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
int ret;
  
@@ -127,7 +126,7 @@ int intel_vga_register(struct drm_i915_private *i915)

 * then we do not take part in VGA arbitration and the
 * vga_client_register() fails with -ENODEV.
 */
-   ret = vga_client_register(pdev, intel_vga_set_decode);
+   ret = vga_client_register(pdev, intel_vga_set_decode, NULL);
if (ret && ret != -ENODEV)
return ret;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c

index f8bf0ec26844..162b4f4676c7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -92,7 +92,7 @@ nouveau_vga_init(struct nouveau_drm *drm)
return;
pdev = to_pci_dev(dev->dev);
  
-	vga_client_register(pdev, nouveau_vga_set_decode);

+   vga_client_register(pdev, nouveau_vga_set_decode, NULL);
  
  	/* don't register Thunderbolt eGPU with vga_switcheroo */

if (pci_is_thunderbolt_attached(pdev))
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index afbb3a80c0c6..71f2ff39d6a1 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1425,7 +1425,7 @@ int radeon_device_init(struct radeon_device *rdev,
/* if we have > 1 VGA cards, then disable the radeon VGA resources */
/* this will fail for cards that aren't VGA class devices, just
 * ignore it */
-   vga_client_register(rdev->pdev, radeon_vga_set_decode);
+   vga_client_register(rdev->pdev, radeon_vga_set_decode, 

RE: [PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new message

2023-06-08 Thread Lin, Wayne
[Public]

> -Original Message-
> From: Jani Nikula 
> Sent: Thursday, June 8, 2023 8:26 PM
> To: Lin, Wayne ; dri-de...@lists.freedesktop.org;
> amd-gfx@lists.freedesktop.org
> Cc: ly...@redhat.com; ville.syrj...@linux.intel.com; imre.d...@intel.com;
> Wentland, Harry ; Zuo, Jerry
> ; Lin, Wayne ;
> sta...@vger.kernel.org
> Subject: Re: [PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new
> message
>
> On Thu, 08 Jun 2023, Wayne Lin  wrote:
> > [Why]
> > The sequence for collecting down_reply from source perspective should
> > be:
> >
> > Request_n->repeat (get partial reply of Request_n->clear message ready
> > flag to ack DPRX that the message is received) till all partial
> > replies for Request_n are received->new Request_n+1.
> >
> > Now there is chance that drm_dp_mst_hpd_irq() will fire new down
> > request in the tx queue when the down reply is incomplete. Source is
> > restricted to generate interveleaved message transactions so we should
> > avoid it.
> >
> > Also, while assembling partial reply packets, reading out DPCD
> > DOWN_REP Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag
> should
> > be wrapped up as a complete operation for reading out a reply packet.
> > Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might
> > be risky. e.g. If the reply of the new request has overwritten the
> > DPRX DOWN_REP Sideband MSG buffer before source writing one to clear
> > DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply
> > for the new request. Should handle the up request in the same way.
> >
> > [How]
> > Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ
> > event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might
> > trigger drm_dp_mst_kick_tx() only when there is no on going message
> > transaction.
> >
> > Changes since v1:
> > * Reworked on review comments received
> > -> Adjust the fix to let driver explicitly kick off new down request
> > when mst irq event is handled and acked
> > -> Adjust the commit message
> >
> > Changes since v2:
> > * Adjust the commit message
> > * Adjust the naming of the divided 2 functions and add a new input
> >   parameter "ack".
> > * Adjust code flow as per review comments.
> >
> > Changes since v3:
> > * Update the function description of drm_dp_mst_hpd_irq_handle_event
> >
> > Signed-off-by: Wayne Lin 
> > Cc: sta...@vger.kernel.org
> > ---
> >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++--
> > drivers/gpu/drm/display/drm_dp_mst_topology.c | 54
> ---
> >  drivers/gpu/drm/i915/display/intel_dp.c   |  7 +--
> >  drivers/gpu/drm/nouveau/dispnv50/disp.c   | 12 +++--
> >  include/drm/display/drm_dp_mst_helper.h   |  7 ++-
> >  5 files changed, 82 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index d5cec03eaa8d..597c3368bcfb 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -3236,6 +3236,7 @@ static void dm_handle_mst_sideband_msg(struct
> > amdgpu_dm_connector *aconnector)  {
> > u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
> > u8 dret;
> > +   u8 ack;
> > bool new_irq_handled = false;
> > int dpcd_addr;
> > int dpcd_bytes_to_read;
> > @@ -3265,34 +3266,36 @@ static void
> dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
> > process_count < max_process_count) {
> > u8 retry;
> > dret = 0;
> > +   ack = 0;
> >
> > process_count++;
> >
> > DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0],
> esi[1], esi[2]);
> > /* handle HPD short pulse irq */
> > if (aconnector->mst_mgr.mst_state)
> > -   drm_dp_mst_hpd_irq(
> > -   >mst_mgr,
> > -   esi,
> > -   _irq_handled);
> > +   drm_dp_mst_hpd_irq_handle_event(
> >mst_mgr,
> > +   esi,
> > +   ,
> > +   _irq_handled);
> >
> > if (new_irq_handled) {
> > /* ACK at DPCD to notify down stream */
> > -   const int ack_dpcd_bytes_to_write =
> > -   dpcd_bytes_to_read - 1;
> > -
> > for (retry = 0; retry < 3; retry++) {
> > -   u8 wret;
> > -
> > -   wret = drm_dp_dpcd_write(
> > -   >dm_dp_aux.aux,
> > -   dpcd_addr + 1,
> > -   [1],
> > -   ack_dpcd_bytes_to_write);
> > -   if (wret == ack_dpcd_bytes_to_write)
> > +

Re: [PATCH] drm/amd/pm: workaround for compute workload type on some skus

2023-06-08 Thread Lazar, Lijo




On 6/9/2023 8:25 AM, Kenneth Feng wrote:

On smu 13.0.0, the compute workload type cannot be set on all the skus
due to some other problems. This workaround is to make sure compute workload 
type
can also run on some specific skus.

Signed-off-by: Kenneth Feng 
---
  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 26 +++
  1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index e2265f50bacc..6e8acd021ee6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2179,6 +2179,32 @@ static int smu_v13_0_0_set_power_profile_mode(struct 
smu_context *smu,
}
}
  
+	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&

+   (((smu->adev->pdev->device == 0x744C) && 
(smu->adev->pdev->revision == 0xC8)) ||
+   ((smu->adev->pdev->device == 0x744C) && 
(smu->adev->pdev->revision == 0xCC {
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+  WORKLOAD_PPLIB_COMPUTE_BIT,
+  (void *)(_monitor_external),
+  false);
+   if (ret) {
+   dev_err(smu->adev->dev, "[%s] Failed to get activity 
monitor!", __func__);
+   return ret;
+   }
+
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+  WORKLOAD_PPLIB_CUSTOM_BIT,
+  (void *)(_monitor_external),
+  true);
+   if (ret) {
+   dev_err(smu->adev->dev, "[%s] Failed to set activity 
monitor!", __func__);
+   return ret;
+   }
+
+   smu->power_profile_mode = PP_SMC_POWER_PROFILE_CUSTOM;


Though custom profile is used to set compute profile as a workaround, 
shouldn't this be set to COMPUTE to indicate that the current profile 
set is COMPUTE?


Thanks,
Lijo

+   }
+
/* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
workload_type = smu_cmn_to_asic_specific_index(smu,
   
CMN2ASIC_MAPPING_WORKLOAD,


Re: [PATCH v3] drm/amd/pm: enable more Pstates profile levels for yellow_carp

2023-06-08 Thread Huang, Tim
[Public]

Hi Shikai


From: Guo, Shikai 
Sent: Thursday, June 8, 2023 6:27 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Liang, Prike ; Liu, Aaron ; Huang, 
Tim ; Deucher, Alexander ; Guo, 
Shikai 
Subject: [PATCH v3] drm/amd/pm: enable more Pstates profile levels for 
yellow_carp

This patch enables following UMD stable Pstates profile levels for 
power_dpm_force_performance_level interface.

- profile_peak
- profile_min_mclk
- profile_min_sclk
- profile_standard

Signed-off-by: shikaguo 
---
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 140 +-
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h  |   1 -
 2 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index a92da336ecec..71566c60372f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -47,6 +47,14 @@
 #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x0006L
 #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT  0x1L

+#define SMU_13_0_8_UMD_PSTATE_GFXCLK   533
+#define SMU_13_0_8_UMD_PSTATE_SOCCLK   533
+#define SMU_13_0_8_UMD_PSTATE_FCLK 800
+
+#define SMU_13_0_1_UMD_PSTATE_GFXCLK  700
+#define SMU_13_0_1_UMD_PSTATE_SOCCLK 678
+#define SMU_13_0_1_UMD_PSTATE_FCLK   1800
+
 #define FEATURE_MASK(feature) (1ULL << feature)
 #define SMC_DPM_FEATURE ( \
 FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \
@@ -957,6 +965,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
 uint32_t max)
 {
 enum smu_message_type msg_set_min, msg_set_max;
+   uint32_t min_clk = min;
+   uint32_t max_clk = max;
+
 int ret = 0;

 if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type))
@@ -985,11 +996,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct 
smu_context *smu,
 return -EINVAL;
 }

-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL);
+   if (clk_type == SMU_VCLK) {
+   min_clk = min << SMU_13_VCLK_SHIFT;
+   max_clk = max << SMU_13_VCLK_SHIFT;
+   }
+
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL);
+
 if (ret)
 goto out;

-   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL);
+   ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL);
 if (ret)
 goto out;

@@ -997,12 +1014,48 @@ static int 
yellow_carp_set_soft_freq_limited_range(struct smu_context *smu,
 return ret;
 }

+static int yellow_carp_get_umd_pstate_clk_default(struct smu_context *smu,

It should return uint32_t for the default clk. With this fixed, this patch is

Reviewed-by: Tim Huang 


+   enum smu_clk_type clk_type)
+{
+   uint32_t clk_limit = 0;
+   struct amdgpu_device *adev = smu->adev;
+   switch (clk_type) {
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8))
+   clk_limit = SMU_13_0_8_UMD_PSTATE_GFXCLK;
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) ||
+   (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 
3))
+   clk_limit = SMU_13_0_1_UMD_PSTATE_GFXCLK;
+   break;
+   case SMU_SOCCLK:
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8))
+   clk_limit = SMU_13_0_8_UMD_PSTATE_SOCCLK;
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) ||
+   (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 
3))
+   clk_limit = SMU_13_0_1_UMD_PSTATE_SOCCLK;
+   break;
+   case SMU_FCLK:
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8))
+   clk_limit = SMU_13_0_8_UMD_PSTATE_FCLK;
+   if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) ||
+   (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 
3))
+   clk_limit = SMU_13_0_1_UMD_PSTATE_FCLK;
+   break;
+   default:
+   break;
+   }
+
+   return clk_limit;
+}
+
 static int yellow_carp_print_clk_levels(struct smu_context *smu,
 enum smu_clk_type clk_type, char *buf)
 {
 int i, idx, size = 0, ret = 0;
 uint32_t cur_value = 0, value = 0, count = 0;
 uint32_t min, max;
+   uint32_t clk_limit = 0;

 smu_cmn_get_sysfs_buf(, );

@@ -1044,6 +1097,7 @@ static int 

RE: [PATCH] drm/amd/pm: workaround for compute workload type on some skus

2023-06-08 Thread Xu, Feifei
[AMD Official Use Only - General]

Reviewed-by: Feifei Xu 

-Original Message-
From: amd-gfx  On Behalf Of Kenneth Feng
Sent: Friday, June 9, 2023 10:56 AM
To: amd-gfx@lists.freedesktop.org
Cc: Quan, Evan ; Feng, Kenneth 
Subject: [PATCH] drm/amd/pm: workaround for compute workload type on some skus

On smu 13.0.0, the compute workload type cannot be set on all the skus due to 
some other problems. This workaround is to make sure compute workload type can 
also run on some specific skus.

Signed-off-by: Kenneth Feng 
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 26 +++
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index e2265f50bacc..6e8acd021ee6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2179,6 +2179,32 @@ static int smu_v13_0_0_set_power_profile_mode(struct 
smu_context *smu,
}
}

+   if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
+   (((smu->adev->pdev->device == 0x744C) && 
(smu->adev->pdev->revision == 0xC8)) ||
+   ((smu->adev->pdev->device == 0x744C) && 
(smu->adev->pdev->revision == 0xCC {
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+  WORKLOAD_PPLIB_COMPUTE_BIT,
+  (void *)(_monitor_external),
+  false);
+   if (ret) {
+   dev_err(smu->adev->dev, "[%s] Failed to get activity 
monitor!", __func__);
+   return ret;
+   }
+
+   ret = smu_cmn_update_table(smu,
+  SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+  WORKLOAD_PPLIB_CUSTOM_BIT,
+  (void *)(_monitor_external),
+  true);
+   if (ret) {
+   dev_err(smu->adev->dev, "[%s] Failed to set activity 
monitor!", __func__);
+   return ret;
+   }
+
+   smu->power_profile_mode = PP_SMC_POWER_PROFILE_CUSTOM;
+   }
+
/* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
workload_type = smu_cmn_to_asic_specific_index(smu,
   
CMN2ASIC_MAPPING_WORKLOAD,
--
2.34.1