[PATCH] drm/amdgpu: expose num_hops and num_links xgmi info through dev attr
Add these two dev attrs for xgmi info details which is helpful for developers checking the xgmi topology by catting the sys file directly. Take 4 cards with xgmi connection as an example, get the num_hops for each device or node through xmig_hive_info dir like, cat /sys/bus/pci/devices/:41:00.0/xgmi_hive_info/node1/num_hops will return "00 41 41 41" where "00" stands for the hops to node1 itself and "41" is the hops in hex format to every other node in the same hive. There are node1/node2/node3/node4 representing 4 cards in the hive. The same for num_links dev attr. Signed-off-by: Shiwu Zhang Acked-by: Le Ma Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 46 1 file changed, 46 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 44822dd98e5e..9c0fc23ed5b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -331,6 +331,36 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, } +static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev, +struct device_attribute *attr, +char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = >psp.xgmi_context.top_info; + int i; + for (i=0; i < top->num_nodes; i++) { + sprintf(buf + 3*i, "%02x ", top->nodes[i].num_hops); + } + + return sysfs_emit(buf, "%s\n", buf); +} + +static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, +struct device_attribute *attr, +char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = >psp.xgmi_context.top_info; + int i; + for (i=0; i < top->num_nodes; i++) { + sprintf(buf + 3*i, "%02x ", top->nodes[i].num_links); + } + + return sysfs_emit(buf, "%s\n", buf); +} + #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) static ssize_t amdgpu_xgmi_show_error(struct device *dev, struct device_attribute *attr, @@ -367,6 +397,8 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); +static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); +static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -386,6 +418,15 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, if (ret) pr_err("failed to create xgmi_error\n"); + /* Create xgmi num hops file */ + ret = device_create_file(adev->dev, _attr_xgmi_num_hops); + if (ret) + pr_err("failed to create xgmi_num_hops\n"); + + /* Create xgmi num links file */ + ret = device_create_file(adev->dev, _attr_xgmi_num_links); + if (ret) + pr_err("failed to create xgmi_num_links\n"); /* Create sysfs link to hive info folder on the first device */ if (hive->kobj.parent != (>dev->kobj)) { @@ -413,6 +454,9 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, remove_file: device_remove_file(adev->dev, _attr_xgmi_device_id); + device_remove_file(adev->dev, _attr_xgmi_error); + device_remove_file(adev->dev, _attr_xgmi_num_hops); + device_remove_file(adev->dev, _attr_xgmi_num_links); success: return ret; @@ -426,6 +470,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, device_remove_file(adev->dev, _attr_xgmi_device_id); device_remove_file(adev->dev, _attr_xgmi_error); + device_remove_file(adev->dev, _attr_xgmi_num_hops); + device_remove_file(adev->dev, _attr_xgmi_num_links); if (hive->kobj.parent != (>dev->kobj)) sysfs_remove_link(>dev->kobj,"xgmi_hive_info"); -- 2.17.1
Re: [PATCH v2] drm/amd/pm: enable more Pstates profile levels for yellow_carp
[Public] Hi Shikai, From: Guo, Shikai Sent: Thursday, June 8, 2023 11:27 AM To: amd-gfx@lists.freedesktop.org Cc: Liang, Prike ; Liu, Aaron ; Huang, Tim ; Deucher, Alexander ; Guo, Shikai Subject: [PATCH v2] drm/amd/pm: enable more Pstates profile levels for yellow_carp This patch enables following UMD stable Pstates profile levels for power_dpm_force_performance_level interface. - profile_peak - profile_min_mclk - profile_min_sclk - profile_standard Signed-off-by: shikaguo --- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 94 ++- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h | 5 +- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index a92da336ecec..5c968ab2ea8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -957,6 +957,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, uint32_t max) { enum smu_message_type msg_set_min, msg_set_max; + uint32_t min_clk = min; + uint32_t max_clk = max; + int ret = 0; if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type)) @@ -985,11 +988,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, return -EINVAL; } - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL); + if (clk_type == SMU_VCLK) { + min_clk = min << SMU_13_VCLK_SHIFT; + max_clk = max << SMU_13_VCLK_SHIFT; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL); + if (ret) goto out; - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL); if (ret) goto out; @@ -1107,6 +1116,50 @@ static int yellow_carp_force_clk_levels(struct smu_context *smu, return ret; } +static int yellow_carp_get_dpm_profile_freq(struct smu_context *smu, + enum amd_dpm_forced_level level, + enum smu_clk_type clk_type, + uint32_t *min_clk, + uint32_t *max_clk) +{ + int ret = 0; + uint32_t clk_limit = 0; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + clk_limit = YELLOW_CARP_UMD_PSTATE_GFXCLK; + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) + yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, _limit); + else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) + yellow_carp_get_dpm_ultimate_freq(smu, SMU_SCLK, _limit, NULL); + break; + case SMU_SOCCLK: + clk_limit = YELLOW_CARP_UMD_PSTATE_SOCCLK; + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) + yellow_carp_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, _limit); + break; + case SMU_FCLK: + clk_limit = YELLOW_CARP_UMD_PSTATE_FCLK; This is shared by other APU ASICs, like the SMU v13.0.8. We should need to apply the different profiling standard clocks for sclk/fclk/socclk according to the IP version checking. Thanks. Tim + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) + yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, _limit); + else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) + yellow_carp_get_dpm_ultimate_freq(smu, SMU_FCLK, _limit, NULL); + break; + case SMU_VCLK: + yellow_carp_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, _limit); + break; + case SMU_DCLK: + yellow_carp_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, _limit); + break; + default: + ret = -EINVAL; + break; + } + *min_clk = *max_clk = clk_limit; + return ret; +} + static int yellow_carp_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) { @@ -1114,6 +1167,9 @@ static int yellow_carp_set_performance_level(struct smu_context *smu, uint32_t sclk_min = 0, sclk_max = 0; uint32_t fclk_min = 0, fclk_max = 0; uint32_t socclk_min = 0, socclk_max = 0; + uint32_t vclk_min = 0, vclk_max = 0; + uint32_t dclk_min = 0, dclk_max = 0; + int ret = 0; switch (level) { @@ -1121,28 +1177,42 @@ static int yellow_carp_set_performance_level(struct smu_context *smu,
[PATCH v3 0/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register
From: Sui Jingfeng Patch 1,2 and 3 do basic clean up to the vgaarb module. Patch 4 introduce is_boot_device function callback to vga_client_register Sui Jingfeng (4): PCI/VGA: tidy up the code and comment format PCI/VGA: Use unsigned type for the io_state variable PCI/VGA: only deal with VGA class devices PCI/VGA: introduce is_boot_device function callback to vga_client_register drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/i915/display/intel_vga.c | 3 +- drivers/gpu/drm/nouveau/nouveau_vga.c | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/pci/vgaarb.c | 72 +- drivers/vfio/pci/vfio_pci_core.c | 2 +- include/linux/vgaarb.h | 16 ++--- 7 files changed, 57 insertions(+), 42 deletions(-) -- 2.25.1
[PATCH v3 4/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register
From: Sui Jingfeng The vga_is_firmware_default() function is arch-dependent, which doesn't sound right. At least, it also works on the Mips and LoongArch platforms. Tested with the drm/amdgpu and drm/radeon drivers. However, it's difficult to enumerate all arch-driver combinations. I'm wrong if there is only one exception. With the observation that device drivers typically have better knowledge about which PCI bar contains the firmware framebuffer, which could avoid the need to iterate all of the PCI BARs. But as a PCI function at pci/vgaarb.c, vga_is_firmware_default() is probably not suitable to make such an optimization for a specific device. There are PCI display controllers that don't have a dedicated VRAM bar, this function will lose its effectiveness in such a case. Luckily, the device driver can provide an accurate workaround. Therefore, this patch introduces a callback that allows the device driver to tell the VGAARB if the device is the default boot device. This patch only intends to introduce the mechanism, while the implementation is left to the device driver authors. Also honor the comment: "Clients have two callback mechanisms they can use" Signed-off-by: Sui Jingfeng --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/i915/display/intel_vga.c | 3 +-- drivers/gpu/drm/nouveau/nouveau_vga.c | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/pci/vgaarb.c | 22 ++ drivers/vfio/pci/vfio_pci_core.c | 2 +- include/linux/vgaarb.h | 8 +--- 7 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c7d40873ee2..7a096f2d5c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3960,7 +3960,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* this will fail for cards that aren't VGA class devices, just * ignore it */ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) - vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); + vga_client_register(adev->pdev, amdgpu_device_vga_set_decode, NULL); px = amdgpu_device_supports_px(ddev); diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 286a0bdd28c6..98d7d4dffe9f 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -115,7 +115,6 @@ intel_vga_set_decode(struct pci_dev *pdev, bool enable_decode) int intel_vga_register(struct drm_i915_private *i915) { - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); int ret; @@ -127,7 +126,7 @@ int intel_vga_register(struct drm_i915_private *i915) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - ret = vga_client_register(pdev, intel_vga_set_decode); + ret = vga_client_register(pdev, intel_vga_set_decode, NULL); if (ret && ret != -ENODEV) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index f8bf0ec26844..162b4f4676c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -92,7 +92,7 @@ nouveau_vga_init(struct nouveau_drm *drm) return; pdev = to_pci_dev(dev->dev); - vga_client_register(pdev, nouveau_vga_set_decode); + vga_client_register(pdev, nouveau_vga_set_decode, NULL); /* don't register Thunderbolt eGPU with vga_switcheroo */ if (pci_is_thunderbolt_attached(pdev)) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index afbb3a80c0c6..71f2ff39d6a1 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1425,7 +1425,7 @@ int radeon_device_init(struct radeon_device *rdev, /* if we have > 1 VGA cards, then disable the radeon VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(rdev->pdev, radeon_vga_set_decode); + vga_client_register(rdev->pdev, radeon_vga_set_decode, NULL); if (rdev->flags & RADEON_IS_PX) runtime = true; diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index b0bf4952a95d..d3dab61e0ef2 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -53,6 +53,7 @@ struct vga_device { bool bridge_has_one_vga; bool is_firmware_default; /* device selected by firmware */ unsigned int (*set_decode)(struct pci_dev *pdev, bool decode); + bool (*is_boot_device)(struct pci_dev *pdev); }; static LIST_HEAD(vga_list); @@ -614,10 +615,17 @@ static bool vga_is_boot_device(struct vga_device *vgadev)
[PATCH v3 2/4] PCI/VGA: Use unsigned type for the io_state variable
From: Sui Jingfeng The io_state variable in the vga_arb_write() function is declared with unsigned int type, while the vga_str_to_iostate() function takes int * type. To keep them consistent, replace the third argument of vga_str_to_iostate() function with the unsigned int * type. Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index bf2921e3cb06..7f0347f4f6fd 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -76,7 +76,7 @@ static const char *vga_iostate_to_str(unsigned int iostate) return "none"; } -static int vga_str_to_iostate(char *buf, int str_size, int *io_state) +static int vga_str_to_iostate(char *buf, int str_size, unsigned int *io_state) { /* we could in theory hand out locks on IO and mem * separately to userspace but it can cause deadlocks -- 2.25.1
[PATCH v3 1/4] PCI/VGA: tidy up the code and comment format
From: Sui Jingfeng This patch replaces the leading space with a tab and removes the double blank line, no functional change. Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 26 +- include/linux/vgaarb.h | 8 +++- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index 5a696078b382..bf2921e3cb06 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -61,7 +61,6 @@ static bool vga_arbiter_used; static DEFINE_SPINLOCK(vga_lock); static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue); - static const char *vga_iostate_to_str(unsigned int iostate) { /* Ignore VGA_RSRC_IO and VGA_RSRC_MEM */ @@ -80,7 +79,8 @@ static const char *vga_iostate_to_str(unsigned int iostate) static int vga_str_to_iostate(char *buf, int str_size, int *io_state) { /* we could in theory hand out locks on IO and mem -* separately to userspace but it can cause deadlocks */ +* separately to userspace but it can cause deadlocks +*/ if (strncmp(buf, "none", 4) == 0) { *io_state = VGA_RSRC_NONE; return 1; @@ -99,7 +99,7 @@ static int vga_str_to_iostate(char *buf, int str_size, int *io_state) return 1; } -/* this is only used a cookie - it should not be dereferenced */ +/* This is only used as cookie, it should not be dereferenced */ static struct pci_dev *vga_default; /* Find somebody in our list */ @@ -194,13 +194,15 @@ int vga_remove_vgacon(struct pci_dev *pdev) EXPORT_SYMBOL(vga_remove_vgacon); /* If we don't ever use VGA arb we should avoid - turning off anything anywhere due to old X servers getting - confused about the boot device not being VGA */ + * turning off anything anywhere due to old X servers getting + * confused about the boot device not being VGA + */ static void vga_check_first_use(void) { /* we should inform all GPUs in the system that * VGA arb has occurred and to try and disable resources -* if they can */ +* if they can +*/ if (!vga_arbiter_used) { vga_arbiter_used = true; vga_arbiter_notify_clients(); @@ -956,9 +958,9 @@ EXPORT_SYMBOL(vga_set_legacy_decoding); * @set_decode callback: If a client can disable its GPU VGA resource, it * will get a callback from this to set the encode/decode state. * - * Rationale: we cannot disable VGA decode resources unconditionally some single - * GPU laptops seem to require ACPI or BIOS access to the VGA registers to - * control things like backlights etc. Hopefully newer multi-GPU laptops do + * Rationale: we cannot disable VGA decode resources unconditionally, some + * single GPU laptops seem to require ACPI or BIOS access to the VGA registers + * to control things like backlights etc. Hopefully newer multi-GPU laptops do * something saner, and desktops won't have any special ACPI for this. The * driver will get a callback when VGA arbitration is first used by userspace * since some older X servers have issues. @@ -988,7 +990,6 @@ int vga_client_register(struct pci_dev *pdev, bail: spin_unlock_irqrestore(_lock, flags); return ret; - } EXPORT_SYMBOL(vga_client_register); @@ -1079,7 +1080,6 @@ static int vga_pci_str_to_vars(char *buf, int count, unsigned int *domain, int n; unsigned int slot, func; - n = sscanf(buf, "PCI:%x:%x:%x.%x", domain, bus, , ); if (n != 4) return 0; @@ -1431,7 +1431,6 @@ static int vga_arb_open(struct inode *inode, struct file *file) priv->cards[0].io_cnt = 0; priv->cards[0].mem_cnt = 0; - return 0; } @@ -1544,7 +1543,8 @@ static int __init vga_arb_device_init(void) bus_register_notifier(_bus_type, _notifier); /* We add all PCI devices satisfying VGA class in the arbiter by -* default */ +* default +*/ pdev = NULL; while ((pdev = pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index b4b9137f9792..d36225c582ee 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -23,9 +23,7 @@ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS - * IN THE SOFTWARE. - * + * DEALINGS IN THE SOFTWARE. */ #ifndef LINUX_VGA_H @@ -96,7 +94,7 @@ static inline int vga_client_register(struct pci_dev *pdev, static inline int vga_get_interruptible(struct pci_dev *pdev, unsigned int rsrc) { - return vga_get(pdev, rsrc, 1); + return vga_get(pdev, rsrc, 1); } /** @@ -111,7 +109,7 @@ static inline int vga_get_interruptible(struct pci_dev *pdev, static inline int
[PATCH v3 3/4] PCI/VGA: only deal with VGA class devices
From: Sui Jingfeng vgaarb only deal with the VGA devcie(pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non pci display device out. There no need to process the non display PCI device. Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index 7f0347f4f6fd..b0bf4952a95d 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -756,10 +756,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1499,7 +1495,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + return 0; /* For now we're only intereted in devices added and removed. I didn't * test this thing here, so someone needs to double check for the @@ -1509,6 +1507,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, else if (action == BUS_NOTIFY_DEL_DEVICE) notify = vga_arbiter_del_pci_device(pdev); + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action); + if (notify) vga_arbiter_notify_clients(); return 0; @@ -1533,8 +1533,8 @@ static struct miscdevice vga_arb_device = { static int __init vga_arb_device_init(void) { + struct pci_dev *pdev = NULL; int rc; - struct pci_dev *pdev; rc = misc_register(_arb_device); if (rc < 0) @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void) /* We add all PCI devices satisfying VGA class in the arbiter by * default */ - pdev = NULL; - while ((pdev = - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_ANY_ID, pdev)) != NULL) + while (1) { + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev); + if (!pdev) + break; + vga_arbiter_add_pci_device(pdev); + }; pr_info("loaded\n"); return rc; -- 2.25.1
Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit
On 6/7/23 19:35, Pillai, Aurabindo wrote: > > Do you see the issue if you force disable FAMS? Neither hang occurs with FAMS disabled. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit
[Public] Thanks Michel, I reached out to windows driver team, and they have a monitor specific quirk to disable FAMS on this model. I suspect the issue is only present on certain fw revisions on the monitor which is why we cant see your issue. Unfortunately, having the patches in question reverted causes hangs with 3 monitor setups. So I will push that monitor specific quirk and bring back the reverted patches. -- Regards, Jay From: Michel Dänzer Sent: Thursday, June 8, 2023 10:20 AM To: Pillai, Aurabindo ; Zhuo, Qingqing (Lillian) ; Chalmers, Wesley Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo) ; Lakha, Bhawanpreet ; Siqueira, Rodrigo ; Li, Roman ; amd-gfx@lists.freedesktop.org ; Chiu, Solomon ; Lin, Wayne ; Wentland, Harry ; Gutierrez, Agustin ; Kotarac, Pavle Subject: Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit On 6/7/23 19:35, Pillai, Aurabindo wrote: > > Do you see the issue if you force disable FAMS? Neither hang occurs with FAMS disabled. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
[PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new message
[Why] The sequence for collecting down_reply from source perspective should be: Request_n->repeat (get partial reply of Request_n->clear message ready flag to ack DPRX that the message is received) till all partial replies for Request_n are received->new Request_n+1. Now there is chance that drm_dp_mst_hpd_irq() will fire new down request in the tx queue when the down reply is incomplete. Source is restricted to generate interveleaved message transactions so we should avoid it. Also, while assembling partial reply packets, reading out DPCD DOWN_REP Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag should be wrapped up as a complete operation for reading out a reply packet. Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might be risky. e.g. If the reply of the new request has overwritten the DPRX DOWN_REP Sideband MSG buffer before source writing one to clear DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply for the new request. Should handle the up request in the same way. [How] Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might trigger drm_dp_mst_kick_tx() only when there is no on going message transaction. Changes since v1: * Reworked on review comments received -> Adjust the fix to let driver explicitly kick off new down request when mst irq event is handled and acked -> Adjust the commit message Changes since v2: * Adjust the commit message * Adjust the naming of the divided 2 functions and add a new input parameter "ack". * Adjust code flow as per review comments. Changes since v3: * Update the function description of drm_dp_mst_hpd_irq_handle_event Signed-off-by: Wayne Lin Cc: sta...@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++-- drivers/gpu/drm/display/drm_dp_mst_topology.c | 54 --- drivers/gpu/drm/i915/display/intel_dp.c | 7 +-- drivers/gpu/drm/nouveau/dispnv50/disp.c | 12 +++-- include/drm/display/drm_dp_mst_helper.h | 7 ++- 5 files changed, 82 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d5cec03eaa8d..597c3368bcfb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3236,6 +3236,7 @@ static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector) { u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 }; u8 dret; + u8 ack; bool new_irq_handled = false; int dpcd_addr; int dpcd_bytes_to_read; @@ -3265,34 +3266,36 @@ static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector) process_count < max_process_count) { u8 retry; dret = 0; + ack = 0; process_count++; DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]); /* handle HPD short pulse irq */ if (aconnector->mst_mgr.mst_state) - drm_dp_mst_hpd_irq( - >mst_mgr, - esi, - _irq_handled); + drm_dp_mst_hpd_irq_handle_event(>mst_mgr, + esi, + , + _irq_handled); if (new_irq_handled) { /* ACK at DPCD to notify down stream */ - const int ack_dpcd_bytes_to_write = - dpcd_bytes_to_read - 1; - for (retry = 0; retry < 3; retry++) { - u8 wret; - - wret = drm_dp_dpcd_write( - >dm_dp_aux.aux, - dpcd_addr + 1, - [1], - ack_dpcd_bytes_to_write); - if (wret == ack_dpcd_bytes_to_write) + ssize_t wret; + + wret = drm_dp_dpcd_writeb(>dm_dp_aux.aux, + dpcd_addr + 1, + ack); + if (wret == 1) break; } + if (retry == 3) { + DRM_ERROR("Failed to ack MST event.\n"); + return; + } + + drm_dp_mst_hpd_irq_send_new_request(>mst_mgr); /* check if there is new irq to be handled */ dret =
[PATCH] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1
From: Sonny Jiang Only vcn0 can process AV1 codecx. In order to use both vcn0 and vcn1 in h264/265 transcode to AV1 cases, set vcn0 sched score to 1 at initialization time. Signed-off-by: Sonny Jiang --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 8d371faaa2b3..b48bb5212488 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle) if (adev->vcn.harvest_config & (1 << i)) continue; - atomic_set(>vcn.inst[i].sched_score, 0); + /* Init instance 0 sched_score to 1, so it's scheduled after other instances */ + if (i == 0) + atomic_set(>vcn.inst[i].sched_score, 1); + else + atomic_set(>vcn.inst[i].sched_score, 0); /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], -- 2.34.1
[PATCH v3] drm/amd/pm: enable more Pstates profile levels for yellow_carp
This patch enables following UMD stable Pstates profile levels for power_dpm_force_performance_level interface. - profile_peak - profile_min_mclk - profile_min_sclk - profile_standard Signed-off-by: shikaguo --- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 140 +- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h | 1 - 2 files changed, 136 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index a92da336ecec..71566c60372f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -47,6 +47,14 @@ #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK0x0006L #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1L +#define SMU_13_0_8_UMD_PSTATE_GFXCLK 533 +#define SMU_13_0_8_UMD_PSTATE_SOCCLK 533 +#define SMU_13_0_8_UMD_PSTATE_FCLK 800 + +#define SMU_13_0_1_UMD_PSTATE_GFXCLK 700 +#define SMU_13_0_1_UMD_PSTATE_SOCCLK 678 +#define SMU_13_0_1_UMD_PSTATE_FCLK 1800 + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ @@ -957,6 +965,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, uint32_t max) { enum smu_message_type msg_set_min, msg_set_max; + uint32_t min_clk = min; + uint32_t max_clk = max; + int ret = 0; if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type)) @@ -985,11 +996,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, return -EINVAL; } - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL); + if (clk_type == SMU_VCLK) { + min_clk = min << SMU_13_VCLK_SHIFT; + max_clk = max << SMU_13_VCLK_SHIFT; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL); + if (ret) goto out; - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL); if (ret) goto out; @@ -997,12 +1014,48 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, return ret; } +static int yellow_carp_get_umd_pstate_clk_default(struct smu_context *smu, + enum smu_clk_type clk_type) +{ + uint32_t clk_limit = 0; + struct amdgpu_device *adev = smu->adev; + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8)) + clk_limit = SMU_13_0_8_UMD_PSTATE_GFXCLK; + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) || + (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 3)) + clk_limit = SMU_13_0_1_UMD_PSTATE_GFXCLK; + break; + case SMU_SOCCLK: + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8)) + clk_limit = SMU_13_0_8_UMD_PSTATE_SOCCLK; + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) || + (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 3)) + clk_limit = SMU_13_0_1_UMD_PSTATE_SOCCLK; + break; + case SMU_FCLK: + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8)) + clk_limit = SMU_13_0_8_UMD_PSTATE_FCLK; + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) || + (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 3)) + clk_limit = SMU_13_0_1_UMD_PSTATE_FCLK; + break; + default: + break; + } + + return clk_limit; +} + static int yellow_carp_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; + uint32_t clk_limit = 0; smu_cmn_get_sysfs_buf(, ); @@ -1044,6 +1097,7 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, break; case SMU_GFXCLK: case SMU_SCLK: + clk_limit = yellow_carp_get_umd_pstate_clk_default(smu, clk_type); ret = yellow_carp_get_current_clk_freq(smu, clk_type, _value); if (ret) goto print_clk_out; @@ -1058,7 +1112,7 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu,
Re: [PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new message
On Thu, 08 Jun 2023, Wayne Lin wrote: > [Why] > The sequence for collecting down_reply from source perspective should > be: > > Request_n->repeat (get partial reply of Request_n->clear message ready > flag to ack DPRX that the message is received) till all partial > replies for Request_n are received->new Request_n+1. > > Now there is chance that drm_dp_mst_hpd_irq() will fire new down > request in the tx queue when the down reply is incomplete. Source is > restricted to generate interveleaved message transactions so we should > avoid it. > > Also, while assembling partial reply packets, reading out DPCD DOWN_REP > Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag should be > wrapped up as a complete operation for reading out a reply packet. > Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might > be risky. e.g. If the reply of the new request has overwritten the > DPRX DOWN_REP Sideband MSG buffer before source writing one to clear > DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply > for the new request. Should handle the up request in the same way. > > [How] > Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ > event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might > trigger drm_dp_mst_kick_tx() only when there is no on going message > transaction. > > Changes since v1: > * Reworked on review comments received > -> Adjust the fix to let driver explicitly kick off new down request > when mst irq event is handled and acked > -> Adjust the commit message > > Changes since v2: > * Adjust the commit message > * Adjust the naming of the divided 2 functions and add a new input > parameter "ack". > * Adjust code flow as per review comments. > > Changes since v3: > * Update the function description of drm_dp_mst_hpd_irq_handle_event > > Signed-off-by: Wayne Lin > Cc: sta...@vger.kernel.org > --- > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++-- > drivers/gpu/drm/display/drm_dp_mst_topology.c | 54 --- > drivers/gpu/drm/i915/display/intel_dp.c | 7 +-- > drivers/gpu/drm/nouveau/dispnv50/disp.c | 12 +++-- > include/drm/display/drm_dp_mst_helper.h | 7 ++- > 5 files changed, 82 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index d5cec03eaa8d..597c3368bcfb 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -3236,6 +3236,7 @@ static void dm_handle_mst_sideband_msg(struct > amdgpu_dm_connector *aconnector) > { > u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 }; > u8 dret; > + u8 ack; > bool new_irq_handled = false; > int dpcd_addr; > int dpcd_bytes_to_read; > @@ -3265,34 +3266,36 @@ static void dm_handle_mst_sideband_msg(struct > amdgpu_dm_connector *aconnector) > process_count < max_process_count) { > u8 retry; > dret = 0; > + ack = 0; > > process_count++; > > DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], > esi[2]); > /* handle HPD short pulse irq */ > if (aconnector->mst_mgr.mst_state) > - drm_dp_mst_hpd_irq( > - >mst_mgr, > - esi, > - _irq_handled); > + drm_dp_mst_hpd_irq_handle_event(>mst_mgr, > + esi, > + , > + _irq_handled); > > if (new_irq_handled) { > /* ACK at DPCD to notify down stream */ > - const int ack_dpcd_bytes_to_write = > - dpcd_bytes_to_read - 1; > - > for (retry = 0; retry < 3; retry++) { > - u8 wret; > - > - wret = drm_dp_dpcd_write( > - >dm_dp_aux.aux, > - dpcd_addr + 1, > - [1], > - ack_dpcd_bytes_to_write); > - if (wret == ack_dpcd_bytes_to_write) > + ssize_t wret; > + > + wret = > drm_dp_dpcd_writeb(>dm_dp_aux.aux, > + dpcd_addr + 1, > + ack); > + if (wret == 1) > break; > } > > + if (retry == 3) { > + DRM_ERROR("Failed to ack MST event.\n"); > + return; > + } > + > +
Re: [PATCH] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1
[Public] Reviewed-by: Leo Liu From: amd-gfx on behalf of Sonny Jiang Sent: June 8, 2023 10:54 AM To: amd-gfx@lists.freedesktop.org Cc: Jiang, Sonny Subject: [PATCH] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1 From: Sonny Jiang Only vcn0 can process AV1 codecx. In order to use both vcn0 and vcn1 in h264/265 transcode to AV1 cases, set vcn0 sched score to 1 at initialization time. Signed-off-by: Sonny Jiang --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 8d371faaa2b3..b48bb5212488 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle) if (adev->vcn.harvest_config & (1 << i)) continue; - atomic_set(>vcn.inst[i].sched_score, 0); + /* Init instance 0 sched_score to 1, so it's scheduled after other instances */ + if (i == 0) + atomic_set(>vcn.inst[i].sched_score, 1); + else + atomic_set(>vcn.inst[i].sched_score, 0); /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], -- 2.34.1
Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."
Hi all, and almost 2 years later I stumbled across this exact issue still being present in the scheduler: if the driver bails out of the timeout handling before calling drm_sched_stop(), the timeout job will be leaked and the TDR timer will potentially not be restarted as the job isn't put back in the pending_list. How do we solve this? Apply the below suggestion? Regards, Lucas Am Freitag, dem 20.08.2021 um 07:12 + schrieb Liu, Monk: > [AMD Official Use Only] > > @Daniel Vetter @Grodzovsky, Andrey @Koenig, Christian > > Do you have any concern on the kthread_park() approach ? > > Theoretically speaking sched_main shall run there exclusively with > job_timeout since they both touches jobs, and stop scheduler during > job_timeout won't impact performance since in that scenario > There was already something wrong/stuck on that ring/scheduler > > Thanks > > -- > Monk Liu | Cloud-GPU Core team > -- > > -Original Message- > From: Liu, Monk > Sent: Thursday, August 19, 2021 6:26 PM > To: Daniel Vetter ; Grodzovsky, Andrey > > Cc: Alex Deucher ; Chen, JingWen > ; Maling list - DRI developers > ; amd-gfx list > ; Koenig, Christian > Subject: RE: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job." > > [AMD Official Use Only] > > Hi Daniel > > > > Why can't we stop the scheduler thread first, so that there's guaranteed > > > no race? I've recently had a lot of discussions with panfrost folks about > > > their reset that spawns across engines, and without stopping the > > > scheduler thread first before you touch anything it's just plain > > > impossible. > > Yeah we had this though as well in our mind. > > Our second approach is to call ktrhead_stop() in job_timedout() routine so > that the "bad" job is guaranteed to be used without scheduler's touching or > freeing, Check this sample patch one as well please: > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c > b/drivers/gpu/drm/scheduler/sched_main.c > index a2a9536..50a49cb 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct > *work) > sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); > > /* Protects against concurrent deletion in drm_sched_get_cleanup_job > */ > + kthread_park(sched->thread); > spin_lock(>job_list_lock); > job = list_first_entry_or_null(>pending_list, >struct drm_sched_job, list); > > if (job) { > - /* > -* Remove the bad job so it cannot be freed by concurrent > -* drm_sched_cleanup_jobs. It will be reinserted back after > sched->thread > -* is parked at which point it's safe. > -*/ > - list_del_init(>list); > spin_unlock(>job_list_lock); > > status = job->sched->ops->timedout_job(job); > @@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct > *work) > } else { > spin_unlock(>job_list_lock); > } > + kthread_unpark(sched->thread); > > if (status != DRM_GPU_SCHED_STAT_ENODEV) { > spin_lock(>job_list_lock); @@ -393,20 +389,6 @@ void > drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) > kthread_park(sched->thread); > > /* > -* Reinsert back the bad job here - now it's safe as > -* drm_sched_get_cleanup_job cannot race against us and release the > -* bad job at this point - we parked (waited for) any in progress > -* (earlier) cleanups and drm_sched_get_cleanup_job will not be called > -* now until the scheduler thread is unparked. > -*/ > - if (bad && bad->sched == sched) > - /* > -* Add at the head of the queue to reflect it was the earliest > -* job extracted. > -*/ > - list_add(>list, >pending_list); > - > - /* > * Iterate the job list from later to earlier one and either deactive > * their HW callbacks or remove them from pending list if they already > * signaled. > > > Thanks > > -- > Monk Liu | Cloud-GPU Core team > -- > > -Original Message- > From: Daniel Vetter > Sent: Thursday, August 19, 2021 5:31 PM > To: Grodzovsky, Andrey > Cc: Daniel Vetter ; Alex Deucher ; > Chen, JingWen ; Maling list - DRI developers > ; amd-gfx list > ; Liu, Monk ; Koenig, > Christian > Subject: Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job." > > On Wed, Aug 18, 2021 at 10:51:00AM -0400, Andrey Grodzovsky wrote: > > > > On 2021-08-18 10:42 a.m., Daniel
Re: [Intel-gfx] [PATCH v3 3/4] PCI/VGA: only deal with VGA class devices
Start with verb and capitalize to match ("Deal only with ...") On Thu, Jun 08, 2023 at 07:43:21PM +0800, Sui Jingfeng wrote: > From: Sui Jingfeng > > vgaarb only deal with the VGA devcie(pdev->class == 0x0300), so replace the > pci_get_subsys() function with pci_get_class(). Filter the non pci display > device out. There no need to process the non display PCI device. s/non pci/non-PCI/ s/non display/non-display/ This is fine, and I'll merge this, but someday I would like to get rid of the bus_register_notifier() and call vga_arbiter_add_pci_device() and vga_arbiter_del_pci_device() directly from the PCI core. Or if you wanted to do that now, that would be even better :) Bjorn > Signed-off-by: Sui Jingfeng > --- > drivers/pci/vgaarb.c | 22 -- > 1 file changed, 12 insertions(+), 10 deletions(-) > > diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c > index 7f0347f4f6fd..b0bf4952a95d 100644 > --- a/drivers/pci/vgaarb.c > +++ b/drivers/pci/vgaarb.c > @@ -756,10 +756,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev > *pdev) > struct pci_dev *bridge; > u16 cmd; > > - /* Only deal with VGA class devices */ > - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) > - return false; > - > /* Allocate structure */ > vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); > if (vgadev == NULL) { > @@ -1499,7 +1495,9 @@ static int pci_notify(struct notifier_block *nb, > unsigned long action, > struct pci_dev *pdev = to_pci_dev(dev); > bool notify = false; > > - vgaarb_dbg(dev, "%s\n", __func__); > + /* Only deal with VGA class devices */ > + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) > + return 0; > > /* For now we're only intereted in devices added and removed. I didn't >* test this thing here, so someone needs to double check for the > @@ -1509,6 +1507,8 @@ static int pci_notify(struct notifier_block *nb, > unsigned long action, > else if (action == BUS_NOTIFY_DEL_DEVICE) > notify = vga_arbiter_del_pci_device(pdev); > > + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action); > + > if (notify) > vga_arbiter_notify_clients(); > return 0; > @@ -1533,8 +1533,8 @@ static struct miscdevice vga_arb_device = { > > static int __init vga_arb_device_init(void) > { > + struct pci_dev *pdev = NULL; > int rc; > - struct pci_dev *pdev; > > rc = misc_register(_arb_device); > if (rc < 0) > @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void) > /* We add all PCI devices satisfying VGA class in the arbiter by >* default >*/ > - pdev = NULL; > - while ((pdev = > - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, > -PCI_ANY_ID, pdev)) != NULL) > + while (1) { > + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev); > + if (!pdev) > + break; > + > vga_arbiter_add_pci_device(pdev); > + }; > > pr_info("loaded\n"); > return rc; > -- > 2.25.1 >
[PATCH] drm/amdgpu: Wrap -Wunused-but-set-variable in cc-option
-Wunused-but-set-variable was only supported in clang starting with 13.0.0, so earlier versions will emit a warning, which is turned into a hard error for the kernel to mirror GCC: error: unknown warning option '-Wunused-but-set-variable'; did you mean '-Wunused-const-variable'? [-Werror,-Wunknown-warning-option] The minimum supported version of clang for building the kernel is 11.0.0, so match the rest of the kernel and wrap -Wunused-but-set-variable in a cc-option call, so that it is only used when supported by the compiler. Closes: https://github.com/ClangBuiltLinux/linux/issues/1869 Fixes: a0fd5a5f676c ("drm/amd/amdgpu: introduce DRM_AMDGPU_WERROR") Signed-off-by: Nathan Chancellor --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 7ee68b1bbfed..86b833085f19 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -40,7 +40,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/amdkfd subdir-ccflags-y := -Wextra -subdir-ccflags-y += -Wunused-but-set-variable +subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) subdir-ccflags-y += -Wno-unused-parameter subdir-ccflags-y += -Wno-type-limits subdir-ccflags-y += -Wno-sign-compare --- base-commit: 6bd4b01e8938779b0d959bdf33949a9aa258a363 change-id: 20230608-amdgpu-wrap-wunused-but-set-variable-in-cc-option-0be9528ac5c8 Best regards, -- Nathan Chancellor
Re: [PATCH] drm/amdgpu: add the accelerator pcie class
s/pcie/PCIe/ in the subject. On Tue, May 23, 2023 at 12:02:32PM +0800, Shiwu Zhang wrote: > v2: add the base class id for accelerator (lijo) Please include a commit log. For PCI, the "v2: ..." stuff would go below the "---" so it doesn't get included in the git commit. I don't know what the DRM convention is. It's OK if the commit log repeats the subject. The subject is like the title of a story -- it's not the first sentence of the story. Please include a spec citation for the PCI_BASE_CLASS_ACCELERATOR values in the commit log. I think it's something like "PCI Code and ID Assignment, r1.9, sec 1, 1.19". > Signed-off-by: Shiwu Zhang > Acked-by: Lijo Lazar With the above: Acked-by: Bjorn Helgaas # pci_ids.h > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 + > include/linux/pci_ids.h | 3 +++ > 2 files changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 3d91e123f9bd..5d652e6f0b1e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2042,6 +2042,11 @@ static const struct pci_device_id pciidlist[] = { > .class_mask = 0xff, > .driver_data = CHIP_IP_DISCOVERY }, > > + { PCI_DEVICE(0x1002, PCI_ANY_ID), > + .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8, > + .class_mask = 0xff, > + .driver_data = CHIP_IP_DISCOVERY }, > + > {0, 0, 0} > }; > > diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h > index b362d90eb9b0..4918ff26a987 100644 > --- a/include/linux/pci_ids.h > +++ b/include/linux/pci_ids.h > @@ -151,6 +151,9 @@ > #define PCI_CLASS_SP_DPIO0x1100 > #define PCI_CLASS_SP_OTHER 0x1180 > > +#define PCI_BASE_CLASS_ACCELERATOR 0x12 > +#define PCI_CLASS_ACCELERATOR_PROCESSING 0x1200 > + > #define PCI_CLASS_OTHERS 0xff > > /* Vendors and devices. Sort key: vendor first, device next. */ > -- > 2.17.1 >
Re: [PATCH 10/66] drm/amd/display: Do not set drr on pipe commit
On 6/8/23 16:31, Pillai, Aurabindo wrote: > > Thanks Michel, > > I reached out to windows driver team, and they have a monitor specific quirk > to disable FAMS on this model. I suspect the issue is only present on certain > fw revisions on the monitor which is why we cant see your issue. > > Unfortunately, having the patches in question reverted causes hangs with 3 > monitor setups. So I will push that monitor specific quirk and bring back the > reverted patches. Sounds good, thanks. -- Earthling Michel Dänzer| https://redhat.com Libre software enthusiast | Mesa and Xwayland developer
[PATCH v3 3/5] drm/amdkfd: set activated flag true when event age unmatchs
Set waiter's activated flag true when event age unmatchs with last_event_age. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index c7689181cc22..4c6907507190 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -41,6 +41,7 @@ struct kfd_event_waiter { wait_queue_entry_t wait; struct kfd_event *event; /* Event to wait for */ bool activated; /* Becomes true when event is signaled */ + bool event_age_enabled; /* set to true when last_event_age is non-zero */ }; /* @@ -797,9 +798,9 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) static int init_event_waiter(struct kfd_process *p, struct kfd_event_waiter *waiter, - uint32_t event_id) + struct kfd_event_data *event_data) { - struct kfd_event *ev = lookup_event_by_id(p, event_id); + struct kfd_event *ev = lookup_event_by_id(p, event_data->event_id); if (!ev) return -EINVAL; @@ -808,6 +809,13 @@ static int init_event_waiter(struct kfd_process *p, waiter->event = ev; waiter->activated = ev->signaled; ev->signaled = ev->signaled && !ev->auto_reset; + + /* last_event_age = 0 reserved for backward compatible */ + waiter->event_age_enabled = !!event_data->signal_event_data.last_event_age; + if (waiter->event_age_enabled && + ev->event_age != event_data->signal_event_data.last_event_age) + WRITE_ONCE(waiter->activated, true); + if (!waiter->activated) add_wait_queue(>wq, >wait); spin_unlock(>lock); @@ -948,8 +956,7 @@ int kfd_wait_on_events(struct kfd_process *p, goto out_unlock; } - ret = init_event_waiter(p, _waiters[i], - event_data.event_id); + ret = init_event_waiter(p, _waiters[i], _data); if (ret) goto out_unlock; } -- 2.34.1
[PATCH v3 5/5] drm/amdkfd: bump kfd ioctl minor version for event age availability
Bump the minor version to declare event age tracking feature is now available. Signed-off-by: James Zhu --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 93f1c0bc5caf..eeb2fdcbdcb7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -39,9 +39,10 @@ * - 1.11 - Add unified memory for ctx save/restore area * - 1.12 - Add DMA buf export ioctl * - 1.13 - Add debugger API + * - 1.14 - Update kfd_event_data */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 13 +#define KFD_IOCTL_MINOR_VERSION 14 struct kfd_ioctl_get_version_args { __u32 major_version;/* from KFD */ -- 2.34.1
[PATCH v3 4/5] drm/amdkfd: update user space last_event_age
Update user space last_event_age when event age is enabled. It is only for KFD_EVENT_TYPE_SIGNAL which is checked by user space. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 23 +++ 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4c6907507190..9fcfec57a094 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -863,22 +863,29 @@ static int copy_signaled_event_data(uint32_t num_events, struct kfd_event_waiter *event_waiters, struct kfd_event_data __user *data) { - struct kfd_hsa_memory_exception_data *src; - struct kfd_hsa_memory_exception_data __user *dst; + void *src; + void __user *dst; struct kfd_event_waiter *waiter; struct kfd_event *event; - uint32_t i; + uint32_t i, size = 0; for (i = 0; i < num_events; i++) { waiter = _waiters[i]; event = waiter->event; if (!event) return -EINVAL; /* event was destroyed */ - if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { - dst = [i].memory_exception_data; - src = >memory_exception_data; - if (copy_to_user(dst, src, - sizeof(struct kfd_hsa_memory_exception_data))) + if (waiter->activated) { + if (event->type == KFD_EVENT_TYPE_MEMORY) { + dst = [i].memory_exception_data; + src = >memory_exception_data; + size = sizeof(struct kfd_hsa_memory_exception_data); + } else if (event->type == KFD_EVENT_TYPE_SIGNAL && + waiter->event_age_enabled) { + dst = [i].signal_event_data.last_event_age; + src = >event_age; + size = sizeof(u64); + } + if (size && copy_to_user(dst, src, size)) return -EFAULT; } } -- 2.34.1
[PATCH v3 1/5] drm/amdkfd: add event age tracking
Add event age tracking Signed-off-by: James Zhu --- include/uapi/linux/kfd_ioctl.h | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1781e7669982..93f1c0bc5caf 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -320,12 +320,20 @@ struct kfd_hsa_hw_exception_data { __u32 gpu_id; }; +/* hsa signal event data */ +struct kfd_hsa_signal_event_data { + __u64 last_event_age; /* to and from KFD */ +}; + /* Event data */ struct kfd_event_data { union { + /* From KFD */ struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_hw_exception_data hw_exception_data; - }; /* From KFD */ + /* To and From KFD */ + struct kfd_hsa_signal_event_data signal_event_data; + }; __u64 kfd_event_data_ext; /* pointer to an extension structure for future exception types */ __u32 event_id; /* to KFD */ -- 2.34.1
[PATCH v3 2/5] drm/amdkfd: add event_age tracking when receiving interrupt
Add event_age tracking when receiving interrupt. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 7ff5c4e1b7e2..c7689181cc22 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -431,6 +431,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, if (!ret) { *event_id = ev->event_id; *event_trigger_data = ev->event_id; + ev->event_age = 1; } else { kfree(ev); } @@ -629,6 +630,11 @@ static void set_event(struct kfd_event *ev) * updating the wait queues in kfd_wait_on_events. */ ev->signaled = !ev->auto_reset || !waitqueue_active(>wq); + if (!(++ev->event_age)) { + /* Never wrap back to reserved/default event age 0/1 */ + ev->event_age = 2; + WARN_ONCE(1, "event_age wrap back!"); + } list_for_each_entry(waiter, >wq.head, wait.entry) WRITE_ONCE(waiter->activated, true); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 1c62c8dd6460..52ccfd397c2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -53,6 +53,7 @@ struct signal_page; struct kfd_event { u32 event_id; + u64 event_age; bool signaled; bool auto_reset; -- 2.34.1
[PATCH v3 0/5] new event wait support
In kernel amdgpu driver, kfd_wait_on_events is used to support user space signal event wait function. For multiple threads waiting on same event scenery, race condition could occur since some threads after checking signal condition, before calling kfd_wait_on_events, the event interrupt could be fired and wake up other thread which are sleeping on this event. Then those threads could fall into sleep without waking up again. Adding event age tracking in both kernel and user mode, will help avoiding this race condition. The changes for The user space ROCT-Thunk-Interface/ROCR-Runtime are listed below for review togehter with kernel mode changes. ROCT-Thunk-Interface: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/efdbf6cfbc026bd68ac3c35d00dacf84370eb81e https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/910108272091d1ce61dbc48bd9519731e0e9cf52 ROCR-Runtime: https://github.com/RadeonOpenCompute/ROCR-Runtime/compare/master...zhums:ROCR-Runtime:new_event_wait_review https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/e1f5bdb88eb882ac798aeca2c00ea3fbb2dba459 https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/7d26afd14107b5c2a754c1a3f415d89f3aabb503 -v2: remove unnecessay link -v3: 1. update kfd test cases (910108272091d1ce61dbc48bd9519731e0e9cf52) 2. move event age match checking into init_event_waiter 3. move last event age update into copy_signaled_event_data James Zhu (5): drm/amdkfd: add event age tracking drm/amdkfd: add event_age tracking when receiving interrupt drm/amdkfd: set activated flag true when event age unmatchs drm/amdkfd: update user space last_event_age drm/amdkfd: bump kfd ioctl minor version for event age availability drivers/gpu/drm/amd/amdkfd/kfd_events.c | 44 ++--- drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 + include/uapi/linux/kfd_ioctl.h | 13 ++-- 3 files changed, 44 insertions(+), 14 deletions(-) -- 2.34.1
Re: [PATCH] drm/amdgpu: Wrap -Wunused-but-set-variable in cc-option
On 6/8/23 13:01, Nathan Chancellor wrote: -Wunused-but-set-variable was only supported in clang starting with 13.0.0, so earlier versions will emit a warning, which is turned into a hard error for the kernel to mirror GCC: error: unknown warning option '-Wunused-but-set-variable'; did you mean '-Wunused-const-variable'? [-Werror,-Wunknown-warning-option] The minimum supported version of clang for building the kernel is 11.0.0, so match the rest of the kernel and wrap -Wunused-but-set-variable in a cc-option call, so that it is only used when supported by the compiler. Closes: https://github.com/ClangBuiltLinux/linux/issues/1869 Fixes: a0fd5a5f676c ("drm/amd/amdgpu: introduce DRM_AMDGPU_WERROR") Signed-off-by: Nathan Chancellor Applied, thanks! --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 7ee68b1bbfed..86b833085f19 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -40,7 +40,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/amdkfd subdir-ccflags-y := -Wextra -subdir-ccflags-y += -Wunused-but-set-variable +subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) subdir-ccflags-y += -Wno-unused-parameter subdir-ccflags-y += -Wno-type-limits subdir-ccflags-y += -Wno-sign-compare --- base-commit: 6bd4b01e8938779b0d959bdf33949a9aa258a363 change-id: 20230608-amdgpu-wrap-wunused-but-set-variable-in-cc-option-0be9528ac5c8 Best regards, -- Hamza
Re: [Intel-gfx] [PATCH v3 1/4] PCI/VGA: tidy up the code and comment format
Capitalize subject to match ("Tidy ...") On Thu, Jun 08, 2023 at 07:43:19PM +0800, Sui Jingfeng wrote: > From: Sui Jingfeng > > This patch replaces the leading space with a tab and removes the double > blank line, no functional change. Can you move this to the end of the series? The functional changes are more likely to be backported, and I think the backport may be a little easier without the cleanup in the middle. > /* we could in theory hand out locks on IO and mem > - * separately to userspace but it can cause deadlocks */ > + * separately to userspace but it can cause deadlocks > + */ Since you're touching this anyway, can you update it to the conventional multi-line comment style: /* * We could in theory ... */ And capitalize "We", add a period at end, and rewrap to fill 78 columns or so? Same for other comments below. > +++ b/include/linux/vgaarb.h > @@ -23,9 +23,7 @@ > * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > - * DEALINGS > - * IN THE SOFTWARE. > - * > + * DEALINGS IN THE SOFTWARE. > */ Can you make a separate patch to replace this entire copyright notice with the appropriate SPDX-License-Identifier header? Documentation/process/license-rules.rst has details. Bjorn
Re: [Intel-gfx] [PATCH v3 4/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register
On Thu, Jun 08, 2023 at 07:43:22PM +0800, Sui Jingfeng wrote: > From: Sui Jingfeng > > The vga_is_firmware_default() function is arch-dependent, which doesn't > sound right. At least, it also works on the Mips and LoongArch platforms. > Tested with the drm/amdgpu and drm/radeon drivers. However, it's difficult > to enumerate all arch-driver combinations. I'm wrong if there is only one > exception. > > With the observation that device drivers typically have better knowledge > about which PCI bar contains the firmware framebuffer, which could avoid > the need to iterate all of the PCI BARs. > > But as a PCI function at pci/vgaarb.c, vga_is_firmware_default() is > probably not suitable to make such an optimization for a specific device. > > There are PCI display controllers that don't have a dedicated VRAM bar, > this function will lose its effectiveness in such a case. Luckily, the > device driver can provide an accurate workaround. > > Therefore, this patch introduces a callback that allows the device driver > to tell the VGAARB if the device is the default boot device. This patch > only intends to introduce the mechanism, while the implementation is left > to the device driver authors. Also honor the comment: "Clients have two > callback mechanisms they can use" s/bar/BAR/ (several) Nothing here uses the callback. I don't want to merge this until we have a user. I'm not sure why the device driver should know whether its device is the default boot device. > Signed-off-by: Sui Jingfeng > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- > drivers/gpu/drm/i915/display/intel_vga.c | 3 +-- > drivers/gpu/drm/nouveau/nouveau_vga.c | 2 +- > drivers/gpu/drm/radeon/radeon_device.c | 2 +- > drivers/pci/vgaarb.c | 22 ++ > drivers/vfio/pci/vfio_pci_core.c | 2 +- > include/linux/vgaarb.h | 8 +--- > 7 files changed, 28 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 5c7d40873ee2..7a096f2d5c16 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3960,7 +3960,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, > /* this will fail for cards that aren't VGA class devices, just >* ignore it */ > if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) > - vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); > + vga_client_register(adev->pdev, amdgpu_device_vga_set_decode, > NULL); > > px = amdgpu_device_supports_px(ddev); > > diff --git a/drivers/gpu/drm/i915/display/intel_vga.c > b/drivers/gpu/drm/i915/display/intel_vga.c > index 286a0bdd28c6..98d7d4dffe9f 100644 > --- a/drivers/gpu/drm/i915/display/intel_vga.c > +++ b/drivers/gpu/drm/i915/display/intel_vga.c > @@ -115,7 +115,6 @@ intel_vga_set_decode(struct pci_dev *pdev, bool > enable_decode) > > int intel_vga_register(struct drm_i915_private *i915) > { > - > struct pci_dev *pdev = to_pci_dev(i915->drm.dev); > int ret; > > @@ -127,7 +126,7 @@ int intel_vga_register(struct drm_i915_private *i915) >* then we do not take part in VGA arbitration and the >* vga_client_register() fails with -ENODEV. >*/ > - ret = vga_client_register(pdev, intel_vga_set_decode); > + ret = vga_client_register(pdev, intel_vga_set_decode, NULL); > if (ret && ret != -ENODEV) > return ret; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c > b/drivers/gpu/drm/nouveau/nouveau_vga.c > index f8bf0ec26844..162b4f4676c7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_vga.c > +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c > @@ -92,7 +92,7 @@ nouveau_vga_init(struct nouveau_drm *drm) > return; > pdev = to_pci_dev(dev->dev); > > - vga_client_register(pdev, nouveau_vga_set_decode); > + vga_client_register(pdev, nouveau_vga_set_decode, NULL); > > /* don't register Thunderbolt eGPU with vga_switcheroo */ > if (pci_is_thunderbolt_attached(pdev)) > diff --git a/drivers/gpu/drm/radeon/radeon_device.c > b/drivers/gpu/drm/radeon/radeon_device.c > index afbb3a80c0c6..71f2ff39d6a1 100644 > --- a/drivers/gpu/drm/radeon/radeon_device.c > +++ b/drivers/gpu/drm/radeon/radeon_device.c > @@ -1425,7 +1425,7 @@ int radeon_device_init(struct radeon_device *rdev, > /* if we have > 1 VGA cards, then disable the radeon VGA resources */ > /* this will fail for cards that aren't VGA class devices, just >* ignore it */ > - vga_client_register(rdev->pdev, radeon_vga_set_decode); > + vga_client_register(rdev->pdev, radeon_vga_set_decode, NULL); > > if (rdev->flags & RADEON_IS_PX) > runtime = true; > diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c > index b0bf4952a95d..d3dab61e0ef2 100644 > --- a/drivers/pci/vgaarb.c >
[PATCH] Align eccinfo table structure with smu v13_0_0 interface
Update eccinfo table structure according to smu v13_0_0 interface. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/umc_v8_10.h | 3 +++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h index c6dfd433fec7bc..2cdaf746e8cd4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -31,6 +31,9 @@ /* number of umc instance with memory map register access */ #define UMC_V8_10_UMC_INSTANCE_NUM 2 +/* Max number of umc channel instances */ +#define UMC_V8_10_MAX_CHANNEL_NUM 24 + /* Total channel instances for all available umc nodes */ #define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->gmc.num_umc) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 413e592f0ed611..90ea15496879c4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2580,7 +2580,7 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, ecc_table = (EccInfoTable_t *)smu_table->ecc_table; - for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) { + for (i = 0; i < UMC_V8_10_MAX_CHANNEL_NUM; i++) { ecc_info_per_channel = &(eccinfo->ecc[i]); ecc_info_per_channel->ce_count_lo_chip = ecc_table->EccInfo[i].ce_count_lo_chip; -- 2.25.1
Re: [Intel-gfx] [PATCH v3 1/4] PCI/VGA: tidy up the code and comment format
Hi, On 2023/6/9 03:07, Bjorn Helgaas wrote: Capitalize subject to match ("Tidy ...") On Thu, Jun 08, 2023 at 07:43:19PM +0800, Sui Jingfeng wrote: From: Sui Jingfeng This patch replaces the leading space with a tab and removes the double blank line, no functional change. Can you move this to the end of the series? The functional changes are more likely to be backported, and I think the backport may be a little easier without the cleanup in the middle. OK, acceptable. /* we could in theory hand out locks on IO and mem -* separately to userspace but it can cause deadlocks */ +* separately to userspace but it can cause deadlocks +*/ Since you're touching this anyway, can you update it to the conventional multi-line comment style: /* * We could in theory ... */ And capitalize "We", add a period at end, and rewrap to fill 78 columns or so? Same for other comments below. OK, I could improve this at next version. +++ b/include/linux/vgaarb.h @@ -23,9 +23,7 @@ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS - * IN THE SOFTWARE. - * + * DEALINGS IN THE SOFTWARE. */ Can you make a separate patch to replace this entire copyright notice with the appropriate SPDX-License-Identifier header? Documentation/process/license-rules.rst has details. Wow ... Bjorn -- Jingfeng
[PATCH] drm/amd/pm: workaround for compute workload type on some skus
On smu 13.0.0, the compute workload type cannot be set on all the skus due to some other problems. This workaround is to make sure compute workload type can also run on some specific skus. Signed-off-by: Kenneth Feng --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 26 +++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e2265f50bacc..6e8acd021ee6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2179,6 +2179,32 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && + (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || + ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC { + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_COMPUTE_BIT, + (void *)(_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } + + smu->power_profile_mode = PP_SMC_POWER_PROFILE_CUSTOM; + } + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, -- 2.34.1
Re: [PATCH] drm/amdgpu: Raname DRM schedulers in amdgpu TTM
Am 07.06.23 um 17:26 schrieb Mukul Joshi: Rename mman.entity to mman.high_pr to make the distinction clearer that this is a high priority scheduler. Similarly, rename the recently added mman.delayed to mman.low_pr to make it clear it is a low priority scheduler. No functional change in this patch. Signed-off-by: Mukul Joshi Reviewed-by: Christian König Thanks, Christian. --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8884c043cf76..8a4ed69a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -228,7 +228,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; - r = amdgpu_job_alloc_with_ib(adev, >mman.entity, + r = amdgpu_job_alloc_with_ib(adev, >mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, ); @@ -1460,7 +1460,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, memcpy(adev->mman.sdma_access_ptr, buf, len); num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, >mman.entity, + r = amdgpu_job_alloc_with_ib(adev, >mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, AMDGPU_IB_POOL_DELAYED, ); @@ -2036,7 +2036,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ring = adev->mman.buffer_funcs_ring; sched = >sched; - r = drm_sched_entity_init(>mman.entity, + r = drm_sched_entity_init(>mman.high_pr, DRM_SCHED_PRIORITY_KERNEL, , 1, NULL); if (r) { @@ -2045,7 +2045,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) return; } - r = drm_sched_entity_init(>mman.delayed, + r = drm_sched_entity_init(>mman.low_pr, DRM_SCHED_PRIORITY_NORMAL, , 1, NULL); if (r) { @@ -2054,8 +2054,8 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) goto error_free_entity; } } else { - drm_sched_entity_destroy(>mman.entity); - drm_sched_entity_destroy(>mman.delayed); + drm_sched_entity_destroy(>mman.high_pr); + drm_sched_entity_destroy(>mman.low_pr); dma_fence_put(man->move); man->move = NULL; } @@ -2071,7 +2071,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) return; error_free_entity: - drm_sched_entity_destroy(>mman.entity); + drm_sched_entity_destroy(>mman.high_pr); } static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, @@ -2086,8 +2086,8 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED; int r; - struct drm_sched_entity *entity = delayed ? >mman.delayed : - >mman.entity; + struct drm_sched_entity *entity = delayed ? >mman.low_pr : + >mman.high_pr; r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, pool, job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e82b1edee7a4..6d0d66e40db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -59,10 +59,10 @@ struct amdgpu_mman { boolbuffer_funcs_enabled; struct mutexgtt_window_lock; - /* Scheduler entity for buffer moves */ - struct drm_sched_entity entity; - /* Scheduler entity for VRAM clearing */ - struct drm_sched_entity delayed; + /* High priority scheduler entity for buffer moves */ + struct drm_sched_entity high_pr; + /* Low priority scheduler entity for VRAM clearing */ +
[PATCH] drm/amdgpu: Print client id for the unregistered interrupt resource
Modify the debug information and print the clien id for these interrupts as well. Signed-off-by: Ma Jun --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b0808c1be013..e6edc67ef010 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -486,7 +486,8 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, handled = true; } else { - DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); + DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n", + src_id, client_id); } /* Send it to amdkfd as well if it isn't already handled */ -- 2.34.1
Re: [Intel-gfx] [PATCH v3 3/4] PCI/VGA: only deal with VGA class devices
Hi, On 2023/6/9 03:12, Bjorn Helgaas wrote: Start with verb and capitalize to match ("Deal only with ...") On Thu, Jun 08, 2023 at 07:43:21PM +0800, Sui Jingfeng wrote: From: Sui Jingfeng vgaarb only deal with the VGA devcie(pdev->class == 0x0300), so replace the pci_get_subsys() function with pci_get_class(). Filter the non pci display device out. There no need to process the non display PCI device. s/non pci/non-PCI/ s/non display/non-display/ Will be fixed at next version, thanks for the strict checking, I will try to bring this rigorous to the other patch of myself in the future. This is fine, and I'll merge this, but someday I would like to get rid of the bus_register_notifier() and call vga_arbiter_add_pci_device() and vga_arbiter_del_pci_device() directly from the PCI core. Or if you wanted to do that now, that would be even better :) I think, I probably should only do what I'm good at. The "good at" here means that It's under prepared, already matured with thinking(or consideration) by myself. And also including testing(on two or three architecture). That idea is belong to you, I would like to see how does it going to happen only. Bjorn Signed-off-by: Sui Jingfeng --- drivers/pci/vgaarb.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index 7f0347f4f6fd..b0bf4952a95d 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -756,10 +756,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) struct pci_dev *bridge; u16 cmd; - /* Only deal with VGA class devices */ - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return false; - /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { @@ -1499,7 +1495,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; - vgaarb_dbg(dev, "%s\n", __func__); + /* Only deal with VGA class devices */ + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + return 0; /* For now we're only intereted in devices added and removed. I didn't * test this thing here, so someone needs to double check for the @@ -1509,6 +1507,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action, else if (action == BUS_NOTIFY_DEL_DEVICE) notify = vga_arbiter_del_pci_device(pdev); + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action); + if (notify) vga_arbiter_notify_clients(); return 0; @@ -1533,8 +1533,8 @@ static struct miscdevice vga_arb_device = { static int __init vga_arb_device_init(void) { + struct pci_dev *pdev = NULL; int rc; - struct pci_dev *pdev; rc = misc_register(_arb_device); if (rc < 0) @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void) /* We add all PCI devices satisfying VGA class in the arbiter by * default */ - pdev = NULL; - while ((pdev = - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_ANY_ID, pdev)) != NULL) + while (1) { + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev); + if (!pdev) + break; + vga_arbiter_add_pci_device(pdev); + }; pr_info("loaded\n"); return rc; -- 2.25.1 -- Jingfeng
[PATCH v2] drm/amdkfd: To enable traps for GC_11_0_4 and up
From: Ruili Ji Flag trap_en should be enabled for trap handler. Signed-off-by: Ruili Ji Signed-off-by: Aaron Liu Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d6b15493fffd..8a39a9e0ed5a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -227,7 +227,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || - KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0); + KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3); queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; queue_type = convert_to_mes_queue_type(q->properties.type); @@ -1807,7 +1807,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = !!qpd->evicted; q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) && - KFD_GC_VERSION(q->device) < IP_VERSION(12, 0, 0); + KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3); if (qd) mqd_mgr->restore_mqd(mqd_mgr, >mqd, q->mqd_mem_obj, >gart_mqd_addr, -- 2.40.1
Re: [Intel-gfx] [PATCH v3 4/4] PCI/VGA: introduce is_boot_device function callback to vga_client_register
Hi, On 2023/6/9 03:19, Bjorn Helgaas wrote: On Thu, Jun 08, 2023 at 07:43:22PM +0800, Sui Jingfeng wrote: From: Sui Jingfeng The vga_is_firmware_default() function is arch-dependent, which doesn't sound right. At least, it also works on the Mips and LoongArch platforms. Tested with the drm/amdgpu and drm/radeon drivers. However, it's difficult to enumerate all arch-driver combinations. I'm wrong if there is only one exception. With the observation that device drivers typically have better knowledge about which PCI bar contains the firmware framebuffer, which could avoid the need to iterate all of the PCI BARs. But as a PCI function at pci/vgaarb.c, vga_is_firmware_default() is probably not suitable to make such an optimization for a specific device. There are PCI display controllers that don't have a dedicated VRAM bar, this function will lose its effectiveness in such a case. Luckily, the device driver can provide an accurate workaround. Therefore, this patch introduces a callback that allows the device driver to tell the VGAARB if the device is the default boot device. This patch only intends to introduce the mechanism, while the implementation is left to the device driver authors. Also honor the comment: "Clients have two callback mechanisms they can use" s/bar/BAR/ (several) Nothing here uses the callback. I don't want to merge this until we have a user. This is chicken and egg question. If you could help get this merge first, I will show you the first user. I'm not sure why the device driver should know whether its device is the default boot device. It's not that the device driver should know, but it's about that the device driver has the right to override. Device driver may have better approach to identify the default boot device. Signed-off-by: Sui Jingfeng --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/i915/display/intel_vga.c | 3 +-- drivers/gpu/drm/nouveau/nouveau_vga.c | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/pci/vgaarb.c | 22 ++ drivers/vfio/pci/vfio_pci_core.c | 2 +- include/linux/vgaarb.h | 8 +--- 7 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c7d40873ee2..7a096f2d5c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3960,7 +3960,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* this will fail for cards that aren't VGA class devices, just * ignore it */ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) - vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); + vga_client_register(adev->pdev, amdgpu_device_vga_set_decode, NULL); px = amdgpu_device_supports_px(ddev); diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 286a0bdd28c6..98d7d4dffe9f 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -115,7 +115,6 @@ intel_vga_set_decode(struct pci_dev *pdev, bool enable_decode) int intel_vga_register(struct drm_i915_private *i915) { - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); int ret; @@ -127,7 +126,7 @@ int intel_vga_register(struct drm_i915_private *i915) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - ret = vga_client_register(pdev, intel_vga_set_decode); + ret = vga_client_register(pdev, intel_vga_set_decode, NULL); if (ret && ret != -ENODEV) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index f8bf0ec26844..162b4f4676c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -92,7 +92,7 @@ nouveau_vga_init(struct nouveau_drm *drm) return; pdev = to_pci_dev(dev->dev); - vga_client_register(pdev, nouveau_vga_set_decode); + vga_client_register(pdev, nouveau_vga_set_decode, NULL); /* don't register Thunderbolt eGPU with vga_switcheroo */ if (pci_is_thunderbolt_attached(pdev)) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index afbb3a80c0c6..71f2ff39d6a1 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1425,7 +1425,7 @@ int radeon_device_init(struct radeon_device *rdev, /* if we have > 1 VGA cards, then disable the radeon VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(rdev->pdev, radeon_vga_set_decode); + vga_client_register(rdev->pdev, radeon_vga_set_decode,
RE: [PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new message
[Public] > -Original Message- > From: Jani Nikula > Sent: Thursday, June 8, 2023 8:26 PM > To: Lin, Wayne ; dri-de...@lists.freedesktop.org; > amd-gfx@lists.freedesktop.org > Cc: ly...@redhat.com; ville.syrj...@linux.intel.com; imre.d...@intel.com; > Wentland, Harry ; Zuo, Jerry > ; Lin, Wayne ; > sta...@vger.kernel.org > Subject: Re: [PATCH v4] drm/dp_mst: Clear MSG_RDY flag before sending new > message > > On Thu, 08 Jun 2023, Wayne Lin wrote: > > [Why] > > The sequence for collecting down_reply from source perspective should > > be: > > > > Request_n->repeat (get partial reply of Request_n->clear message ready > > flag to ack DPRX that the message is received) till all partial > > replies for Request_n are received->new Request_n+1. > > > > Now there is chance that drm_dp_mst_hpd_irq() will fire new down > > request in the tx queue when the down reply is incomplete. Source is > > restricted to generate interveleaved message transactions so we should > > avoid it. > > > > Also, while assembling partial reply packets, reading out DPCD > > DOWN_REP Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag > should > > be wrapped up as a complete operation for reading out a reply packet. > > Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might > > be risky. e.g. If the reply of the new request has overwritten the > > DPRX DOWN_REP Sideband MSG buffer before source writing one to clear > > DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply > > for the new request. Should handle the up request in the same way. > > > > [How] > > Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ > > event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might > > trigger drm_dp_mst_kick_tx() only when there is no on going message > > transaction. > > > > Changes since v1: > > * Reworked on review comments received > > -> Adjust the fix to let driver explicitly kick off new down request > > when mst irq event is handled and acked > > -> Adjust the commit message > > > > Changes since v2: > > * Adjust the commit message > > * Adjust the naming of the divided 2 functions and add a new input > > parameter "ack". > > * Adjust code flow as per review comments. > > > > Changes since v3: > > * Update the function description of drm_dp_mst_hpd_irq_handle_event > > > > Signed-off-by: Wayne Lin > > Cc: sta...@vger.kernel.org > > --- > > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 ++-- > > drivers/gpu/drm/display/drm_dp_mst_topology.c | 54 > --- > > drivers/gpu/drm/i915/display/intel_dp.c | 7 +-- > > drivers/gpu/drm/nouveau/dispnv50/disp.c | 12 +++-- > > include/drm/display/drm_dp_mst_helper.h | 7 ++- > > 5 files changed, 82 insertions(+), 31 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > index d5cec03eaa8d..597c3368bcfb 100644 > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > @@ -3236,6 +3236,7 @@ static void dm_handle_mst_sideband_msg(struct > > amdgpu_dm_connector *aconnector) { > > u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 }; > > u8 dret; > > + u8 ack; > > bool new_irq_handled = false; > > int dpcd_addr; > > int dpcd_bytes_to_read; > > @@ -3265,34 +3266,36 @@ static void > dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector) > > process_count < max_process_count) { > > u8 retry; > > dret = 0; > > + ack = 0; > > > > process_count++; > > > > DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], > esi[1], esi[2]); > > /* handle HPD short pulse irq */ > > if (aconnector->mst_mgr.mst_state) > > - drm_dp_mst_hpd_irq( > > - >mst_mgr, > > - esi, > > - _irq_handled); > > + drm_dp_mst_hpd_irq_handle_event( > >mst_mgr, > > + esi, > > + , > > + _irq_handled); > > > > if (new_irq_handled) { > > /* ACK at DPCD to notify down stream */ > > - const int ack_dpcd_bytes_to_write = > > - dpcd_bytes_to_read - 1; > > - > > for (retry = 0; retry < 3; retry++) { > > - u8 wret; > > - > > - wret = drm_dp_dpcd_write( > > - >dm_dp_aux.aux, > > - dpcd_addr + 1, > > - [1], > > - ack_dpcd_bytes_to_write); > > - if (wret == ack_dpcd_bytes_to_write) > > +
Re: [PATCH] drm/amd/pm: workaround for compute workload type on some skus
On 6/9/2023 8:25 AM, Kenneth Feng wrote: On smu 13.0.0, the compute workload type cannot be set on all the skus due to some other problems. This workaround is to make sure compute workload type can also run on some specific skus. Signed-off-by: Kenneth Feng --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 26 +++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e2265f50bacc..6e8acd021ee6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2179,6 +2179,32 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && + (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || + ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC { + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_COMPUTE_BIT, + (void *)(_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } + + smu->power_profile_mode = PP_SMC_POWER_PROFILE_CUSTOM; Though custom profile is used to set compute profile as a workaround, shouldn't this be set to COMPUTE to indicate that the current profile set is COMPUTE? Thanks, Lijo + } + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD,
Re: [PATCH v3] drm/amd/pm: enable more Pstates profile levels for yellow_carp
[Public] Hi Shikai From: Guo, Shikai Sent: Thursday, June 8, 2023 6:27 PM To: amd-gfx@lists.freedesktop.org Cc: Liang, Prike ; Liu, Aaron ; Huang, Tim ; Deucher, Alexander ; Guo, Shikai Subject: [PATCH v3] drm/amd/pm: enable more Pstates profile levels for yellow_carp This patch enables following UMD stable Pstates profile levels for power_dpm_force_performance_level interface. - profile_peak - profile_min_mclk - profile_min_sclk - profile_standard Signed-off-by: shikaguo --- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 140 +- .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h | 1 - 2 files changed, 136 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index a92da336ecec..71566c60372f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -47,6 +47,14 @@ #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x0006L #define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1L +#define SMU_13_0_8_UMD_PSTATE_GFXCLK 533 +#define SMU_13_0_8_UMD_PSTATE_SOCCLK 533 +#define SMU_13_0_8_UMD_PSTATE_FCLK 800 + +#define SMU_13_0_1_UMD_PSTATE_GFXCLK 700 +#define SMU_13_0_1_UMD_PSTATE_SOCCLK 678 +#define SMU_13_0_1_UMD_PSTATE_FCLK 1800 + #define FEATURE_MASK(feature) (1ULL << feature) #define SMC_DPM_FEATURE ( \ FEATURE_MASK(FEATURE_CCLK_DPM_BIT) | \ @@ -957,6 +965,9 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, uint32_t max) { enum smu_message_type msg_set_min, msg_set_max; + uint32_t min_clk = min; + uint32_t max_clk = max; + int ret = 0; if (!yellow_carp_clk_dpm_is_enabled(smu, clk_type)) @@ -985,11 +996,17 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, return -EINVAL; } - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL); + if (clk_type == SMU_VCLK) { + min_clk = min << SMU_13_VCLK_SHIFT; + max_clk = max << SMU_13_VCLK_SHIFT; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min_clk, NULL); + if (ret) goto out; - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max_clk, NULL); if (ret) goto out; @@ -997,12 +1014,48 @@ static int yellow_carp_set_soft_freq_limited_range(struct smu_context *smu, return ret; } +static int yellow_carp_get_umd_pstate_clk_default(struct smu_context *smu, It should return uint32_t for the default clk. With this fixed, this patch is Reviewed-by: Tim Huang + enum smu_clk_type clk_type) +{ + uint32_t clk_limit = 0; + struct amdgpu_device *adev = smu->adev; + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8)) + clk_limit = SMU_13_0_8_UMD_PSTATE_GFXCLK; + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) || + (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 3)) + clk_limit = SMU_13_0_1_UMD_PSTATE_GFXCLK; + break; + case SMU_SOCCLK: + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8)) + clk_limit = SMU_13_0_8_UMD_PSTATE_SOCCLK; + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) || + (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 3)) + clk_limit = SMU_13_0_1_UMD_PSTATE_SOCCLK; + break; + case SMU_FCLK: + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 8)) + clk_limit = SMU_13_0_8_UMD_PSTATE_FCLK; + if ((adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 1) || + (adev->ip_versions[MP1_HWIP][0]) == IP_VERSION(13, 0, 3)) + clk_limit = SMU_13_0_1_UMD_PSTATE_FCLK; + break; + default: + break; + } + + return clk_limit; +} + static int yellow_carp_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; + uint32_t clk_limit = 0; smu_cmn_get_sysfs_buf(, ); @@ -1044,6 +1097,7 @@ static int
RE: [PATCH] drm/amd/pm: workaround for compute workload type on some skus
[AMD Official Use Only - General] Reviewed-by: Feifei Xu -Original Message- From: amd-gfx On Behalf Of Kenneth Feng Sent: Friday, June 9, 2023 10:56 AM To: amd-gfx@lists.freedesktop.org Cc: Quan, Evan ; Feng, Kenneth Subject: [PATCH] drm/amd/pm: workaround for compute workload type on some skus On smu 13.0.0, the compute workload type cannot be set on all the skus due to some other problems. This workaround is to make sure compute workload type can also run on some specific skus. Signed-off-by: Kenneth Feng --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 26 +++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e2265f50bacc..6e8acd021ee6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2179,6 +2179,32 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && + (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || + ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC { + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_COMPUTE_BIT, + (void *)(_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } + + smu->power_profile_mode = PP_SMC_POWER_PROFILE_CUSTOM; + } + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, -- 2.34.1