[AMD Official Use Only]

Reviewed-by: Harish Kasiviswanathan <harish.kasiviswanat...@amd.com>

-----Original Message-----
From: Sider, Graham <graham.si...@amd.com> 
Sent: Thursday, July 8, 2021 1:39 PM
To: amd-gfx@lists.freedesktop.org
Cc: Kasiviswanathan, Harish <harish.kasiviswanat...@amd.com>; Sider, Graham 
<graham.si...@amd.com>
Subject: [PATCH] drm/amdkfd: Update SMI throttle event bitmask

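Update the Arcturus/Aldebaran thermal throttle SMI event path to use
ASIC-independent throttler bits when logging. This widens the
throttle_bitmask parameter from uint32_t to uint64_t and grows the SMI
event buffer from 29 to 37 bytes to fit the 16-hex-digit bitmask.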

Signed-off-by: Graham Sider <graham.si...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h         |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c        | 10 +++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h        |  2 +-
 drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c  |  4 +++-
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c |  4 +++-
 6 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index fabc68eec36a..9c505ac0be8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -330,7 +330,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
 #else
 static inline int kgd2kfd_init(void)
 {
@@ -389,7 +389,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
 }
 
 static inline
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
 {
 }
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9e4a05e937f0..5b06bc308782 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1370,7 +1370,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
        WARN_ONCE(count < 0, "Compute profile ref. count error");
 }
 
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
 {
        if (kfd && kfd->init_complete)
                kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 246522423559..ed4bc5f844ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -205,23 +205,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
 }
 
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
-                                            uint32_t throttle_bitmask)
+                                            uint64_t throttle_bitmask)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
        /*
         * ThermalThrottle msg = throttle_bitmask(8):
         *                       thermal_interrupt_count(16):
-        * 1 byte event + 1 byte space + 8 byte throttle_bitmask +
+        * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
         * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
-        * 1 byte \0 = 29
+        * 1 byte \0 = 37
         */
-       char fifo_in[29];
+       char fifo_in[37];
        int len;
 
        if (list_empty(&dev->smi_clients))
                return;
 
-       len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
+       len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
                       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
                       atomic64_read(&adev->smu.throttle_int_counter));
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index b9b0438202e2..bffd0c32b060 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -26,7 +26,7 @@
 int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
-                                            uint32_t throttle_bitmask);
+                                            uint64_t throttle_bitmask);
 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 6b3e0ea10163..6ec8492f71f5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -2178,7 +2178,9 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
 
        dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
                        log_buf);
-       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
+       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
+               smu_cmn_get_indep_throttler_status(throttler_status,
+                                                  arcturus_throttler_map));
 }
 
 static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index c16ca0c78e93..e1575d974315 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1627,7 +1627,9 @@ static void aldebaran_log_thermal_throttling_event(struct smu_context *smu)
 
        dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
                 log_buf);
-       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
+       kgd2kfd_smi_event_throttle(smu->adev->kfd.dev,
+               smu_cmn_get_indep_throttler_status(throttler_status,
+                                                  aldebaran_throttler_map));
 }
 
 static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu)
-- 
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to