On 2026-05-20 03:13, Yifan Zhang wrote:
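The previous u16 type for the KFD SIGBUS delay value (in milliseconds) may not be large enough to cover the time a GPU coredump takes, so widen it to u32.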
Fixes: f207b038bc38 ("drm/amdgpu: add ioctl to handle RAS poison error")
Signed-off-by: Yifan Zhang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_events.c | 10 +++++-----
include/uapi/drm/amdgpu_drm.h | 7 +++----
3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0408476f1070..6a5459b59af2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -461,7 +461,7 @@ struct amdgpu_fpriv {
* DRM_IOCTL_AMDGPU_USER_OPTIONS / AMDGPU_USER_OPTIONS_OP_KFD_SIGBUS_DELAY).
*
* 0 - send SIGBUS immediately (default)
- * 0xFFFF - suppress SIGBUS delivery
+ * 0xFFFFFFFF - suppress SIGBUS delivery
* other - delay SIGBUS delivery by this many milliseconds
*/
atomic_t kfd_sigbus_delay_ms;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 200570401f51..8b4e0a208e86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1352,7 +1352,7 @@ void kfd_signal_reset_event(struct kfd_node *dev)
*
* Encoded value (set on any of the process' amdgpu render fds):
* 0 - default: SIGBUS immediately (no opt-in)
- * 0xFFFF - opt-in, never escalate to SIGBUS
+ * 0xFFFFFFFF - opt-in, never escalate to SIGBUS
* N (other) - opt-in, escalate to SIGBUS after N ms if app does not
* handle the error in time (safety timeout)
*
@@ -1388,16 +1388,16 @@ static void kfd_signal_sigbus_delayed_fn(struct work_struct *work)
* Rationale: if the app has explicitly opted in on any GPU it uses, it
* wants the chance to handle the error in userspace.
*/
-static u16 kfd_get_sigbus_delay_ms(struct kfd_process *p)
+static u32 kfd_get_sigbus_delay_ms(struct kfd_process *p)
{
- u16 result = 0;
+ u32 result = 0;
int i;
mutex_lock(&p->mutex);
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
struct amdgpu_fpriv *drv_priv;
- u16 v;
+ u32 v;
if (!pdd || !pdd->drm_file)
continue;
@@ -1420,7 +1420,7 @@ static u16 kfd_get_sigbus_delay_ms(struct kfd_process *p)
static void kfd_signal_sigbus_with_delay(struct kfd_node *dev,
struct kfd_process *p)
{
- u16 delay_ms = kfd_get_sigbus_delay_ms(p);
+ u32 delay_ms = kfd_get_sigbus_delay_ms(p);
struct kfd_sigbus_delayed_work *dw;
if (delay_ms == AMDGPU_USER_OPTIONS_KFD_SIGBUS_DELAY_DISABLED) {
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index e88d7cf53858..e0a382673b90 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1680,19 +1680,18 @@ struct drm_amdgpu_info_uq_metadata {
*
* option: AMDGPU_USER_OPTIONS_OP_KFD_SIGBUS_DELAY
* 0: Disable sigbus delay - SIGBUS will be raised immediately
- * 0xFFFF: SIGBUS will not be raised
+ * 0xFFFFFFFF: SIGBUS will not be raised
* other: Set the sigbus delay in milliseconds
*/
#define AMDGPU_USER_OPTIONS_OP_KFD_SIGBUS_DELAY 0
-#define AMDGPU_USER_OPTIONS_KFD_SIGBUS_DELAY_DISABLED 0xFFFFu
+#define AMDGPU_USER_OPTIONS_KFD_SIGBUS_DELAY_DISABLED 0xFFFFFFFFu
struct drm_amdgpu_user_options {
__u32 op;
union {
This union only has one member. You can remove the anonymous union
around the struct.
Regards,
Felix
struct {
- __u16 value;
- __u16 _pad;
+ __u32 value;
} kfd_sigbus_delay;
};
};