https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/181993

This is a place to put definitions for various ABI structs.
Currently device libs just hardcodes magic numbers and casts,
which is incomprehensible.

>From c0c2ac0554eb4744ddbcc9b6747a095ba072b23e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Wed, 18 Feb 2026 10:50:48 +0100
Subject: [PATCH] clang: Add builtin header for amdhsa abi

This is a place to put definitions for various ABI structs.
Currently device libs just hardcodes magic numbers and casts,
which is incomprehensible.
---
 clang/lib/Headers/CMakeLists.txt |   1 +
 clang/lib/Headers/amdhsa_abi.h   |  80 +++++++++++++++
 clang/test/Headers/amdhsa_abi.cl | 166 +++++++++++++++++++++++++++++++
 3 files changed, 247 insertions(+)
 create mode 100644 clang/lib/Headers/amdhsa_abi.h
 create mode 100644 clang/test/Headers/amdhsa_abi.cl

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 95d20bbca79ac..bc039fc508fb1 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -294,6 +294,7 @@ set(x86_files
   )
 
 set(gpu_files
+  amdhsa_abi.h
   gpuintrin.h
   nvptxintrin.h
   amdgpuintrin.h
diff --git a/clang/lib/Headers/amdhsa_abi.h b/clang/lib/Headers/amdhsa_abi.h
new file mode 100644
index 0000000000000..7dcf5b7f8624a
--- /dev/null
+++ b/clang/lib/Headers/amdhsa_abi.h
@@ -0,0 +1,80 @@
+//===-- amdhsa_abi.h - AMDHSA ABI definition utilities --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __AMDHSA_ABI_H
+#define __AMDHSA_ABI_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct __attribute__((aligned(8))) amdhsa_implicit_kernarg_v5 {
+  uint32_t block_count[3];
+  uint16_t group_size[3];
+  uint16_t remainder[3];
+  char reserved0[16]; // Bytes 24-39; no named member.
+  uint64_t global_offset[3];
+  uint16_t grid_dims;
+  char reserved1[14]; // Bytes 66-79; no named member.
+  __attribute__((opencl_global)) void *hostcall_buffer;
+  __attribute__((opencl_global)) void *multigrid_sync_arg;
+  __attribute__((opencl_global)) void *heap_v1;
+  __attribute__((opencl_global)) void *default_queue;
+  __attribute__((opencl_global)) void *completion_action;
+  char reserved2[72];    // Bytes 120-191; no named member.
+  uint32_t private_base; // Unused on gfx9+
+  uint32_t shared_base;  // Unused on gfx9+
+  __attribute__((opencl_global)) void *queue_ptr;
+  char reserved3[48]; // Bytes 208-255; brings the total size to 256.
+} amdhsa_implicit_kernarg_v5;
+
+_Static_assert(sizeof(amdhsa_implicit_kernarg_v5) == 256, "wrong struct size");
+// Pin the byte offset of every non-reserved member and array element.
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, block_count[0]) == 0,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, block_count[1]) == 4,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, block_count[2]) == 8,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, group_size[0]) == 12,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, group_size[1]) == 14,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, group_size[2]) == 16,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, remainder[0]) == 18,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, remainder[1]) == 20,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, remainder[2]) == 22,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, global_offset[0]) == 40,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, global_offset[1]) == 48,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, global_offset[2]) == 56,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, grid_dims) == 64,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, hostcall_buffer) == 80,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, multigrid_sync_arg) == 88,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, heap_v1) == 96,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, default_queue) == 104,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, completion_action) == 112,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, private_base) == 192,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, shared_base) == 196,
+               "wrong offset");
+_Static_assert(offsetof(amdhsa_implicit_kernarg_v5, queue_ptr) == 200,
+               "wrong offset");
+
+#endif // __AMDHSA_ABI_H
diff --git a/clang/test/Headers/amdhsa_abi.cl b/clang/test/Headers/amdhsa_abi.cl
new file mode 100644
index 0000000000000..170ae925c8c41
--- /dev/null
+++ b/clang/test/Headers/amdhsa_abi.cl
@@ -0,0 +1,166 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O1 -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
+
+// Test that the struct definition for amdhsa_implicit_kernarg_v5 exists and has
+// the correct member offsets.
+
+#include <amdhsa_abi.h>
+
+void use_u64(uint64_t x);
+void use_u32(uint32_t x);
+void use_u16(uint16_t x);
+void use_ptr(__global void* p);
+
+// CHECK-LABEL: define dso_local void @test_implicit_args_v5(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[IMPLICIT_ARGS:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5)
+// CHECK-NEXT:    call void @llvm.lifetime.start.p5(ptr addrspace(5) [[IMPLICIT_ARGS]]) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[BLOCK_COUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5:%.*]], ptr addrspace(4) [[TMP1]], i32 0, i32 0
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x i32], ptr addrspace(4) [[BLOCK_COUNT]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[INT_TBAA4:![0-9]+]]
+// CHECK-NEXT:    call void @use_u32(i32 noundef [[TMP2]]) #[[ATTR5:[0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[BLOCK_COUNT1:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP3]], i32 0, i32 0
+// CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x i32], ptr addrspace(4) [[BLOCK_COUNT1]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA4]]
+// CHECK-NEXT:    call void @use_u32(i32 noundef [[TMP4]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[BLOCK_COUNT3:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP5]], i32 0, i32 0
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x i32], ptr addrspace(4) [[BLOCK_COUNT3]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX4]], align 8, !tbaa [[INT_TBAA4]]
+// CHECK-NEXT:    call void @use_u32(i32 noundef [[TMP6]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[GROUP_SIZE:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP7]], i32 0, i32 1
+// CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x i16], ptr addrspace(4) [[GROUP_SIZE]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX5]], align 4, !tbaa [[SHORT_TBAA11:![0-9]+]]
+// CHECK-NEXT:    call void @use_u16(i16 noundef zeroext [[TMP8]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[GROUP_SIZE6:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP9]], i32 0, i32 1
+// CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x i16], ptr addrspace(4) [[GROUP_SIZE6]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX7]], align 2, !tbaa [[SHORT_TBAA11]]
+// CHECK-NEXT:    call void @use_u16(i16 noundef zeroext [[TMP10]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[GROUP_SIZE8:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP11]], i32 0, i32 1
+// CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [3 x i16], ptr addrspace(4) [[GROUP_SIZE8]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX9]], align 4, !tbaa [[SHORT_TBAA11]]
+// CHECK-NEXT:    call void @use_u16(i16 noundef zeroext [[TMP12]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[REMAINDER:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP13]], i32 0, i32 2
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [3 x i16], ptr addrspace(4) [[REMAINDER]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP14:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX10]], align 2, !tbaa [[SHORT_TBAA11]]
+// CHECK-NEXT:    call void @use_u16(i16 noundef zeroext [[TMP14]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[REMAINDER11:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP15]], i32 0, i32 2
+// CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [3 x i16], ptr addrspace(4) [[REMAINDER11]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP16:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX12]], align 2, !tbaa [[SHORT_TBAA11]]
+// CHECK-NEXT:    call void @use_u16(i16 noundef zeroext [[TMP16]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[REMAINDER13:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP17]], i32 0, i32 2
+// CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [3 x i16], ptr addrspace(4) [[REMAINDER13]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP18:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX14]], align 2, !tbaa [[SHORT_TBAA11]]
+// CHECK-NEXT:    call void @use_u16(i16 noundef zeroext [[TMP18]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[GLOBAL_OFFSET:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP19]], i32 0, i32 4
+// CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds [3 x i64], ptr addrspace(4) [[GLOBAL_OFFSET]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP20:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX15]], align 8, !tbaa [[LONG_TBAA13:![0-9]+]]
+// CHECK-NEXT:    call void @use_u64(i64 noundef [[TMP20]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[GLOBAL_OFFSET16:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP21]], i32 0, i32 4
+// CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [3 x i64], ptr addrspace(4) [[GLOBAL_OFFSET16]], i64 0, i64 1
+// CHECK-NEXT:    [[TMP22:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX17]], align 8, !tbaa [[LONG_TBAA13]]
+// CHECK-NEXT:    call void @use_u64(i64 noundef [[TMP22]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[GLOBAL_OFFSET18:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP23]], i32 0, i32 4
+// CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds [3 x i64], ptr addrspace(4) [[GLOBAL_OFFSET18]], i64 0, i64 2
+// CHECK-NEXT:    [[TMP24:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX19]], align 8, !tbaa [[LONG_TBAA13]]
+// CHECK-NEXT:    call void @use_u64(i64 noundef [[TMP24]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP25:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[HOSTCALL_BUFFER:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP25]], i32 0, i32 7
+// CHECK-NEXT:    [[TMP26:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[HOSTCALL_BUFFER]], align 8, !tbaa [[ANYPTR_TBAA15:![0-9]+]]
+// CHECK-NEXT:    call void @use_ptr(ptr addrspace(1) noundef [[TMP26]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP27:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[MULTIGRID_SYNC_ARG:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP27]], i32 0, i32 8
+// CHECK-NEXT:    [[TMP28:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[MULTIGRID_SYNC_ARG]], align 8, !tbaa [[ANYPTR_TBAA17:![0-9]+]]
+// CHECK-NEXT:    call void @use_ptr(ptr addrspace(1) noundef [[TMP28]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[HEAP_V1:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP29]], i32 0, i32 9
+// CHECK-NEXT:    [[TMP30:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[HEAP_V1]], align 8, !tbaa [[ANYPTR_TBAA18:![0-9]+]]
+// CHECK-NEXT:    call void @use_ptr(ptr addrspace(1) noundef [[TMP30]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP31:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[DEFAULT_QUEUE:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP31]], i32 0, i32 10
+// CHECK-NEXT:    [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[DEFAULT_QUEUE]], align 8, !tbaa [[ANYPTR_TBAA19:![0-9]+]]
+// CHECK-NEXT:    call void @use_ptr(ptr addrspace(1) noundef [[TMP32]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP33:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[COMPLETION_ACTION:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP33]], i32 0, i32 11
+// CHECK-NEXT:    [[TMP34:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[COMPLETION_ACTION]], align 8, !tbaa [[ANYPTR_TBAA20:![0-9]+]]
+// CHECK-NEXT:    call void @use_ptr(ptr addrspace(1) noundef [[TMP34]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP35:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[QUEUE_PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP35]], i32 0, i32 15
+// CHECK-NEXT:    [[TMP36:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[QUEUE_PTR]], align 8, !tbaa [[ANYPTR_TBAA21:![0-9]+]]
+// CHECK-NEXT:    call void @use_ptr(ptr addrspace(1) noundef [[TMP36]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP37:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[PRIVATE_BASE:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP37]], i32 0, i32 13
+// CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr addrspace(4) [[PRIVATE_BASE]], align 8, !tbaa [[INT_TBAA22:![0-9]+]]
+// CHECK-NEXT:    call void @use_u32(i32 noundef [[TMP38]]) #[[ATTR5]]
+// CHECK-NEXT:    [[TMP39:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[IMPLICIT_ARGS]], align 8, !tbaa [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]]
+// CHECK-NEXT:    [[SHARED_BASE:%.*]] = getelementptr inbounds nuw [[STRUCT_AMDHSA_IMPLICIT_KERNARG_V5]], ptr addrspace(4) [[TMP39]], i32 0, i32 14
+// CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr addrspace(4) [[SHARED_BASE]], align 4, !tbaa [[INT_TBAA23:![0-9]+]]
+// CHECK-NEXT:    call void @use_u32(i32 noundef [[TMP40]]) #[[ATTR5]]
+// CHECK-NEXT:    call void @llvm.lifetime.end.p5(ptr addrspace(5) [[IMPLICIT_ARGS]]) #[[ATTR4]]
+// CHECK-NEXT:    ret void
+//
+void test_implicit_args_v5() {
+  __constant amdhsa_implicit_kernarg_v5 *implicit_args
+    = (__constant amdhsa_implicit_kernarg_v5 *) __builtin_amdgcn_implicitarg_ptr();
+  // Load each field below; order must match the CHECK-NEXT lines above.
+  use_u32(implicit_args->block_count[0]);
+  use_u32(implicit_args->block_count[1]);
+  use_u32(implicit_args->block_count[2]);
+
+  use_u16(implicit_args->group_size[0]);
+  use_u16(implicit_args->group_size[1]);
+  use_u16(implicit_args->group_size[2]);
+
+  use_u16(implicit_args->remainder[0]);
+  use_u16(implicit_args->remainder[1]);
+  use_u16(implicit_args->remainder[2]);
+
+  use_u64(implicit_args->global_offset[0]);
+  use_u64(implicit_args->global_offset[1]);
+  use_u64(implicit_args->global_offset[2]);
+  // NOTE(review): grid_dims is not read anywhere in this function.
+  use_ptr(implicit_args->hostcall_buffer);
+  use_ptr(implicit_args->multigrid_sync_arg);
+  use_ptr(implicit_args->heap_v1);
+  use_ptr(implicit_args->default_queue);
+  use_ptr(implicit_args->completion_action);
+  use_ptr(implicit_args->queue_ptr);
+  use_u32(implicit_args->private_base);
+  use_u32(implicit_args->shared_base);
+}
+//.
+// CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+// CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[_ZTS26AMDHSA_IMPLICIT_KERNARG_V5PTR_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+// CHECK: [[META9]] = !{!"p1 _ZTS26amdhsa_implicit_kernarg_v5", [[META10:![0-9]+]], i64 0}
+// CHECK: [[META10]] = !{!"any pointer", [[META6]], i64 0}
+// CHECK: [[SHORT_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
+// CHECK: [[META12]] = !{!"short", [[META6]], i64 0}
+// CHECK: [[LONG_TBAA13]] = !{[[META14:![0-9]+]], [[META14]], i64 0}
+// CHECK: [[META14]] = !{!"long", [[META6]], i64 0}
+// CHECK: [[ANYPTR_TBAA15]] = !{[[META16:![0-9]+]], [[META10]], i64 80}
+// CHECK: [[META16]] = !{!"amdhsa_implicit_kernarg_v5", [[META6]], i64 0, [[META6]], i64 12, [[META6]], i64 18, [[META6]], i64 24, [[META6]], i64 40, [[META12]], i64 64, [[META6]], i64 66, [[META10]], i64 80, [[META10]], i64 88, [[META10]], i64 96, [[META10]], i64 104, [[META10]], i64 112, [[META6]], i64 120, [[META5]], i64 192, [[META5]], i64 196, [[META10]], i64 200, [[META6]], i64 208}
+// CHECK: [[ANYPTR_TBAA17]] = !{[[META16]], [[META10]], i64 88}
+// CHECK: [[ANYPTR_TBAA18]] = !{[[META16]], [[META10]], i64 96}
+// CHECK: [[ANYPTR_TBAA19]] = !{[[META16]], [[META10]], i64 104}
+// CHECK: [[ANYPTR_TBAA20]] = !{[[META16]], [[META10]], i64 112}
+// CHECK: [[ANYPTR_TBAA21]] = !{[[META16]], [[META10]], i64 200}
+// CHECK: [[INT_TBAA22]] = !{[[META16]], [[META5]], i64 192}
+// CHECK: [[INT_TBAA23]] = !{[[META16]], [[META5]], i64 196}
+//.

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to