https://github.com/macurtis-amd updated https://github.com/llvm/llvm-project/pull/203352
>From 953485cedcc86698e00119674783d81470291d98 Mon Sep 17 00:00:00 2001 From: Matthew Curtis <[email protected]> Date: Thu, 11 Jun 2026 12:53:12 -0500 Subject: [PATCH] [Clang][AMDGPU] Add __builtin_amdgcn_s_buffer_load_<T> Expose llvm.amdgcn.s.buffer.load intrinsic overloads for OpenCL via __builtin_amdgcn_s_buffer_load_* builtins. Function prototypes: <return-type> __builtin_amdgcn_s_buffer_load_<suffix>( v4i32 rsrc, // 4-dword buffer resource in SGPRs int offset, // byte offset from the base of the buffer int aux); // cache-policy, control flags. Must be compile-time const. Where: return-type suffix ---------------- ------ char i8 unsigned char u8 short i16 unsigned short u16 int i32 half f16 float f32 v2i8 v2i8 v3i8 v3i8 v4i8 v4i8 v2i32 v2i32 v3i32 v3i32 v4i32 v4i32 v8i32 v8i32 v16i32 v16i32 v2f16 v2f16 v3f16 v3f16 v4f16 v4f16 v2f32 v2f32 v3f32 v3f32 v4f32 v4f32 v8f32 v8f32 v16f32 v16f32 --- clang/docs/AMDGPUSupport.rst | 7 + clang/include/clang/Basic/BuiltinsAMDGPU.td | 30 +++ .../include/clang/Basic/BuiltinsAMDGPUDocs.td | 39 +++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 33 +++ .../builtins-amdgcn-s-buffer-load.cl | 238 ++++++++++++++++++ .../builtins-amdgcn-s-buffer-load-error.cl | 125 +++++++++ 6 files changed, 472 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-s-buffer-load.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-s-buffer-load-error.cl diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst index 18e3de8abe92a..8ca537fa5d729 100644 --- a/clang/docs/AMDGPUSupport.rst +++ b/clang/docs/AMDGPUSupport.rst @@ -57,3 +57,10 @@ Predefined Macros - Defined if FP64 instruction is available (deprecated). Please note that the specific architecture and feature names will vary depending on the GPU. Also, some macros are deprecated and may be removed in future releases. + + +Target-Specific Builtins +======================== + +Clang exposes AMDGPU hardware intrinsics as target-specific builtins with the +``__builtin_amdgcn_`` prefix. These are documented in :doc:`AMDGPUBuiltinReference`. diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 8eed188b0f4b2..b24a2403ecec2 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -242,6 +242,10 @@ def __builtin_amdgcn_qsad_pk_u16_u8 : AMDGPUBuiltin<"uint64_t(uint64_t, unsigned def __builtin_amdgcn_mqsad_pk_u16_u8 : AMDGPUBuiltin<"uint64_t(uint64_t, unsigned int, uint64_t)", [Const], "mqsad-pk-insts">; def __builtin_amdgcn_mqsad_u32_u8 : AMDGPUBuiltin<"_ExtVector<4, unsigned int>(uint64_t, unsigned int, _ExtVector<4, unsigned int>)", [Const], "mqsad-insts">; +//===----------------------------------------------------------------------===// +// Buffer builtins. +//===----------------------------------------------------------------------===// + def __builtin_amdgcn_make_buffer_rsrc : AMDGPUBuiltin<"__amdgpu_buffer_rsrc_t(void *, short, int64_t, int)", [Const]>; def __builtin_amdgcn_raw_buffer_store_b8 : AMDGPUBuiltin<"void(unsigned char, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">; def __builtin_amdgcn_raw_buffer_store_b16 : AMDGPUBuiltin<"void(unsigned short, __amdgpu_buffer_rsrc_t, int, int, _Constant int)">; @@ -280,6 +284,32 @@ def __builtin_amdgcn_raw_ptr_buffer_load_async_lds : AMDGPUBuiltin<"void(__amdgp def __builtin_amdgcn_struct_ptr_buffer_load_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">; def __builtin_amdgcn_struct_ptr_buffer_load_async_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">; +let Documentation = [DocSBufferLoad], ArgNames = ["rsrc", "offset", "aux"] in { +def __builtin_amdgcn_s_buffer_load_i32 : AMDGPUBuiltin<"int(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v3i32 : AMDGPUBuiltin<"_Vector<3, int>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v4i32 : AMDGPUBuiltin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v8i32 : AMDGPUBuiltin<"_Vector<8, int>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v16i32 : AMDGPUBuiltin<"_Vector<16, int>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_f32 : AMDGPUBuiltin<"float(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v2f32 : AMDGPUBuiltin<"_Vector<2, float>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v3f32 : AMDGPUBuiltin<"_Vector<3, float>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v4f32 : AMDGPUBuiltin<"_Vector<4, float>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v8f32 : AMDGPUBuiltin<"_Vector<8, float>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v16f32 : AMDGPUBuiltin<"_Vector<16, float>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_i8 : AMDGPUBuiltin<"char(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_u8 : AMDGPUBuiltin<"unsigned char(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_i16 : AMDGPUBuiltin<"short(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_u16 : AMDGPUBuiltin<"unsigned short(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v2i8 : AMDGPUBuiltin<"_Vector<2, char>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v3i8 : AMDGPUBuiltin<"_Vector<3, char>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v4i8 : AMDGPUBuiltin<"_Vector<4, char>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_f16 : AMDGPUBuiltin<"_Float16(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v2f16 : AMDGPUBuiltin<"_Vector<2, _Float16>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v3f16 : AMDGPUBuiltin<"_Vector<3, _Float16>(_Vector<4, int>, int, _Constant int)", [Const]>; +def __builtin_amdgcn_s_buffer_load_v4f16 : AMDGPUBuiltin<"_Vector<4, _Float16>(_Vector<4, int>, int, _Constant int)", [Const]>; +} + //===----------------------------------------------------------------------===// // Global Available/Visible memory accesses. //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td b/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td index 293431c5de7e8..d6bb56e08aa42 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td @@ -676,3 +676,42 @@ def DocTensorStoreFromLDS_GFX1250 : Documentation { Asynchronously copies a tensor from LDS into global memory. }]; } + +//===----------------------------------------------------------------------===// +// S-Buffer Load Builtins +//===----------------------------------------------------------------------===// + +def DocCatSBufferLoad : DocumentationCategory<"S-Buffer Load Builtins"> { + let Content = [{ +These builtins lower to ``llvm.amdgcn.s.buffer.load`` and issue an +``s_buffer_load`` when the byte offset is uniform across the wavefront. +When the offset is divergent, the backend may lower to a ``buffer_load``. + +Unlike the ``__builtin_amdgcn_raw_buffer_load_*`` family, these builtins +take the buffer resource as a 4-dword SGPR descriptor (``v4i32``) rather +than ``__amdgpu_buffer_rsrc_t``. + +The return type selects the load width. Separate builtins are provided for +each supported scalar and vector element type. +}]; +} + +def DocSBufferLoad : Documentation { + let Category = DocCatSBufferLoad; + let Content = [{ +Loads data from a buffer using an SGPR buffer descriptor. + +- ``rsrc``: 4-dword buffer resource in SGPRs. +- ``offset``: byte offset from the base of the buffer. May be variable; a + uniform offset enables ``s_buffer_load`` selection. +- ``aux``: cache-policy and control flags. Must be a compile-time constant. + The encoding is target-dependent. Common fields include: + + - Pre-GFX12: bit 0 = glc, bit 1 = slc, bit 2 = dlc (GFX10/GFX11), bit 3 = + swz, bit 4 = scc (GFX90a). + - GFX942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1. + - GFX12+: bits [0-2] = th, bits [3-4] = scope, bit 6 = swz. + + The volatile bit is not permitted for this intrinsic. +}]; +} diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 21f32b12c4fd1..b3f5322a93cb6 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -31,6 +31,15 @@ using namespace llvm; namespace { +static Value *emitAMDGPUSBufferLoadBuiltin(CodeGenFunction &CGF, + const CallExpr *E) { + llvm::Type *RetTy = CGF.ConvertType(E->getType()); + Function *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_s_buffer_load, RetTy); + return CGF.Builder.CreateCall(F, {CGF.EmitScalarExpr(E->getArg(0)), + CGF.EmitScalarExpr(E->getArg(1)), + CGF.EmitScalarExpr(E->getArg(2))}); +} + // Has second type mangled argument. static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, @@ -2156,6 +2165,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: return emitBuiltinWithOneOverloadedType<5>( *this, E, Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax); + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_i32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v2i32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v3i32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v4i32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v8i32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v16i32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_f32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v2f32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v3f32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v4f32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v8f32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v16f32: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_i8: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_u8: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_i16: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_u16: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v2i8: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v3i8: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v4i8: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_f16: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v2f16: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v3f16: + case AMDGPU::BI__builtin_amdgcn_s_buffer_load_v4f16: + return emitAMDGPUSBufferLoadBuiltin(*this, E); case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: return emitBuiltinWithOneOverloadedType<2>( *this, E, Intrinsic::amdgcn_s_prefetch_data); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-s-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-s-buffer-load.cl new file mode 100644 index 0000000000000..0412c2cf4cc38 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-s-buffer-load.cl @@ -0,0 +1,238 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int v4i32 __attribute__((ext_vector_type(4))); +typedef int v2i32 __attribute__((ext_vector_type(2))); +typedef int v3i32 __attribute__((ext_vector_type(3))); +typedef int v8i32 __attribute__((ext_vector_type(8))); +typedef int v16i32 __attribute__((ext_vector_type(16))); +typedef float v2f32 __attribute__((ext_vector_type(2))); +typedef float v3f32 __attribute__((ext_vector_type(3))); +typedef float v4f32 __attribute__((ext_vector_type(4))); +typedef float v8f32 __attribute__((ext_vector_type(8))); +typedef float v16f32 __attribute__((ext_vector_type(16))); +typedef char v2i8 __attribute__((ext_vector_type(2))); +typedef char v3i8 __attribute__((ext_vector_type(3))); +typedef char v4i8 __attribute__((ext_vector_type(4))); +typedef half v2f16 __attribute__((ext_vector_type(2))); +typedef half v3f16 __attribute__((ext_vector_type(3))); +typedef half v4f16 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_s_buffer_load_i32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_i32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_i32_non_const_offset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0) +// CHECK-NEXT: ret i32 [[TMP0]] +// +int test_amdgcn_s_buffer_load_i32_non_const_offset(v4i32 rsrc, int offset) { + return __builtin_amdgcn_s_buffer_load_i32(rsrc, offset, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v2i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <2 x i32> [[TMP0]] +// +v2i32 test_amdgcn_s_buffer_load_v2i32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v2i32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v3i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <3 x i32> [[TMP0]] +// +v3i32 test_amdgcn_s_buffer_load_v3i32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v3i32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v4i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4i32 test_amdgcn_s_buffer_load_v4i32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v4i32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v8i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <8 x i32> [[TMP0]] +// +v8i32 test_amdgcn_s_buffer_load_v8i32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v8i32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v16i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <16 x i32> [[TMP0]] +// +v16i32 test_amdgcn_s_buffer_load_v16i32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v16i32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret float [[TMP0]] +// +float test_amdgcn_s_buffer_load_f32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_f32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v2f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <2 x float> [[TMP0]] +// +v2f32 test_amdgcn_s_buffer_load_v2f32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v2f32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v3f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <3 x float> [[TMP0]] +// +v3f32 test_amdgcn_s_buffer_load_v3f32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v3f32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v4f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +v4f32 test_amdgcn_s_buffer_load_v4f32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v4f32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v8f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <8 x float> [[TMP0]] +// +v8f32 test_amdgcn_s_buffer_load_v8f32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v8f32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v16f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <16 x float> [[TMP0]] +// +v16f32 test_amdgcn_s_buffer_load_v16f32(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v16f32(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i8 [[TMP0]] +// +char test_amdgcn_s_buffer_load_i8(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_i8(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i8 [[TMP0]] +// +unsigned char test_amdgcn_s_buffer_load_u8(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_u8(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i16 [[TMP0]] +// +short test_amdgcn_s_buffer_load_i16(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_i16(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret i16 [[TMP0]] +// +unsigned short test_amdgcn_s_buffer_load_u16(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_u16(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret half [[TMP0]] +// +half test_amdgcn_s_buffer_load_f16(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_f16(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v2f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <2 x half> [[TMP0]] +// +v2f16 test_amdgcn_s_buffer_load_v2f16(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v2f16(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v3f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <3 x half> [[TMP0]] +// +v3f16 test_amdgcn_s_buffer_load_v3f16(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v3f16(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v4f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <4 x half> [[TMP0]] +// +v4f16 test_amdgcn_s_buffer_load_v4f16(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v4f16(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v2i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <2 x i8> [[TMP0]] +// +v2i8 test_amdgcn_s_buffer_load_v2i8(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v2i8(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v3i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <3 x i8> [[TMP0]] +// +v3i8 test_amdgcn_s_buffer_load_v3i8(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v3i8(rsrc, /*offset=*/0, /*aux=*/0); +} + +// CHECK-LABEL: @test_amdgcn_s_buffer_load_v4i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> [[RSRC:%.*]], i32 0, i32 0) +// CHECK-NEXT: ret <4 x i8> [[TMP0]] +// +v4i8 test_amdgcn_s_buffer_load_v4i8(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_v4i8(rsrc, /*offset=*/0, /*aux=*/0); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-s-buffer-load-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-s-buffer-load-error.cl new file mode 100644 index 0000000000000..0ef220872b449 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-s-buffer-load-error.cl @@ -0,0 +1,125 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int v4i32 __attribute__((ext_vector_type(4))); +typedef int v2i32 __attribute__((ext_vector_type(2))); +typedef int v3i32 __attribute__((ext_vector_type(3))); +typedef int v8i32 __attribute__((ext_vector_type(8))); +typedef int v16i32 __attribute__((ext_vector_type(16))); +typedef float v2f32 __attribute__((ext_vector_type(2))); +typedef float v3f32 __attribute__((ext_vector_type(3))); +typedef float v4f32 __attribute__((ext_vector_type(4))); +typedef float v8f32 __attribute__((ext_vector_type(8))); +typedef float v16f32 __attribute__((ext_vector_type(16))); +typedef char v2i8 __attribute__((ext_vector_type(2))); +typedef char v3i8 __attribute__((ext_vector_type(3))); +typedef char v4i8 __attribute__((ext_vector_type(4))); +typedef half v2f16 __attribute__((ext_vector_type(2))); +typedef half v3f16 __attribute__((ext_vector_type(3))); +typedef half v4f16 __attribute__((ext_vector_type(4))); + +int test_amdgcn_s_buffer_load_i32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_i32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_i32' must be a constant integer}} +} + +v2i32 test_amdgcn_s_buffer_load_v2i32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v2i32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v2i32' must be a constant integer}} +} + +v3i32 test_amdgcn_s_buffer_load_v3i32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v3i32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v3i32' must be a constant integer}} +} + +v4i32 test_amdgcn_s_buffer_load_v4i32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v4i32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v4i32' must be a constant integer}} +} + +v8i32 test_amdgcn_s_buffer_load_v8i32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v8i32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v8i32' must be a constant integer}} +} + +v16i32 test_amdgcn_s_buffer_load_v16i32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v16i32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v16i32' must be a constant integer}} +} + +float test_amdgcn_s_buffer_load_f32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_f32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_f32' must be a constant integer}} +} + +v2f32 test_amdgcn_s_buffer_load_v2f32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v2f32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v2f32' must be a constant integer}} +} + +v3f32 test_amdgcn_s_buffer_load_v3f32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v3f32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v3f32' must be a constant integer}} +} + +v4f32 test_amdgcn_s_buffer_load_v4f32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v4f32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v4f32' must be a constant integer}} +} + +v8f32 test_amdgcn_s_buffer_load_v8f32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v8f32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v8f32' must be a constant integer}} +} + +v16f32 test_amdgcn_s_buffer_load_v16f32_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v16f32(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v16f32' must be a constant integer}} +} + +char test_amdgcn_s_buffer_load_i8_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_i8(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_i8' must be a constant integer}} +} + +unsigned char test_amdgcn_s_buffer_load_u8_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_u8(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_u8' must be a constant integer}} +} + +short test_amdgcn_s_buffer_load_i16_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_i16(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_i16' must be a constant integer}} +} + +unsigned short test_amdgcn_s_buffer_load_u16_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_u16(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_u16' must be a constant integer}} +} + +v2i8 test_amdgcn_s_buffer_load_v2i8_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v2i8(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v2i8' must be a constant integer}} +} + +v3i8 test_amdgcn_s_buffer_load_v3i8_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v3i8(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v3i8' must be a constant integer}} +} + +v4i8 test_amdgcn_s_buffer_load_v4i8_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v4i8(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v4i8' must be a constant integer}} +} + +half test_amdgcn_s_buffer_load_f16_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_f16(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_f16' must be a constant integer}} +} + +v2f16 test_amdgcn_s_buffer_load_v2f16_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v2f16(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v2f16' must be a constant integer}} +} + +v3f16 test_amdgcn_s_buffer_load_v3f16_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v3f16(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v3f16' must be a constant integer}} +} + +v4f16 test_amdgcn_s_buffer_load_v4f16_non_const_aux(v4i32 rsrc, int offset, int aux) { + return __builtin_amdgcn_s_buffer_load_v4f16(rsrc, offset, aux); //expected-error{{argument to '__builtin_amdgcn_s_buffer_load_v4f16' must be a constant integer}} +} + +int test_amdgcn_s_buffer_load_i32_too_few_args(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_i32(rsrc, 0); //expected-error{{too few arguments to function call, expected 3, have 2}} +} + +int test_amdgcn_s_buffer_load_i32_too_many_args(v4i32 rsrc) { + return __builtin_amdgcn_s_buffer_load_i32(rsrc, 0, 0, 0); //expected-error{{too many arguments to function call, expected 3, have 4}} +} + +int test_amdgcn_s_buffer_load_i32_wrong_rsrc_type(__amdgpu_buffer_rsrc_t rsrc) { + return __builtin_amdgcn_s_buffer_load_i32(rsrc, 0, 0); //expected-error{{passing '__private __amdgpu_buffer_rsrc_t' to parameter of incompatible type '__attribute__((__vector_size__(4 * sizeof(int)))) int' (vector of 4 'int' values)}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
