https://github.com/ssahasra updated https://github.com/llvm/llvm-project/pull/199176
>From 9315208f004466ac908acdd2830856706492b9a7 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <[email protected]> Date: Thu, 12 Mar 2026 12:30:16 +0530 Subject: [PATCH 1/4] [AMDGPU][Clang] add __builtin_amdgcn_av_(load|store)_b128 These builtins allow the program to request store-available and load-visible accesses as described in #191246. Each of them takes a __MEMORY_SCOPE_* operand that is then translated to target-specific cache policy bits. This patch was extracted from #172090. Co-authored-by: macurtis-amd <[email protected]> Assisted-by: Claude Opus 4.6z --- clang/docs/LanguageExtensions.rst | 28 ++ clang/include/clang/Basic/BuiltinsAMDGPU.td | 7 + clang/include/clang/Sema/SemaAMDGPU.h | 1 + clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 16 ++ clang/lib/Sema/SemaAMDGPU.cpp | 12 + .../builtins-amdgcn-global-load-store.cl | 250 ++++++++++++++++++ ...builtins-amdgcn-global-load-store-error.cl | 22 ++ ...s-amdgcn-global-load-store-target-error.cl | 26 ++ 8 files changed, 362 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index fbb9947f39d3e..9fa39c09b1f6c 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5261,6 +5261,8 @@ builtin function, and are named with a ``__opencl_`` prefix. The macros and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.) +.. _langext-__scoped_atomic: + __scoped_atomic builtins ------------------------ @@ -5756,6 +5758,32 @@ returns the bit at the position of the current lane. It is almost equivalent to ``(mask & (1 << lane_id)) != 0``, except that its behavior is only defined if the given mask has the same value for all active lanes of the current wave. + +__builtin_amdgcn_av_{load,store}_b128 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Signature: + +.. code-block:: c + + typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u; + + v4u __builtin_amdgcn_av_load_b128(v4u *src, int scope); + + void __builtin_amdgcn_av_store_b128(v4u *dst, v4u data, int scope); + +Load or store a vector of 4 unsigned integers from or to memory with cache +behavior specified by ``scope``, which is one of the ``__MEMORY_SCOPE_*`` macros +defined for :ref:`scoped atomic builtins<langext-__c11_atomic>`. + +The pointer argument must point to the global or generic address space. + +These builtins are supported on gfx9, gfx10, gfx11, and gfx12 targets. + +They map to the LLVM intrinsics ``llvm.amdgcn.av.load.b128`` and +``llvm.amdgcn.av.store.b128`` documented in `User Guide for AMDGPU Backend +<https://llvm.org/docs/AMDGPUUsage.html>`_. + ARM/AArch64 Language Extensions ------------------------------- diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index d8020bdcc8458..afcafe4defdbe 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -280,6 +280,13 @@ def __builtin_amdgcn_raw_ptr_buffer_load_async_lds : AMDGPUBuiltin<"void(__amdgp def __builtin_amdgcn_struct_ptr_buffer_load_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">; def __builtin_amdgcn_struct_ptr_buffer_load_async_lds : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, void address_space<3> *, _Constant unsigned int, int, int, int, _Constant int, _Constant int)", [], "vmem-to-lds-load-insts">; +//===----------------------------------------------------------------------===// +// Global Available/Visible memory accesses. +//===----------------------------------------------------------------------===// + +def __builtin_amdgcn_av_load_b128: AMDGPUBuiltin<"_ExtVector<4, unsigned int>(_ExtVector<4, unsigned int> *, int)", [], "gfx9-insts">; +def __builtin_amdgcn_av_store_b128: AMDGPUBuiltin<"void(_ExtVector<4, unsigned int> *, _ExtVector<4, unsigned int>, int)", [], "gfx9-insts">; + //===----------------------------------------------------------------------===// // Async mark builtins. //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h index d520f3df544f4..a6205534e0de3 100644 --- a/clang/include/clang/Sema/SemaAMDGPU.h +++ b/clang/include/clang/Sema/SemaAMDGPU.h @@ -38,6 +38,7 @@ class SemaAMDGPU : public SemaBase { bool checkAtomicOrderingCABIArg(Expr *E, bool MayLoad, bool MayStore); bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore); + bool checkAVLoadStore(CallExpr *TheCall, bool IsStore); bool checkAtomicMonitorLoad(CallExpr *TheCall); bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index a88dbb71b3ddf..21f32b12c4fd1 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -1009,6 +1009,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()}); return Builder.CreateCall(F, {Args}); } + case AMDGPU::BI__builtin_amdgcn_av_load_b128: + case AMDGPU::BI__builtin_amdgcn_av_store_b128: { + const bool IsStore = BuiltinID == AMDGPU::BI__builtin_amdgcn_av_store_b128; + SmallVector<Value *, 5> Args = {EmitScalarExpr(E->getArg(0))}; // addr + if (IsStore) + Args.push_back(EmitScalarExpr(E->getArg(1))); // data + const unsigned ScopeIdx = E->getNumArgs() - 1; + auto *ScopeExpr = + cast<llvm::ConstantInt>(EmitScalarExpr(E->getArg(ScopeIdx))); + Args.push_back(emitScopeMD(*this, ScopeExpr->getZExtValue())); + llvm::Function *F = + CGM.getIntrinsic(IsStore ? Intrinsic::amdgcn_av_store_b128 + : Intrinsic::amdgcn_av_load_b128, + {Args[0]->getType()}); + return Builder.CreateCall(F, Args); + } case AMDGPU::BI__builtin_amdgcn_get_fpenv: { Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv, {llvm::Type::getInt64Ty(getLLVMContext())}); diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 60f74fd15226f..757cdfbf20819 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -149,6 +149,10 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6: case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6: return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15); + case AMDGPU::BI__builtin_amdgcn_av_load_b128: + return checkAVLoadStore(TheCall, /*IsStore=*/false); + case AMDGPU::BI__builtin_amdgcn_av_store_b128: + return checkAVLoadStore(TheCall, /*IsStore=*/true); case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B: case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B: case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B: @@ -482,6 +486,14 @@ static bool checkScopeAsInt(SemaAMDGPU &S, Expr *Scope) { return false; } +bool SemaAMDGPU::checkAVLoadStore(CallExpr *TheCall, bool IsStore) { + if (checkGlobalOrFlatPointerArg(*this, TheCall)) + return true; + + Expr *Scope = TheCall->getArg(TheCall->getNumArgs() - 1); + return checkScopeAsInt(*this, Scope); +} + bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) { bool Fail = checkGlobalOrFlatPointerArg(*this, TheCall); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl new file mode 100644 index 0000000000000..63d7fcac16874 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-store.cl @@ -0,0 +1,250 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals smart +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s + +typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32; +typedef v4u32 __global *global_ptr_to_v4u32; + +//------------------------------------------------------------------------------ +// Global Load +//------------------------------------------------------------------------------ + +// CHECK-LABEL: @test_amdgcn_global_load_b128_wave( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) [[PTR:%.*]], metadata [[META7:![0-9]+]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_global_load_b128_wave(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_WVFRNT); +} + +// CHECK-LABEL: @test_amdgcn_global_load_b128_workgroup( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) [[PTR:%.*]], metadata [[META8:![0-9]+]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_global_load_b128_workgroup(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_WRKGRP); +} + +// CHECK-LABEL: @test_amdgcn_global_load_b128_device( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) [[PTR:%.*]], metadata [[META9:![0-9]+]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_global_load_b128_device(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_DEVICE); +} + +// CHECK-LABEL: @test_amdgcn_global_load_b128_system( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) [[PTR:%.*]], metadata [[META10:![0-9]+]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_global_load_b128_system(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: @test_amdgcn_global_load_b128_single( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) [[PTR:%.*]], metadata [[META11:![0-9]+]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_global_load_b128_single(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_SINGLE); +} + +// CHECK-LABEL: @test_amdgcn_global_load_b128_cluster( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p1(ptr addrspace(1) [[PTR:%.*]], metadata [[META12:![0-9]+]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_global_load_b128_cluster(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_CLUSTR); +} + +//------------------------------------------------------------------------------ +// Global Store +//------------------------------------------------------------------------------ + +// CHECK-LABEL: @test_amdgcn_global_store_b128_wave( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p1(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META7]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_global_store_b128_wave(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_WVFRNT); +} + +// CHECK-LABEL: @test_amdgcn_global_store_b128_workgroup( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p1(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META8]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_global_store_b128_workgroup(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_WRKGRP); +} + +// CHECK-LABEL: @test_amdgcn_global_store_b128_device( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p1(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META9]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_global_store_b128_device(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_DEVICE); +} + +// CHECK-LABEL: @test_amdgcn_global_store_b128_system( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p1(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META10]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_global_store_b128_system(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: @test_amdgcn_global_store_b128_single( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p1(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META11]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_global_store_b128_single(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_SINGLE); +} + +// CHECK-LABEL: @test_amdgcn_global_store_b128_cluster( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p1(ptr addrspace(1) [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META12]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_global_store_b128_cluster(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_CLUSTR); +} + +//------------------------------------------------------------------------------ +// Flat Load +//------------------------------------------------------------------------------ + +// CHECK-LABEL: @test_amdgcn_flat_load_b128_wave( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p0(ptr [[PTR:%.*]], metadata [[META7]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_flat_load_b128_wave(v4u32 * ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_WVFRNT); +} + +// CHECK-LABEL: @test_amdgcn_flat_load_b128_workgroup( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p0(ptr [[PTR:%.*]], metadata [[META8]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_flat_load_b128_workgroup(v4u32 * ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_WRKGRP); +} + +// CHECK-LABEL: @test_amdgcn_flat_load_b128_device( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p0(ptr [[PTR:%.*]], metadata [[META9]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_flat_load_b128_device(v4u32 * ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_DEVICE); +} + +// CHECK-LABEL: @test_amdgcn_flat_load_b128_system( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p0(ptr [[PTR:%.*]], metadata [[META10]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_flat_load_b128_system(v4u32 * ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: @test_amdgcn_flat_load_b128_single( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p0(ptr [[PTR:%.*]], metadata [[META11]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_flat_load_b128_single(v4u32 * ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_SINGLE); +} + +// CHECK-LABEL: @test_amdgcn_flat_load_b128_cluster( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.av.load.b128.p0(ptr [[PTR:%.*]], metadata [[META12]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +v4u32 test_amdgcn_flat_load_b128_cluster(v4u32 * ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_CLUSTR); +} + +//------------------------------------------------------------------------------ +// Flat Store +//------------------------------------------------------------------------------ + +// CHECK-LABEL: @test_amdgcn_flat_store_b128_wave( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p0(ptr [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META7]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_flat_store_b128_wave(v4u32 * ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_WVFRNT); +} + +// CHECK-LABEL: @test_amdgcn_flat_store_b128_workgroup( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p0(ptr [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META8]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_flat_store_b128_workgroup(v4u32 * ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_WRKGRP); +} + +// CHECK-LABEL: @test_amdgcn_flat_store_b128_device( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p0(ptr [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META9]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_flat_store_b128_device(v4u32 * ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_DEVICE); +} + +// CHECK-LABEL: @test_amdgcn_flat_store_b128_system( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p0(ptr [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META10]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_flat_store_b128_system(v4u32 * ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_SYSTEM); +} + +// CHECK-LABEL: @test_amdgcn_flat_store_b128_single( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p0(ptr [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META11]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_flat_store_b128_single(v4u32 * ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_SINGLE); +} + +// CHECK-LABEL: @test_amdgcn_flat_store_b128_cluster( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.av.store.b128.p0(ptr [[PTR:%.*]], <4 x i32> [[DATA:%.*]], metadata [[META12]]) +// CHECK-NEXT: ret void +// +void test_amdgcn_flat_store_b128_cluster(v4u32 * ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_CLUSTR); +} +//. +// CHECK: [[META7]] = !{!"wavefront"} +// CHECK: [[META8]] = !{!"workgroup"} +// CHECK: [[META9]] = !{!"agent"} +// CHECK: [[META10]] = !{!""} +// CHECK: [[META11]] = !{!"singlethread"} +// CHECK: [[META12]] = !{!"cluster"} +//. diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl new file mode 100644 index 0000000000000..b2f7b46547632 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-error.cl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32; +typedef v4u32 __global *global_ptr_to_v4u32; +typedef v4u32 __private *private_ptr_to_v4u32; + +void test_amdgcn_av_store_b128_bad_ptr(private_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, __MEMORY_SCOPE_SYSTEM); //expected-error{{builtin requires a global or generic pointer}} +} + +void test_amdgcn_av_store_b128_bad_scope(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, 42); //expected-error{{synchronization scope argument to atomic operation is invalid}} +} + +v4u32 test_amdgcn_av_load_b128_bad_ptr(private_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, __MEMORY_SCOPE_SYSTEM); //expected-error{{builtin requires a global or generic pointer}} +} + +v4u32 test_amdgcn_av_load_b128_bad_scope(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, 42); //expected-error{{synchronization scope argument to atomic operation is invalid}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl new file mode 100644 index 0000000000000..e85b120661cfd --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl @@ -0,0 +1,26 @@ +// We test loads and stores separately because clang only seems to exit after +// the first 'target feature' error. + +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_LOAD -S -verify -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_LOAD -S -verify -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_LOAD -S -verify -o - %s + +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_STORE -S -verify -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_STORE -S -verify -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_STORE -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32; +typedef v4u32 __global *global_ptr_to_v4u32; + +#ifdef TEST_LOAD +v4u32 test_amdgcn_av_load_b128_target(global_ptr_to_v4u32 ptr) { + return __builtin_amdgcn_av_load_b128(ptr, 0); // expected-error{{'__builtin_amdgcn_av_load_b128' needs target feature gfx9-insts}} +} +#endif + +#ifdef TEST_STORE +void test_amdgcn_av_store_b128_target(global_ptr_to_v4u32 ptr, v4u32 data) { + __builtin_amdgcn_av_store_b128(ptr, data, 0); // expected-error{{'__builtin_amdgcn_av_store_b128' needs target feature gfx9-insts}} +} +#endif >From 43d1fb1d47b39fd914a6c7f0ac329193ea80fcee Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <[email protected]> Date: Tue, 2 Jun 2026 16:57:20 +0530 Subject: [PATCH 2/4] add docs for the builtins; split the target test; add a host/device test --- clang/include/clang/Basic/BuiltinsAMDGPU.td | 10 +++++-- .../include/clang/Basic/BuiltinsAMDGPUDocs.td | 29 +++++++++++++++++++ clang/test/SemaHIP/amdgpu-av-load-store.hip | 20 +++++++++++++ ...s-amdgcn-global-load-store-target-error.cl | 26 ++++++++--------- 4 files changed, 69 insertions(+), 16 deletions(-) create mode 100644 clang/test/SemaHIP/amdgpu-av-load-store.hip diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index afcafe4defdbe..5236de8671ebb 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -284,8 +284,14 @@ def __builtin_amdgcn_struct_ptr_buffer_load_async_lds : AMDGPUBuiltin<"void(__am // Global Available/Visible memory accesses. //===----------------------------------------------------------------------===// -def __builtin_amdgcn_av_load_b128: AMDGPUBuiltin<"_ExtVector<4, unsigned int>(_ExtVector<4, unsigned int> *, int)", [], "gfx9-insts">; -def __builtin_amdgcn_av_store_b128: AMDGPUBuiltin<"void(_ExtVector<4, unsigned int> *, _ExtVector<4, unsigned int>, int)", [], "gfx9-insts">; +def __builtin_amdgcn_av_load_b128 + : AMDGPUBuiltin<"_ExtVector<4, unsigned int>(_ExtVector<4, unsigned int> *, int)", [], "gfx9-insts"> { + let Documentation = [DocAVLoadB128]; +} +def __builtin_amdgcn_av_store_b128 + : AMDGPUBuiltin<"void(_ExtVector<4, unsigned int> *, _ExtVector<4, unsigned int>, int)", [], "gfx9-insts"> { + let Documentation = [DocAVStoreB128]; +} //===----------------------------------------------------------------------===// // Async mark builtins. diff --git a/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td b/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td index a92b85d75d902..293431c5de7e8 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td @@ -573,6 +573,35 @@ WMMA with per-operand scale factors applied during the computation. }]; } +//===----------------------------------------------------------------------===// +// Global Available/Visible Memory Access Builtins +//===----------------------------------------------------------------------===// + +def DocCatAVLoadStore : DocumentationCategory<"Available/Visible Memory Access Builtins"> { + let Content = [{ +These builtins perform 128-bit global or flat memory loads and stores with +available/visible (AV) semantics. +}]; +} + +def DocAVLoadB128 : Documentation { + let Category = DocCatAVLoadStore; + let Content = [{ +Loads 128 bits (4 x i32) from the pointer ``ptr``. The pointer must be in +the global or generic address space. The ``scope`` argument specifies the +synchronization scope using a ``__MEMORY_SCOPE_*`` constant. +}]; +} + +def DocAVStoreB128 : Documentation { + let Category = DocCatAVLoadStore; + let Content = [{ +Stores 128 bits (4 x i32) of ``data`` to the pointer ``ptr``. The pointer +must be in the global or generic address space. The ``scope`` argument +specifies the synchronization scope using a ``__MEMORY_SCOPE_*`` constant. +}]; +} + //===----------------------------------------------------------------------===// // Wave Data Exchange Builtins //===----------------------------------------------------------------------===// diff --git a/clang/test/SemaHIP/amdgpu-av-load-store.hip b/clang/test/SemaHIP/amdgpu-av-load-store.hip new file mode 100644 index 0000000000000..1e9688e891228 --- /dev/null +++ b/clang/test/SemaHIP/amdgpu-av-load-store.hip @@ -0,0 +1,20 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify %s -fcuda-is-device +// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s + +// expected-no-diagnostics + +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) + +typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32; + +__device__ void test_av_load_store_device(v4u32 *ptr, v4u32 data) { + v4u32 res = __builtin_amdgcn_av_load_b128(ptr, 0); + __builtin_amdgcn_av_store_b128(ptr, data, 0); +} + +__global__ void test_av_load_store_kernel(v4u32 *ptr, v4u32 data) { + v4u32 res = __builtin_amdgcn_av_load_b128(ptr, 0); + __builtin_amdgcn_av_store_b128(ptr, data, 0); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl index e85b120661cfd..cec85fbeb9446 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl @@ -1,26 +1,24 @@ -// We test loads and stores separately because clang only seems to exit after -// the first 'target feature' error. - -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_LOAD -S -verify -o - %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_LOAD -S -verify -o - %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_LOAD -S -verify -o - %s - -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx602 -DTEST_STORE -S -verify -o - %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx705 -DTEST_STORE -S -verify -o - %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx810 -DTEST_STORE -S -verify -o - %s +// RUN: split-file %s %t +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx602 -S -verify -o - %t/load.cl +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx705 -S -verify -o - %t/load.cl +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx810 -S -verify -o - %t/load.cl +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx602 -S -verify -o - %t/store.cl +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx705 -S -verify -o - %t/store.cl +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx810 -S -verify -o - %t/store.cl // REQUIRES: amdgpu-registered-target +//--- load.cl typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32; typedef v4u32 __global *global_ptr_to_v4u32; -#ifdef TEST_LOAD v4u32 test_amdgcn_av_load_b128_target(global_ptr_to_v4u32 ptr) { return __builtin_amdgcn_av_load_b128(ptr, 0); // expected-error{{'__builtin_amdgcn_av_load_b128' needs target feature gfx9-insts}} } -#endif -#ifdef TEST_STORE +//--- store.cl +typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int v4u32; +typedef v4u32 __global *global_ptr_to_v4u32; + void test_amdgcn_av_store_b128_target(global_ptr_to_v4u32 ptr, v4u32 data) { __builtin_amdgcn_av_store_b128(ptr, data, 0); // expected-error{{'__builtin_amdgcn_av_store_b128' needs target feature gfx9-insts}} } -#endif >From c2f14391158fae4765605c0619e4de08dd442321 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <[email protected]> Date: Wed, 3 Jun 2026 12:21:37 +0530 Subject: [PATCH 3/4] add ArgNames; use target feature "flat-global-insts" --- clang/include/clang/Basic/BuiltinsAMDGPU.td | 8 +++++--- .../builtins-amdgcn-global-load-store-target-error.cl | 4 ++-- llvm/lib/TargetParser/AMDGPUTargetParser.cpp | 8 ++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 5236de8671ebb..36b62a5ee7c72 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -285,12 +285,14 @@ def __builtin_amdgcn_struct_ptr_buffer_load_async_lds : AMDGPUBuiltin<"void(__am //===----------------------------------------------------------------------===// def __builtin_amdgcn_av_load_b128 - : AMDGPUBuiltin<"_ExtVector<4, unsigned int>(_ExtVector<4, unsigned int> *, int)", [], "gfx9-insts"> { + : AMDGPUBuiltin<"_ExtVector<4, unsigned int>(_ExtVector<4, unsigned int> *, int)", [], "flat-global-insts"> { let Documentation = [DocAVLoadB128]; + let ArgNames = ["ptr", "scope"]; } def __builtin_amdgcn_av_store_b128 - : AMDGPUBuiltin<"void(_ExtVector<4, unsigned int> *, _ExtVector<4, unsigned int>, int)", [], "gfx9-insts"> { + : AMDGPUBuiltin<"void(_ExtVector<4, unsigned int> *, _ExtVector<4, unsigned int>, int)", [], "flat-global-insts"> { let Documentation = [DocAVStoreB128]; + let ArgNames = ["ptr", "data", "scope"]; } //===----------------------------------------------------------------------===// @@ -375,7 +377,7 @@ def __builtin_amdgcn_perm : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned i // GFX9+ only builtins. //===----------------------------------------------------------------------===// -def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "gfx9-insts">; +def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "flat-global-insts">; def __builtin_amdgcn_global_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">; def __builtin_amdgcn_global_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<1> *, float)", [], "atomic-fadd-rtn-insts">; diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl index cec85fbeb9446..9a61513cdc05b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-store-target-error.cl @@ -12,7 +12,7 @@ typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int typedef v4u32 __global *global_ptr_to_v4u32; v4u32 test_amdgcn_av_load_b128_target(global_ptr_to_v4u32 ptr) { - return __builtin_amdgcn_av_load_b128(ptr, 0); // expected-error{{'__builtin_amdgcn_av_load_b128' needs target feature gfx9-insts}} + return __builtin_amdgcn_av_load_b128(ptr, 0); // expected-error{{'__builtin_amdgcn_av_load_b128' needs target feature flat-global-insts}} } //--- store.cl @@ -20,5 +20,5 @@ typedef __attribute__((__vector_size__(4 * sizeof(unsigned int)))) unsigned int typedef v4u32 __global *global_ptr_to_v4u32; void test_amdgcn_av_store_b128_target(global_ptr_to_v4u32 ptr, v4u32 data) { - __builtin_amdgcn_av_store_b128(ptr, data, 0); // expected-error{{'__builtin_amdgcn_av_store_b128' needs target feature gfx9-insts}} + __builtin_amdgcn_av_store_b128(ptr, data, 0); // expected-error{{'__builtin_amdgcn_av_store_b128' needs target feature flat-global-insts}} } diff --git a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp index 756b7c2154ca2..24e6ece329c4c 100644 --- a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp +++ b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp @@ -226,6 +226,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dpp"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; @@ -280,6 +281,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dpp"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; @@ -313,6 +315,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dpp"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; @@ -354,6 +357,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dpp"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; @@ -391,6 +395,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dpp"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["image-insts"] = true; @@ -427,6 +432,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dpp"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx10-insts"] = true; Features["image-insts"] = true; Features["s-memrealtime"] = true; @@ -486,6 +492,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["dot7-insts"] = true; Features["dot10-insts"] = true; Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["gfx8-insts"] = true; Features["16-bit-insts"] = true; Features["dpp"] = true; @@ -532,6 +539,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, case GK_GFX900: case GK_GFX9_GENERIC: Features["gfx9-insts"] = true; + Features["flat-global-insts"] = true; Features["vmem-to-lds-load-insts"] = true; [[fallthrough]]; case GK_GFX810: >From 4c2061ad884a1727f097e94adea7a569e0a1957b Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <[email protected]> Date: Tue, 9 Jun 2026 17:30:22 +0530 Subject: [PATCH 4/4] fix failing tests --- clang/include/clang/Basic/BuiltinsAMDGPU.td | 2 +- clang/test/CodeGen/amdgpu-builtin-is-invocable.c | 2 +- clang/test/CodeGen/amdgpu-builtin-processor-is.c | 2 +- clang/test/CodeGen/link-builtin-bitcode.c | 6 +++--- clang/test/CodeGenCXX/dynamic-cast-address-space.cpp | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 36b62a5ee7c72..8eed188b0f4b2 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -377,7 +377,7 @@ def __builtin_amdgcn_perm : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned i // GFX9+ only builtins. //===----------------------------------------------------------------------===// -def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "flat-global-insts">; +def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "gfx9-insts">; def __builtin_amdgcn_global_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">; def __builtin_amdgcn_global_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<1> *, float)", [], "atomic-fadd-rtn-insts">; diff --git a/clang/test/CodeGen/amdgpu-builtin-is-invocable.c b/clang/test/CodeGen/amdgpu-builtin-is-invocable.c index 3f988cbdf7cee..8de4b0ec8bc20 100644 --- a/clang/test/CodeGen/amdgpu-builtin-is-invocable.c +++ b/clang/test/CodeGen/amdgpu-builtin-is-invocable.c @@ -54,7 +54,7 @@ void foo() { // AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" } // AMDGCN-GFX1010: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) } //. -// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } +// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+flat-global-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } // AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind } // AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) } //. diff --git a/clang/test/CodeGen/amdgpu-builtin-processor-is.c b/clang/test/CodeGen/amdgpu-builtin-processor-is.c index 308eec1d212a5..8dfbb55566598 100644 --- a/clang/test/CodeGen/amdgpu-builtin-processor-is.c +++ b/clang/test/CodeGen/amdgpu-builtin-processor-is.c @@ -63,7 +63,7 @@ void foo() { //. // AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" } //. -// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } +// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+flat-global-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } // AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind } // AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) } //. diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index ed5abca7f34b0..8849ee735ce04 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { // CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" } -// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-global-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+flat-global-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-global-insts,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index dbca787e26da2..8252a511d6b44 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -107,9 +107,9 @@ const B& f(A *a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+flat-global-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+flat-global-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+mqsad-insts,+mqsad-pk-insts,+msad-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
