Author: Stanislav Mekhanoshin
Date: 2025-08-26T13:10:13-07:00
New Revision: 5321335f97eb6f638d37c3ed28043e9f69e23720
URL: https://github.com/llvm/llvm-project/commit/5321335f97eb6f638d37c3ed28043e9f69e23720
DIFF: https://github.com/llvm/llvm-project/commit/5321335f97eb6f638d37c3ed28043e9f69e23720.diff

LOG: [AMDGCN] Add missing gfx1250 clang tests. NFC. (#155478)

Added:

Modified:
    clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
    clang/test/Driver/cuda-bad-arch.cu
    clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
    clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl

Removed:


################################################################################

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
index f300b05fe798a..cdfe9fcd89091 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
@@ -1,6 +1,7 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
 
 typedef float v2f __attribute__((ext_vector_type(2)));
 

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 4ff0571239e71..23af19d8ad950 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -58,6 +58,58 @@ void test_s_wait_tensorcnt() {
   __builtin_amdgcn_s_wait_tensorcnt(0);
 }
 
+// CHECK-LABEL: @test_bitop3_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 1)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT: ret void
+//
+void test_bitop3_b32(global uint* out, uint a, uint b, uint c) {
+  *out = __builtin_amdgcn_bitop3_b32(a, b, c, 1);
+}
+
+// CHECK-LABEL: @test_bitop3_b16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i32 1)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[TMP4]], align 2
+// CHECK-NEXT: ret void
+//
+void test_bitop3_b16(global ushort* out, ushort a, ushort b, ushort c) {
+  *out = __builtin_amdgcn_bitop3_b16(a, b, c, 1);
+}
+
 // CHECK-LABEL: @test_prng_b32(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -1258,6 +1310,145 @@ void test_prefetch(generic void *fptr, global void *gptr) {
   __builtin_amdgcn_global_prefetch(gptr, 8);
 }
 
+// CHECK-LABEL: @test_global_add_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store float [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], float [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]]
+// CHECK-NEXT: ret float [[TMP2]]
+//
+float test_global_add_f32(global float *addr, float x) {
+  return __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
+}
+
+// CHECK-LABEL: @test_global_add_half2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store <2 x half> [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], <2 x half> [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// CHECK-NEXT: ret <2 x half> [[TMP2]]
+//
+half2 test_global_add_half2(global half2 *addr, half2 x) {
+  return __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
+}
+
+// CHECK-LABEL: @test_flat_add_2f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store <2 x half> [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], <2 x half> [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// CHECK-NEXT: ret <2 x half> [[TMP2]]
+//
+half2 test_flat_add_2f16(generic half2 *addr, half2 x) {
+  return __builtin_amdgcn_flat_atomic_fadd_v2f16(addr, x);
+}
+
+// CHECK-LABEL: @test_flat_add_2bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store <2 x i16> [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to <2 x bfloat>
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr [[TMP0]], <2 x bfloat> [[TMP2]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[TMP3]] to <2 x i16>
+// CHECK-NEXT: ret <2 x i16> [[TMP4]]
+//
+short2 test_flat_add_2bf16(generic short2 *addr, short2 x) {
+  return __builtin_amdgcn_flat_atomic_fadd_v2bf16(addr, x);
+}
+
+// CHECK-LABEL: @test_global_add_2bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store <2 x i16> [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[ADDR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to <2 x bfloat>
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP0]], <2 x bfloat> [[TMP2]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[TMP3]] to <2 x i16>
+// CHECK-NEXT: ret <2 x i16> [[TMP4]]
+//
+short2 test_global_add_2bf16(global short2 *addr, short2 x) {
+  return __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x);
+}
+
+// CHECK-LABEL: @test_local_add_2f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(3) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <2 x i16> [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[ADDR_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to <2 x bfloat>
+// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP0]], <2 x bfloat> [[TMP2]] syncscope("agent") monotonic, align 4
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[TMP3]] to <2 x i16>
+// CHECK-NEXT: ret <2 x i16> [[TMP4]]
+//
+short2 test_local_add_2f16(local short2 *addr, short2 x) {
+  return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
+}
+
+// CHECK-LABEL: @test_local_add_2bf16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(3) [[ADDR:%.*]], ptr [[ADDR_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <2 x half> [[X:%.*]], ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(3), ptr [[ADDR_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[X_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP0]], <2 x half> [[TMP1]] syncscope("agent") monotonic, align 4
+// CHECK-NEXT: ret <2 x half> [[TMP2]]
+//
+half2 test_local_add_2bf16(local half2 *addr, half2 x) {
+  return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
+}
+
 // CHECK-LABEL: @test_cvt_pk_fp8_f32_e5m3(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
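The hunks above add gfx1250 CodeGen coverage for the bitop3 builtins and the returning atomic floating-point add builtins. As a quick orientation, here is a minimal standalone OpenCL kernel (illustrative only, not part of this patch) calling two of the builtins exercised above; it assumes compilation with -target-cpu gfx1250 as in the RUN lines of the test file, and the kernel name and selector value are arbitrary.

// Illustrative sketch, not from the commit. Assumes -target-cpu gfx1250.
kernel void gfx1250_builtins_sketch(global uint *out, global float *acc,
                                    uint a, uint b, uint c, float x) {
  // bitop3 takes three sources plus a constant selector; 1 mirrors the
  // immediate used in the CodeGen checks above.
  out[0] = __builtin_amdgcn_bitop3_b32(a, b, c, 1);
  // Agent-scope atomic float add; the builtin returns the value the memory
  // held before the add, matching the atomicrmw fadd pattern checked above.
  float before = __builtin_amdgcn_global_atomic_fadd_f32(acc, x);
  out[1] = (uint)before;
}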
diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu
index 85231a5b9705a..6ac72296049bc 100644
--- a/clang/test/Driver/cuda-bad-arch.cu
+++ b/clang/test/Driver/cuda-bad-arch.cu
@@ -25,6 +25,8 @@
 // RUN: | FileCheck -check-prefix OK %s
 // RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx942 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
+// RUN: %clang -### -x hip --target=x86_64-linux-gnu -nogpulib -nogpuinc --cuda-gpu-arch=gfx1250 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix OK %s
 
 // We don't allow using NVPTX/AMDGCN for host compilation.
 // RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \

diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 8f34cccaecb7a..4a28f9acdecf7 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -23,6 +23,11 @@ void test_setprio_inc_wg(short a) {
   __builtin_amdgcn_s_setprio_inc_wg(a); // expected-error {{'__builtin_amdgcn_s_setprio_inc_wg' must be a constant integer}}
 }
 
+void test_bitop3_args(global uint* out, uint a, uint b, uint c) {
+  *out = __builtin_amdgcn_bitop3_b32(a, b, c, a); // expected-error {{argument to '__builtin_amdgcn_bitop3_b32' must be a constant integer}}
+  *out = __builtin_amdgcn_bitop3_b16((ushort)a, (ushort)b, (ushort)c, a); // expected-error {{argument to '__builtin_amdgcn_bitop3_b16' must be a constant integer}}
+}
+
 void test_s_monitor_sleep(short a) {
   __builtin_amdgcn_s_monitor_sleep(a); // expected-error {{'__builtin_amdgcn_s_monitor_sleep' must be a constant integer}}
 }
@@ -43,6 +48,12 @@ void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) {
   __builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}}
 }
 
+void test_cvt_sr_f8_f16(global int* out, uint sr, int old, int sel)
+{
+  *out = __builtin_amdgcn_cvt_sr_bf8_f16(1.0, sr, old, sel); // expected-error {{'__builtin_amdgcn_cvt_sr_bf8_f16' must be a constant integer}}
+  *out = __builtin_amdgcn_cvt_sr_fp8_f16(1.0, sr, old, sel); // expected-error {{'__builtin_amdgcn_cvt_sr_fp8_f16' must be a constant integer}}
+}
+
 void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2,
                        global float32 *outf32, global half16 *outh16, global bfloat16 *outy16,
                        global float16 *outf16, uint3 src3,
@@ -92,6 +103,34 @@ void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, global
   *b128out = __builtin_amdgcn_flat_load_monitor_b128(b128faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b128' must be a constant integer}}
 }
 
+void test_amdgcn_async_load_store_lds_offset(global char* gaddr8, global int *gaddr32, global v2i* gaddr64, global v4i* gaddr128, local char* laddr8,
+                                             local int *laddr32, local v2i* laddr64, local v4i* laddr128, int offset, int mask)
+{
+  __builtin_amdgcn_global_store_async_from_lds_b8(gaddr8, laddr8, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b8' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b32(gaddr32, laddr32, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b32' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b64(gaddr64, laddr64, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b64' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b128(gaddr128, laddr128, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b128' must be a constant integer}}
+
+  __builtin_amdgcn_global_store_async_from_lds_b8(gaddr8, laddr8, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b8' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b32(gaddr32, laddr32, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b32' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b64(gaddr64, laddr64, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b64' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b128(gaddr128, laddr128, offset, 0); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b128' must be a constant integer}}
+}
+
+void test_amdgcn_async_load_store_lds_cpol(global char* gaddr8, global int *gaddr32, global v2i* gaddr64, global v4i* gaddr128, local char* laddr8,
+                                           local int *laddr32, local v2i* laddr64, local v4i* laddr128, int cpol, int mask)
+{
+  __builtin_amdgcn_global_store_async_from_lds_b8(gaddr8, laddr8, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b8' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b32(gaddr32, laddr32, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b32' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b64(gaddr64, laddr64, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b64' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b128(gaddr128, laddr128, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b128' must be a constant integer}}
+
+  __builtin_amdgcn_global_store_async_from_lds_b8(gaddr8, laddr8, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b8' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b32(gaddr32, laddr32, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b32' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b64(gaddr64, laddr64, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b64' must be a constant integer}}
+  __builtin_amdgcn_global_store_async_from_lds_b128(gaddr128, laddr128, 16, cpol); // expected-error {{'__builtin_amdgcn_global_store_async_from_lds_b128' must be a constant integer}}
+}
+
 void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
 {
   __builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_load_to_lds' must be a constant integer}}

diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl
index c5440ed1a75ae..d7045cdf55837 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl
@@ -1,6 +1,11 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1200 -verify -S -o - %s
 
-void test() {
+typedef unsigned int uint;
+typedef unsigned short int ushort;
+
+void test(global uint* out, uint a, uint b, uint c) {
   __builtin_amdgcn_s_setprio_inc_wg(1); // expected-error {{'__builtin_amdgcn_s_setprio_inc_wg' needs target feature setprio-inc-wg-inst}}
+  *out = __builtin_amdgcn_bitop3_b32(a, b, c, 1); // expected-error {{'__builtin_amdgcn_bitop3_b32' needs target feature bitop3-insts}}
+  *out = __builtin_amdgcn_bitop3_b16((ushort)a, (ushort)b, (ushort)c, 1); // expected-error {{'__builtin_amdgcn_bitop3_b16' needs target feature bitop3-insts}}
 }
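The Sema tests above pin down two diagnostics for the new builtins: the trailing selector argument must be an integer constant expression, and on a target without the bitop3-insts feature (gfx1200 in the last RUN line) the calls are rejected outright. A minimal sketch of the first constraint, again illustrative rather than part of the patch:

// Illustrative only; built for gfx1250, the first call is accepted and the
// second would be diagnosed as in builtins-amdgcn-error-gfx1250-param.cl.
kernel void bitop3_const_selector(global uint *out, uint a, uint b, uint c, uint n) {
  *out = __builtin_amdgcn_bitop3_b32(a, b, c, 1);    // OK: literal constant selector
  // *out = __builtin_amdgcn_bitop3_b32(a, b, c, n); // error: must be a constant integer
}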