https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/182331
>From 793bb3f18edb52dca03e25e02edbe50b4ca3de24 Mon Sep 17 00:00:00 2001 From: ranapratap55 <[email protected]> Date: Thu, 19 Feb 2026 14:27:38 +0530 Subject: [PATCH 1/2] [AMDGPU] Update f16 builtin definitions to use _Float16 instead of __fp16 --- clang/include/clang/Basic/BuiltinsAMDGPU.td | 22 +- .../CodeGenHIP/builtins-amdgcn-vi-f16.hip | 235 ++++++++++++++++++ 2 files changed, 246 insertions(+), 11 deletions(-) create mode 100644 clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 86b10eba55e8e..39dee9121bfc0 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -255,17 +255,17 @@ def __builtin_amdgcn_interp_mov : AMDGPUBuiltin<"float(unsigned int, unsigned in // VI+ only builtins. //===----------------------------------------------------------------------===// -def __builtin_amdgcn_div_fixuph : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_rcph : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_sqrth : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_rsqh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_sinh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_cosh : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_ldexph : AMDGPUBuiltin<"__fp16(__fp16, int)", [Const], "16-bit-insts">; -def __builtin_amdgcn_frexp_manth : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_frexp_exph : AMDGPUBuiltin<"short(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_fracth : AMDGPUBuiltin<"__fp16(__fp16)", [Const], "16-bit-insts">; -def __builtin_amdgcn_classh : AMDGPUBuiltin<"bool(__fp16, int)", [Const], "16-bit-insts">; +def __builtin_amdgcn_div_fixuph : AMDGPUBuiltin<"_Float16(_Float16, _Float16, _Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_rcph : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_sqrth : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_rsqh : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_sinh : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_cosh : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_ldexph : AMDGPUBuiltin<"_Float16(_Float16, int)", [Const], "16-bit-insts">; +def __builtin_amdgcn_frexp_manth : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_frexp_exph : AMDGPUBuiltin<"short(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_fracth : AMDGPUBuiltin<"_Float16(_Float16)", [Const], "16-bit-insts">; +def __builtin_amdgcn_classh : AMDGPUBuiltin<"bool(_Float16, int)", [Const], "16-bit-insts">; def __builtin_amdgcn_s_memrealtime : AMDGPUBuiltin<"uint64_t()", [], "s-memrealtime">; def __builtin_amdgcn_mov_dpp : AMDGPUBuiltin<"int(int, _Constant int, _Constant int, _Constant int, _Constant bool)", [Const, CustomTypeChecking], "dpp">; def __builtin_amdgcn_update_dpp : AMDGPUBuiltin<"int(int, int, _Constant int, _Constant int, _Constant int, _Constant bool)", [Const, CustomTypeChecking], "dpp">; diff --git a/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip b/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip new file mode 100644 index 0000000000000..9429b78665f95 --- /dev/null +++ b/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip @@ -0,0 +1,235 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu tonga -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm -fcuda-is-device -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1012 -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __device__ __attribute__((device)) + +// CHECK-LABEL: define dso_local void @_Z22test_div_fixup_f16_hipPDF16_DF16_DF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store half [[B]], ptr [[B_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store half [[C]], ptr [[C_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = call contract half @llvm.amdgcn.div.fixup.f16(half [[TMP0]], half [[TMP1]], half [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP3]], ptr [[TMP4]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_div_fixup_f16_hip(_Float16 *out, _Float16 a, _Float16 b, _Float16 c) { + *out = __builtin_amdgcn_div_fixuph(a, b, c); +} + +// CHECK-LABEL: define dso_local void @_Z16test_rcp_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_rcp_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_rcph(a); +} + +// CHECK-LABEL: define dso_local void @_Z17test_sqrt_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_sqrt_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_sqrth(a); +} + +// CHECK-LABEL: define dso_local void @_Z16test_rsq_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.rsq.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_rsq_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_rsqh(a); +} + +// CHECK-LABEL: define dso_local void @_Z16test_sin_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.sin.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_sin_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_sinh(a); +} + +// CHECK-LABEL: define dso_local void @_Z16test_cos_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.cos.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_cos_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_cosh(a); +} + +// CHECK-LABEL: define dso_local void @_Z18test_ldexp_f16_hipPDF16_DF16_i( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: [[TMP3:%.*]] = call contract half @llvm.ldexp.f16.i16(half [[TMP0]], i16 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP3]], ptr [[TMP4]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_ldexp_f16_hip(_Float16 *out, _Float16 a, int b) { + *out = __builtin_amdgcn_ldexph(a, b); +} + +// CHECK-LABEL: define dso_local void @_Z23test_frexp_mant_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.frexp.mant.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_frexp_mant_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_frexp_manth(a); +} + +// CHECK-LABEL: define dso_local void @_Z22test_frexp_exp_f16_hipPsDF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_frexp_exp_f16_hip(short *out, _Float16 a) { + *out = __builtin_amdgcn_frexp_exph(a); +} + +// CHECK-LABEL: define dso_local void @_Z18test_fract_f16_hipPDF16_DF16_( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call contract half @llvm.amdgcn.fract.f16(half [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_fract_f16_hip(_Float16 *out, _Float16 a) { + *out = __builtin_amdgcn_fracth(a); +} + +// CHECK-LABEL: define dso_local void @_Z18test_class_f16_hipPDF16_DF16_i( +// CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.amdgcn.class.f16(half [[TMP0]], i32 [[TMP1]]) +// CHECK-NEXT: [[CONV:%.*]] = uitofp i1 [[TMP2]] to half +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store half [[CONV]], ptr [[TMP3]], align 2 +// CHECK-NEXT: ret void +// +__device__ void test_class_f16_hip(_Float16 *out, _Float16 a, int b) { + *out = __builtin_amdgcn_classh(a, b); +} >From b0bf77111f847860c3e7db740c3e8021af4e9e2d Mon Sep 17 00:00:00 2001 From: ranapratap55 <[email protected]> Date: Thu, 19 Feb 2026 23:41:10 +0530 Subject: [PATCH 2/2] [AMDGPU] Removed _hip suffix from test names --- .../CodeGenHIP/builtins-amdgcn-vi-f16.hip | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip b/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip index 9429b78665f95..08523f7c6b1b3 100644 --- a/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip +++ b/clang/test/CodeGenHIP/builtins-amdgcn-vi-f16.hip @@ -7,7 +7,7 @@ #define __device__ __attribute__((device)) -// CHECK-LABEL: define dso_local void @_Z22test_div_fixup_f16_hipPDF16_DF16_DF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]], half noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -30,11 +30,11 @@ // CHECK-NEXT: store half [[TMP3]], ptr [[TMP4]], align 2 // CHECK-NEXT: ret void // -__device__ void test_div_fixup_f16_hip(_Float16 *out, _Float16 a, _Float16 b, _Float16 c) { +__device__ void test_div_fixup_f16(_Float16 *out, _Float16 a, _Float16 b, _Float16 c) { *out = __builtin_amdgcn_div_fixuph(a, b, c); } -// CHECK-LABEL: define dso_local void @_Z16test_rcp_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z12test_rcp_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -49,11 +49,11 @@ __device__ void test_div_fixup_f16_hip(_Float16 *out, _Float16 a, _Float16 b, _F // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_rcp_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_rcp_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_rcph(a); } -// CHECK-LABEL: define dso_local void @_Z17test_sqrt_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z13test_sqrt_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -68,11 +68,11 @@ __device__ void test_rcp_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_sqrt_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_sqrt_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_sqrth(a); } -// CHECK-LABEL: define dso_local void @_Z16test_rsq_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z12test_rsq_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -87,11 +87,11 @@ __device__ void test_sqrt_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_rsq_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_rsq_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_rsqh(a); } -// CHECK-LABEL: define dso_local void @_Z16test_sin_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z12test_sin_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -106,11 +106,11 @@ __device__ void test_rsq_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_sin_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_sin_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_sinh(a); } -// CHECK-LABEL: define dso_local void @_Z16test_cos_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z12test_cos_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -125,11 +125,11 @@ __device__ void test_sin_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_cos_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_cos_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_cosh(a); } -// CHECK-LABEL: define dso_local void @_Z18test_ldexp_f16_hipPDF16_DF16_i( +// CHECK-LABEL: define dso_local void @_Z14test_ldexp_f16PDF16_DF16_i( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -149,11 +149,11 @@ __device__ void test_cos_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store half [[TMP3]], ptr [[TMP4]], align 2 // CHECK-NEXT: ret void // -__device__ void test_ldexp_f16_hip(_Float16 *out, _Float16 a, int b) { +__device__ void test_ldexp_f16(_Float16 *out, _Float16 a, int b) { *out = __builtin_amdgcn_ldexph(a, b); } -// CHECK-LABEL: define dso_local void @_Z23test_frexp_mant_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z19test_frexp_mant_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -168,11 +168,11 @@ __device__ void test_ldexp_f16_hip(_Float16 *out, _Float16 a, int b) { // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_frexp_mant_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_frexp_mant_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_frexp_manth(a); } -// CHECK-LABEL: define dso_local void @_Z22test_frexp_exp_f16_hipPsDF16_( +// CHECK-LABEL: define dso_local void @_Z18test_frexp_exp_f16PsDF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -187,11 +187,11 @@ __device__ void test_frexp_mant_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_frexp_exp_f16_hip(short *out, _Float16 a) { +__device__ void test_frexp_exp_f16(short *out, _Float16 a) { *out = __builtin_amdgcn_frexp_exph(a); } -// CHECK-LABEL: define dso_local void @_Z18test_fract_f16_hipPDF16_DF16_( +// CHECK-LABEL: define dso_local void @_Z14test_fract_f16PDF16_DF16_( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -206,11 +206,11 @@ __device__ void test_frexp_exp_f16_hip(short *out, _Float16 a) { // CHECK-NEXT: store half [[TMP1]], ptr [[TMP2]], align 2 // CHECK-NEXT: ret void // -__device__ void test_fract_f16_hip(_Float16 *out, _Float16 a) { +__device__ void test_fract_f16(_Float16 *out, _Float16 a) { *out = __builtin_amdgcn_fracth(a); } -// CHECK-LABEL: define dso_local void @_Z18test_class_f16_hipPDF16_DF16_i( +// CHECK-LABEL: define dso_local void @_Z14test_class_f16PDF16_DF16_i( // CHECK-SAME: ptr noundef [[OUT:%.*]], half noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -230,6 +230,6 @@ __device__ void test_fract_f16_hip(_Float16 *out, _Float16 a) { // CHECK-NEXT: store half [[CONV]], ptr [[TMP3]], align 2 // CHECK-NEXT: ret void // -__device__ void test_class_f16_hip(_Float16 *out, _Float16 a, int b) { +__device__ void test_class_f16(_Float16 *out, _Float16 a, int b) { *out = __builtin_amdgcn_classh(a, b); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
