https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/201761
>From 9fc012d3d2ad23d7bebf9c649c1e71f6aacfb241 Mon Sep 17 00:00:00 2001 From: ranapratap55 <[email protected]> Date: Fri, 5 Jun 2026 11:40:44 +0530 Subject: [PATCH] [CIR][AMDGPU] Adds lowering for amdgcn extended image sample/gather4 builtins --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 62 +++- .../builtins-amdgcn-extended-image.hip | 350 ++++++++++++++++++ 2 files changed, 400 insertions(+), 12 deletions(-) create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index c22d7d8f8e3b1..58bfcbe633667 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -650,69 +650,107 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, *this, expr, "amdgcn.image.store.mip.cube", true); case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.2darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.cube", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.lz.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.l.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.d.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.lz.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.l.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.d.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.lz.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.l.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.d.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.lz.cube", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.l.cube", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.lz.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.l.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.d.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.lz.2darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.l.2darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: - case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; - } - case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; - } + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.d.2darray", false); + case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.gather4.lz.2d", false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { cgm.errorNYI(expr->getSourceRange(), diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip new file mode 100644 index 0000000000000..c69925399d900 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip @@ -0,0 +1,350 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1100 -target-feature +extended-image-insts \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1100 -target-feature +extended-image-insts \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1100 -target-feature +extended-image-insts \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef _Float16 half; +typedef half half4 __attribute__((ext_vector_type(4))); + +// CIR-LABEL: @_Z24test_gather4_lz_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.gather4.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z24test_gather4_lz_2d_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_gather4_lz_2d_v4f32(float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_1d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_1d_v4f32fu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_1d_v4f32(float s, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(15, s, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_1d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_1d_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_1d_v4f32(float s, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(15, s, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_1d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_1d_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_1d_v4f32(float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(15, dsdx, dsdy, s, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_2d_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_2d_v4f32(float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(15, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_2d_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_2d_v4f32(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, s, t, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_2d_v4f32ffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_2d_v4f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_3d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_3d_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_3d_v4f32(float s, float t, float r, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(15, s, t, r, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_3d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_3d_v4f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_3d_v4f32(float s, float t, float r, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, s, t, r, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_3d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_3d_v4f32fffffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_3d_v4f32(float dsdx, float dtdx, float drdx, float dsdy, float dtdy, float drdy, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, dsdx, dtdx, drdx, dsdy, dtdy, drdy, s, t, r, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z25test_sample_lz_cube_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z25test_sample_lz_cube_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_cube_v4f32(float s, float t, float face, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, s, t, face, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z24test_sample_l_cube_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z24test_sample_l_cube_v4f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_cube_v4f32(float s, float t, float face, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, s, t, face, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z28test_sample_lz_1darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z28test_sample_lz_1darray_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_1darray_v4f32(float s, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, s, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_l_1darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z27test_sample_l_1darray_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_1darray_v4f32(float s, float slice, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, s, slice, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_d_1darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z27test_sample_d_1darray_v4f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_1darray_v4f32(float dsdx, float dsdy, float s, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, dsdx, dsdy, s, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z28test_sample_lz_2darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z28test_sample_lz_2darray_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_2darray_v4f32(float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, s, t, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_l_2darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z27test_sample_l_2darray_v4f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_2darray_v4f32(float s, float t, float slice, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, s, t, slice, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_d_2darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z27test_sample_d_2darray_v4f32fffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_2darray_v4f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_1d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z23test_sample_lz_1d_v4f16fu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_lz_1d_v4f16(float s, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(15, s, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_1d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z22test_sample_l_1d_v4f16ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_l_1d_v4f16(float s, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(15, s, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_1d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z22test_sample_d_1d_v4f16fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_d_1d_v4f16(float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(15, dsdx, dsdy, s, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_2d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z23test_sample_lz_2d_v4f16ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_lz_2d_v4f16(float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(15, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_2d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z22test_sample_l_2d_v4f16fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_l_2d_v4f16(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(15, s, t, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_2d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z22test_sample_d_2d_v4f16ffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_d_2d_v4f16(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_3d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z23test_sample_lz_3d_v4f16fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_lz_3d_v4f16(float s, float t, float r, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(15, s, t, r, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_3d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z22test_sample_l_3d_v4f16ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_l_3d_v4f16(float s, float t, float r, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(15, s, t, r, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_3d_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z22test_sample_d_3d_v4f16fffffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_d_3d_v4f16(float dsdx, float dtdx, float drdx, float dsdy, float dtdy, float drdy, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(15, dsdx, dtdx, drdx, dsdy, dtdy, drdy, s, t, r, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z25test_sample_lz_cube_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z25test_sample_lz_cube_v4f16fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_lz_cube_v4f16(float s, float t, float face, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(15, s, t, face, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z24test_sample_l_cube_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z24test_sample_l_cube_v4f16ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_l_cube_v4f16(float s, float t, float face, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(15, s, t, face, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z28test_sample_lz_1darray_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z28test_sample_lz_1darray_v4f16ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_lz_1darray_v4f16(float s, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(15, s, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_l_1darray_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z27test_sample_l_1darray_v4f16fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_l_1darray_v4f16(float s, float slice, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(15, s, slice, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_d_1darray_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z27test_sample_d_1darray_v4f16ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_d_1darray_v4f16(float dsdx, float dsdy, float s, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(15, dsdx, dsdy, s, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z28test_sample_lz_2darray_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z28test_sample_lz_2darray_v4f16fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_lz_2darray_v4f16(float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(15, s, t, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_l_2darray_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z27test_sample_l_2darray_v4f16ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_l_2darray_v4f16(float s, float t, float slice, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(15, s, t, slice, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_d_2darray_v4f16 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16> +// LLVM: define{{.*}} <4 x half> @_Z27test_sample_d_2darray_v4f16fffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ half4 test_sample_d_2darray_v4f16(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z21test_sample_lz_2d_f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float +// LLVM: define{{.*}} float @_Z21test_sample_lz_2d_f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float test_sample_lz_2d_f32(float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z20test_sample_l_2d_f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float +// LLVM: define{{.*}} float @_Z20test_sample_l_2d_f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float test_sample_l_2d_f32(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, s, t, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z20test_sample_d_2d_f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float +// LLVM: define{{.*}} float @_Z20test_sample_d_2d_f32ffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float test_sample_d_2d_f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z26test_sample_lz_2darray_f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float +// LLVM: define{{.*}} float @_Z26test_sample_lz_2darray_f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float test_sample_lz_2darray_f32(float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, s, t, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z25test_sample_l_2darray_f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float +// LLVM: define{{.*}} float @_Z25test_sample_l_2darray_f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float test_sample_l_2darray_f32(float s, float t, float slice, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, s, t, slice, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z25test_sample_d_2darray_f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float +// LLVM: define{{.*}} float @_Z25test_sample_d_2darray_f32fffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float test_sample_d_2darray_f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, slice, tex, samp, 0, 120, 110); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
