llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Rana Pratap Reddy (ranapratap55) <details> <summary>Changes</summary> Support for lowering of` __builtin_amdgcn_image_sample/gather4` for AMDGPU builtins to clangIR. Followed similar lowering from clang->llvmir: `clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp`. Upstreaming clangIR PR: [llvm/clangir#<!-- -->2083](https://github.com/llvm/clangir/pull/2083) --- Patch is 42.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/201761.diff 2 Files Affected: - (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp (+50-12) - (added) clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip (+350) ``````````diff diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index c22d7d8f8e3b1..4506eeb61a4fb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -650,69 +650,107 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, *this, expr, "amdgcn.image.store.mip.cube", true); case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, expr, "amdgcn.image.sample.2darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.cube", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.lz.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.l.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.d.1d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.lz.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.l.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.d.2d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.lz.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.l.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.d.3d", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.lz.cube", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.l.cube", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.lz.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.l.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.d.1darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.lz.2darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.l.2darray", false); case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: - case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; - } - case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; - } + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.sample.d.2darray", false); + case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: + return emitAMDGCNImageOverloadedReturnType(*this, expr, + "amdgcn.image.gather4.lz.2d", false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { cgm.errorNYI(expr->getSourceRange(), diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip new file mode 100644 index 0000000000000..c69925399d900 --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip @@ -0,0 +1,350 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1100 -target-feature +extended-image-insts \ +// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1100 -target-feature +extended-image-insts \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1100 -target-feature +extended-image-insts \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef _Float16 half; +typedef half half4 __attribute__((ext_vector_type(4))); + +// CIR-LABEL: @_Z24test_gather4_lz_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.gather4.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z24test_gather4_lz_2d_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_gather4_lz_2d_v4f32(float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_1d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_1d_v4f32fu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_1d_v4f32(float s, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(15, s, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_1d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_1d_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_1d_v4f32(float s, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(15, s, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_1d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_1d_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_1d_v4f32(float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(15, dsdx, dsdy, s, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_2d_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_2d_v4f32(float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(15, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_2d_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_2d_v4f32(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, s, t, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_2d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_2d_v4f32ffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_2d_v4f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z23test_sample_lz_3d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_3d_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_3d_v4f32(float s, float t, float r, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(15, s, t, r, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_l_3d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_3d_v4f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_3d_v4f32(float s, float t, float r, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, s, t, r, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z22test_sample_d_3d_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_3d_v4f32fffffffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_d_3d_v4f32(float dsdx, float dtdx, float drdx, float dsdy, float dtdy, float drdy, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, dsdx, dtdx, drdx, dsdy, dtdy, drdy, s, t, r, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z25test_sample_lz_cube_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z25test_sample_lz_cube_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_cube_v4f32(float s, float t, float face, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, s, t, face, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z24test_sample_l_cube_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z24test_sample_l_cube_v4f32ffffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_l_cube_v4f32(float s, float t, float face, float lod, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, s, t, face, lod, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z28test_sample_lz_1darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z28test_sample_lz_1darray_v4f32ffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}}) +__device__ float4 test_sample_lz_1darray_v4f32(float s, float slice, __amdgpu_texture_t tex, int4 samp) { + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, s, slice, tex, samp, 0, 120, 110); +} + +// CIR-LABEL: @_Z27test_sample_l_1darray_v4f32 +// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float> +// LLVM: define{{.*}} <4 x float> @_Z27test_sample_l_1darray_v4f32fffu18__amdgpu_texture_tDv4_i( +// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/201761 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
