https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/197380
>From d842dff0d6fc57a0e1f45609d3d368b34085b576 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Wed, 13 May 2026 08:51:29 +0200 Subject: [PATCH 1/3] [Clang][HLSL] Fix EmitRuntimeCall to use C calling convention for intrinsics Fix HLSL builtin to SPIR-V intrinsic lowering: intrinsic calls must use CallingConv::C. --- clang/lib/CodeGen/CGCall.cpp | 6 +- .../builtins/AllMemoryBarrier.hlsl | 2 +- .../AllMemoryBarrierWithGroupSync.hlsl | 2 +- .../builtins/DeviceMemoryBarrier.hlsl | 2 +- .../DeviceMemoryBarrierWithGroupSync.hlsl | 2 +- .../builtins/GroupMemoryBarrier.hlsl | 2 +- .../GroupMemoryBarrierWithGroupSync.hlsl | 2 +- .../CodeGenHLSL/builtins/QuadReadAcrossX.hlsl | 88 +++++++++---------- .../CodeGenHLSL/builtins/QuadReadAcrossY.hlsl | 2 +- .../builtins/WaveActiveAllEqual.hlsl | 6 +- .../builtins/WaveActiveAllTrue.hlsl | 2 +- .../builtins/WaveActiveAnyTrue.hlsl | 2 +- .../builtins/WaveActiveBallot.hlsl | 2 +- .../builtins/WaveActiveBitAnd.hlsl | 2 +- .../CodeGenHLSL/builtins/WaveActiveBitOr.hlsl | 2 +- .../builtins/WaveActiveBitXor.hlsl | 2 +- .../CodeGenHLSL/builtins/WaveActiveMax.hlsl | 6 +- .../CodeGenHLSL/builtins/WaveActiveMin.hlsl | 6 +- .../builtins/WaveActiveProduct.hlsl | 6 +- .../CodeGenHLSL/builtins/WaveActiveSum.hlsl | 6 +- .../builtins/WavePrefixCountBits.hlsl | 2 +- .../builtins/WavePrefixProduct.hlsl | 6 +- .../CodeGenHLSL/builtins/WavePrefixSum.hlsl | 6 +- .../CodeGenHLSL/builtins/WaveReadLaneAt.hlsl | 18 ++-- .../builtins/wave_get_lane_count.hlsl | 4 +- .../builtins/wave_is_first_lane.hlsl | 4 +- 26 files changed, 97 insertions(+), 93 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a2b9c945788ee..73e189bec93ff 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5233,7 +5233,11 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { llvm::CallInst *call = Builder.CreateCall( callee, args, getBundlesForFunclet(callee.getCallee()), name); - call->setCallingConv(getRuntimeCC()); + // Intrinsics must use CallingConv::C; only apply the runtime CC to + // non-intrinsic callees. + if (auto *F = dyn_cast<llvm::Function>(callee.getCallee()); + !F || !F->isIntrinsic()) + call->setCallingConv(getRuntimeCC()); if (CGM.shouldEmitConvergenceTokens() && call->isConvergent()) return cast<llvm::CallInst>(addConvergenceControlToken(call)); diff --git a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl index 90d51c716c771..0fa798a16b805 100644 --- a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl +++ b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl @@ -11,7 +11,7 @@ // CHECK-SPIRV: define hidden spir_func void @ void test_AllMemoryBarrier() { // CHECK-DXIL: call void @llvm.[[TARGET]].all.memory.barrier() -// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].all.memory.barrier() +// CHECK-SPIRV: call void @llvm.[[TARGET]].all.memory.barrier() AllMemoryBarrier(); } diff --git a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl index 6ddb69671e094..b4a3371f7628f 100644 --- a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl +++ b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl @@ -11,7 +11,7 @@ // CHECK-SPIRV: define hidden spir_func void @ void test_AllMemoryBarrierWithGroupSync() { // CHECK-DXIL: call void @llvm.[[TARGET]].all.memory.barrier.with.group.sync() -// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].all.memory.barrier.with.group.sync() +// CHECK-SPIRV: call void @llvm.[[TARGET]].all.memory.barrier.with.group.sync() AllMemoryBarrierWithGroupSync(); } diff --git a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl index e2c08f7775c8c..d9613aedc1cc6 100644 --- a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl +++ b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl @@ -11,7 +11,7 @@ // CHECK-SPIRV: define hidden spir_func void @ void test_DeviceMemoryBarrier() { // CHECK-DXIL: call void @llvm.[[TARGET]].device.memory.barrier() -// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].device.memory.barrier() +// CHECK-SPIRV: call void @llvm.[[TARGET]].device.memory.barrier() DeviceMemoryBarrier(); } diff --git a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl index fa455f5f8338b..bea7d7391aec2 100644 --- a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl +++ b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl @@ -11,7 +11,7 @@ // CHECK-SPIRV: define hidden spir_func void @ void test_DeviceMemoryBarrierWithGroupSync() { // CHECK-DXIL: call void @llvm.[[TARGET]].device.memory.barrier.with.group.sync() -// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].device.memory.barrier.with.group.sync() +// CHECK-SPIRV: call void @llvm.[[TARGET]].device.memory.barrier.with.group.sync() DeviceMemoryBarrierWithGroupSync(); } diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl index b52819973f677..d33baeac940b6 100644 --- a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl +++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl @@ -11,7 +11,7 @@ // CHECK-SPIRV: define hidden spir_func void @ void test_GroupMemoryBarrier() { // CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier() -// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier() +// CHECK-SPIRV: call void @llvm.[[TARGET]].group.memory.barrier() GroupMemoryBarrier(); } diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl index e709ed3616f0d..b69f67cb8dfaa 100644 --- a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl +++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl @@ -11,7 +11,7 @@ // CHECK-SPIRV: define hidden spir_func void @ void test_GroupMemoryBarrierWithGroupSync() { // CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier.with.group.sync() -// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier.with.group.sync() +// CHECK-SPIRV: call void @llvm.[[TARGET]].group.memory.barrier.with.group.sync() GroupMemoryBarrierWithGroupSync(); } diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl index 54dd82b9fd485..f6bf05e524964 100644 --- a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl +++ b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl @@ -1,169 +1,169 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-compute %s -fnative-half-type -fnative-int16-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=dx -DCC="" +// RUN: --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=dx -DCC="" +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=spv -DCC="spir_func " +// RUN: --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=spv // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=spv -DCC="spir_func " +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=spv -// CHECK: %[[RET:.*]] = call [[CC]]i32 @llvm.[[TARGET]].quad.read.across.x.i32(i32 %[[#]]) +// CHECK: %[[RET:.*]] = call i32 @llvm.[[TARGET]].quad.read.across.x.i32(i32 %[[#]]) // CHECK: ret i32 %[[RET]] int test_int(int expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> @llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i32> @llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]]) // CHECK: ret <2 x i32> %[[RET]] int2 test_int2(int2 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> @llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i32> @llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]]) // CHECK: ret <3 x i32> %[[RET]] int3 test_int3(int3 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> @llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i32> @llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]]) // CHECK: ret <4 x i32> %[[RET]] int4 test_int4(int4 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]i32 @llvm.[[TARGET]].quad.read.across.x.i32(i32 %[[#]]) +// CHECK: %[[RET:.*]] = call i32 @llvm.[[TARGET]].quad.read.across.x.i32(i32 %[[#]]) // CHECK: ret i32 %[[RET]] uint test_uint(uint expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> @llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i32> @llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]]) // CHECK: ret <2 x i32> %[[RET]] uint2 test_uint2(uint2 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> @llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i32> @llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]]) // CHECK: ret <3 x i32> %[[RET]] uint3 test_uint3(uint3 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> @llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i32> @llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]]) // CHECK: ret <4 x i32> %[[RET]] uint4 test_uint4(uint4 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]i64 @llvm.[[TARGET]].quad.read.across.x.i64(i64 %[[#]]) +// CHECK: %[[RET:.*]] = call i64 @llvm.[[TARGET]].quad.read.across.x.i64(i64 %[[#]]) // CHECK: ret i64 %[[RET]] int64_t test_int64_t(int64_t expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> @llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i64> @llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]]) // CHECK: ret <2 x i64> %[[RET]] int64_t2 test_int64_t2(int64_t2 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> @llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i64> @llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]]) // CHECK: ret <3 x i64> %[[RET]] int64_t3 test_int64_t3(int64_t3 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> @llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i64> @llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]]) // CHECK: ret <4 x i64> %[[RET]] int64_t4 test_int64_t4(int64_t4 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]i64 @llvm.[[TARGET]].quad.read.across.x.i64(i64 %[[#]]) +// CHECK: %[[RET:.*]] = call i64 @llvm.[[TARGET]].quad.read.across.x.i64(i64 %[[#]]) // CHECK: ret i64 %[[RET]] uint64_t test_uint64_t(uint64_t expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> @llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i64> @llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]]) // CHECK: ret <2 x i64> %[[RET]] uint64_t2 test_uint64_t2(uint64_t2 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> @llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i64> @llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]]) // CHECK: ret <3 x i64> %[[RET]] uint64_t3 test_uint64_t3(uint64_t3 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> @llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i64> @llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]]) // CHECK: ret <4 x i64> %[[RET]] uint64_t4 test_uint64_t4(uint64_t4 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float @llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]]) // CHECK: ret float %[[RET]] float test_float(float expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> @llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]]) // CHECK: ret <2 x float> %[[RET]] float2 test_float2(float2 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> @llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]]) // CHECK: ret <3 x float> %[[RET]] float3 test_float3(float3 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> @llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]]) // CHECK: ret <4 x float> %[[RET]] float4 test_float4(float4 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]double @llvm.[[TARGET]].quad.read.across.x.f64(double %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn double @llvm.[[TARGET]].quad.read.across.x.f64(double %[[#]]) // CHECK: ret double %[[RET]] double test_double(double expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x double> @llvm.[[TARGET]].quad.read.across.x.v2f64(<2 x double> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.[[TARGET]].quad.read.across.x.v2f64(<2 x double> %[[#]]) // CHECK: ret <2 x double> %[[RET]] double2 test_double2(double2 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x double> @llvm.[[TARGET]].quad.read.across.x.v3f64(<3 x double> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.[[TARGET]].quad.read.across.x.v3f64(<3 x double> %[[#]]) // CHECK: ret <3 x double> %[[RET]] double3 test_double3(double3 expr) { return QuadReadAcrossX(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x double> @llvm.[[TARGET]].quad.read.across.x.v4f64(<4 x double> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].quad.read.across.x.v4f64(<4 x double> %[[#]]) // CHECK: ret <4 x double> %[[RET]] double4 test_double4(double4 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]half @llvm.[[TARGET]].quad.read.across.x.f16(half %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].quad.read.across.x.f16(half %[[#]]) // CHECK-NATIVE_HALF: ret half %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float @llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]]) // CHECK-NO_HALF: ret float %[[RET]] half test_half(half expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x half> @llvm.[[TARGET]].quad.read.across.x.v2f16(<2 x half> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[TARGET]].quad.read.across.x.v2f16(<2 x half> %[[#]]) // CHECK-NATIVE_HALF: ret <2 x half> %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> @llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]]) // CHECK-NO_HALF: ret <2 x float> %[[RET]] half2 test_half2(half2 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x half> @llvm.[[TARGET]].quad.read.across.x.v3f16(<3 x half> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[TARGET]].quad.read.across.x.v3f16(<3 x half> %[[#]]) // CHECK-NATIVE_HALF: ret <3 x half> %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> @llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]]) // CHECK-NO_HALF: ret <3 x float> %[[RET]] half3 test_half3(half3 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x half> @llvm.[[TARGET]].quad.read.across.x.v4f16(<4 x half> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].quad.read.across.x.v4f16(<4 x half> %[[#]]) // CHECK-NATIVE_HALF: ret <4 x half> %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> @llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]]) // CHECK-NO_HALF: ret <4 x float> %[[RET]] half4 test_half4(half4 expr) { return QuadReadAcrossX(expr); } #ifdef __HLSL_ENABLE_16_BIT -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 @llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 @llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]]) // CHECK-NATIVE_HALF: ret i16 %[[RET]] int16_t test_int16_t(int16_t expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> @llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> @llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]] int16_t2 test_int16_t2(int16_t2 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> @llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> @llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]] int16_t3 test_int16_t3(int16_t3 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> @llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> @llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]] int16_t4 test_int16_t4(int16_t4 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 @llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 @llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]]) // CHECK-NATIVE_HALF: ret i16 %[[RET]] uint16_t test_uint16_t(uint16_t expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> @llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> @llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]] uint16_t2 test_uint16_t2(uint16_t2 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> @llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> @llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]] uint16_t3 test_uint16_t3(uint16_t3 expr) { return QuadReadAcrossX(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> @llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> @llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]] uint16_t4 test_uint16_t4(uint16_t4 expr) { return QuadReadAcrossX(expr); } #endif diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl index 313c287dc1a7d..95ecd575e56fc 100644 --- a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl +++ b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl @@ -16,7 +16,7 @@ // Capture the expected interchange format so not every check needs to be duplicated // CHECK-DXIL: %[[RET:.*]] = call [[CC:]]i32 @llvm.[[ICF:dx]].quad.read.across.y.i32(i32 %[[#]]) -// CHECK-SPIRV: %[[RET:.*]] = call [[CC:spir_func ]]i32 @llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]]) +// CHECK-SPIRV: %[[RET:.*]] = call [[CC:]]i32 @llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]]) // CHECK: ret i32 %[[RET]] int test_int(int expr) { return QuadReadAcrossY(expr); } diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl index 323aa439984f9..f8bcdfdb3333f 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int bool test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func i1 @llvm.spv.wave.all.equal.i32(i32 + // CHECK-SPIRV: %[[RET:.*]] = call i1 @llvm.spv.wave.all.equal.i32(i32 // CHECK-DXIL: %[[RET:.*]] = call i1 @llvm.dx.wave.all.equal.i32(i32 // CHECK: ret i1 %[[RET]] return WaveActiveAllEqual(expr); @@ -20,7 +20,7 @@ bool test_int(int expr) { // CHECK-LABEL: test_uint64_t bool test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func i1 @llvm.spv.wave.all.equal.i64(i64 + // CHECK-SPIRV: %[[RET:.*]] = call i1 @llvm.spv.wave.all.equal.i64(i64 // CHECK-DXIL: %[[RET:.*]] = call i1 @llvm.dx.wave.all.equal.i64(i64 // CHECK: ret i1 %[[RET]] return WaveActiveAllEqual(expr); @@ -33,7 +33,7 @@ bool test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 bool4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call spir_func <4 x i1> @llvm.spv.wave.all.equal.v4f32(<4 x float> + // CHECK-SPIRV: %[[RET1:.*]] = call <4 x i1> @llvm.spv.wave.all.equal.v4f32(<4 x float> // CHECK-DXIL: %[[RET1:.*]] = call <4 x i1> @llvm.dx.wave.all.equal.v4f32(<4 x float> // CHECK: ret <4 x i1> %[[RET1]] return WaveActiveAllEqual(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl index f499fc97f43fc..94060ceb97e66 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl @@ -10,7 +10,7 @@ // CHECK-LABEL: define {{.*}}test bool test(bool p1) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func i1 @llvm.spv.wave.all(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call i1 @llvm.spv.wave.all(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[RET:.*]] = call i1 @llvm.dx.wave.all(i1 %{{[a-zA-Z0-9]+}}) // CHECK: ret i1 %[[RET]] return WaveActiveAllTrue(p1); diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl index 3655cdb443fa9..c4b8239448f2c 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl @@ -10,7 +10,7 @@ // CHECK-LABEL: define {{.*}}test bool test(bool p1) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func i1 @llvm.spv.wave.any(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call i1 @llvm.spv.wave.any(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[RET:.*]] = call i1 @llvm.dx.wave.any(i1 %{{[a-zA-Z0-9]+}}) // CHECK: ret i1 %[[RET]] return WaveActiveAnyTrue(p1); diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl index df2d854a64247..4c7d5cd2a1c4a 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl @@ -10,7 +10,7 @@ // CHECK-LABEL: define {{.*}}test uint4 test(bool p1) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[SPIRVRET:.*]] = call spir_func <4 x i32> @llvm.spv.subgroup.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[SPIRVRET:.*]] = call <4 x i32> @llvm.spv.subgroup.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[WAB:.*]] = call { i32, i32, i32, i32 } @llvm.dx.wave.ballot.i32(i1 %{{[a-zA-Z0-9]+}}) // CHECK-DXIL-NEXT: extractvalue { i32, i32, i32, i32 } {{.*}} 0 // CHECK-DXIL-NEXT: insertelement <4 x i32> poison, i32 {{.*}}, i32 0 diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl index a6da9678d7275..1e1801e49540a 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ // RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func" +// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call" // Test basic lowering to runtime function call. diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl index 80364724448fa..e7531b6013166 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ // RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func" +// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call" // Test basic lowering to runtime function call. diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl index 9c94663390843..b03cb51c6df04 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ // RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func" +// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call" // Test basic lowering to runtime function call. diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl index be05a17cc3692..a4628ad103e0d 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int int test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.max.i32([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.reduce.max.i32([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.max.i32([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveMax(expr); @@ -20,7 +20,7 @@ int test_int(int expr) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.umax.i64([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.reduce.umax.i64([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.umax.i64([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveMax(expr); @@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.reduce.max.v4f32([[TY1]] %[[#]] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.reduce.max.v4f32([[TY1]] %[[#]] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.reduce.max.v4f32([[TY1]] %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WaveActiveMax(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl index 1194f842deed6..f2e3686947f51 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int int test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.min.i32([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.reduce.min.i32([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.min.i32([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveMin(expr); @@ -20,7 +20,7 @@ int test_int(int expr) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.umin.i64([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.reduce.umin.i64([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.umin.i64([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveMin(expr); @@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.reduce.min.v4f32([[TY1]] %[[#]] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.reduce.min.v4f32([[TY1]] %[[#]] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.reduce.min.v4f32([[TY1]] %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WaveActiveMin(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl index 3a8320e7333fc..0247b7cbeb0f6 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int int test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.product.i32([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.product.i32([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.product.i32([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveProduct(expr); @@ -20,7 +20,7 @@ int test_int(int expr) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.product.i64([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.product.i64([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.uproduct.i64([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveProduct(expr); @@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.product.v4f32([[TY1]] %[[#]] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.product.v4f32([[TY1]] %[[#]] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.product.v4f32([[TY1]] %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WaveActiveProduct(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl index 1fc93c62c8db0..6caa3d775f0d2 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int int test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.sum.i32([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.reduce.sum.i32([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.sum.i32([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveSum(expr); @@ -20,7 +20,7 @@ int test_int(int expr) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.sum.i64([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.reduce.sum.i64([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.usum.i64([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveActiveSum(expr); @@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]] %[[#]] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]] %[[#]] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.reduce.sum.v4f32([[TY1]] %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WaveActiveSum(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl b/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl index 25d9074b08a68..bfd42740ac4ed 100644 --- a/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl @@ -18,7 +18,7 @@ int test_int(bool expr) { // CHECK: %[[LOADEDVAL:.*]] = load i32, ptr %[[EXPRADDR]], align 4 // CHECK: %[[TRUNCLOADEDVAL:.*]] = icmp ne i32 %[[LOADEDVAL]], 0 - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.subgroup.prefix.bit.count(i1 %[[TRUNCLOADEDVAL]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.subgroup.prefix.bit.count(i1 %[[TRUNCLOADEDVAL]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.prefix.bit.count(i1 %[[TRUNCLOADEDVAL]]) // CHECK: ret [[TY]] %[[RET]] return WavePrefixCountBits(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl b/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl index a45cbf29b87f2..a4dc01527a7f2 100644 --- a/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int int test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.prefix.product.i32([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.prefix.product.i32([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.prefix.product.i32([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WavePrefixProduct(expr); @@ -20,7 +20,7 @@ int test_int(int expr) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.prefix.product.i64([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.prefix.product.i64([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.prefix.uproduct.i64([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WavePrefixProduct(expr); @@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.prefix.product.v4f32([[TY1]] %[[#]] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.prefix.product.v4f32([[TY1]] %[[#]] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.prefix.product.v4f32([[TY1]] %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WavePrefixProduct(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl b/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl index f22aa69ba45d5..a1df3fe02c802 100644 --- a/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl @@ -9,7 +9,7 @@ // CHECK-LABEL: test_int int test_int(int expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.prefix.sum.i32([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.prefix.sum.i32([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.prefix.sum.i32([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WavePrefixSum(expr); @@ -20,7 +20,7 @@ int test_int(int expr) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr) { - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.prefix.sum.i64([[TY]] %[[#]]) + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.prefix.sum.i64([[TY]] %[[#]]) // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.prefix.usum.i64([[TY]] %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WavePrefixSum(expr); @@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr) { - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.prefix.sum.v4f32([[TY1]] %[[#]] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.prefix.sum.v4f32([[TY1]] %[[#]] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.prefix.sum.v4f32([[TY1]] %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WavePrefixSum(expr); diff --git a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl index da6cbc40a79bb..24252f3fa3207 100644 --- a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl +++ b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl @@ -10,7 +10,7 @@ // CHECK-LABEL: test_int int test_int(int expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -22,7 +22,7 @@ int test_int(int expr, uint idx) { // CHECK-LABEL: test_uint uint test_uint(uint expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -31,7 +31,7 @@ uint test_uint(uint expr, uint idx) { // CHECK-LABEL: test_int64_t int64_t test_int64_t(int64_t expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -43,7 +43,7 @@ int64_t test_int64_t(int64_t expr, uint idx) { // CHECK-LABEL: test_uint64_t uint64_t test_uint64_t(uint64_t expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok0]]) ] // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -53,7 +53,7 @@ uint64_t test_uint64_t(uint64_t expr, uint idx) { // CHECK-LABEL: test_int16 int16_t test_int16(int16_t expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok1:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok1]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok1]]) ] // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -65,7 +65,7 @@ int16_t test_int16(int16_t expr, uint idx) { // CHECK-LABEL: test_uint16 uint16_t test_uint16(uint16_t expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok1:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok1]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call [[TY:.*]] @llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok1]]) ] // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -77,7 +77,7 @@ uint16_t test_uint16(uint16_t expr, uint idx) { // CHECK-LABEL: test_half half test_half(half expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok2:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok2]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok2]]) ] // CHECK-DXIL: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.dx.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -89,7 +89,7 @@ half test_half(half expr, uint idx) { // CHECK-LABEL: test_double double test_double(double expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok3:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok3]]) ] + // CHECK-SPIRV: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok3]]) ] // CHECK-DXIL: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] @llvm.dx.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY]] %[[RET]] return WaveReadLaneAt(expr, idx); @@ -101,7 +101,7 @@ double test_double(double expr, uint idx) { // CHECK-LABEL: test_floatv4 float4 test_floatv4(float4 expr, uint idx) { // CHECK-SPIRV: %[[#entry_tok4:]] = call token @llvm.experimental.convergence.entry() - // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok4]]) ] + // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ "convergencectrl"(token %[[#entry_tok4]]) ] // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) // CHECK: ret [[TY1]] %[[RET1]] return WaveReadLaneAt(expr, idx); diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl index 8072f6d4ea206..fdf019262d8cb 100644 --- a/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl +++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl @@ -14,13 +14,13 @@ void main() { while (a) { // CHECK-DXIL: %[[#]] = call i32 @llvm.dx.wave.get.lane.count() -// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count() +// CHECK-SPIRV: %[[#]] = call i32 @llvm.spv.wave.get.lane.count() // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ] a = WaveGetLaneCount(); } // CHECK-DXIL: %[[#]] = call i32 @llvm.dx.wave.get.lane.count() -// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count() +// CHECK-SPIRV: %[[#]] = call i32 @llvm.spv.wave.get.lane.count() // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ] b = WaveGetLaneCount(); } diff --git a/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl b/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl index 2fb6defb896f9..18860c321eb91 100644 --- a/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl +++ b/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl @@ -13,7 +13,7 @@ void main() { while (true) { // CHECK-DXIL: %[[#]] = call i1 @llvm.dx.wave.is.first.lane() -// CHECK-SPIRV: %[[#]] = call spir_func i1 @llvm.spv.wave.is.first.lane() +// CHECK-SPIRV: %[[#]] = call i1 @llvm.spv.wave.is.first.lane() // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ] if (WaveIsFirstLane()) { break; @@ -21,7 +21,7 @@ void main() { } // CHECK-DXIL: %[[#]] = call i1 @llvm.dx.wave.is.first.lane() -// CHECK-SPIRV: %[[#]] = call spir_func i1 @llvm.spv.wave.is.first.lane() +// CHECK-SPIRV: %[[#]] = call i1 @llvm.spv.wave.is.first.lane() // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ] if (WaveIsFirstLane()) { return; >From ef4633d82c5ed0888841a4fbb97327e614e8d1d2 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Wed, 13 May 2026 10:11:02 +0200 Subject: [PATCH 2/3] Add new EmitIntrinsicCall and use in CGHLSLBuiltins --- clang/lib/CodeGen/CGCall.cpp | 25 ++++- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 104 +++++++++--------- clang/lib/CodeGen/CodeGenFunction.h | 5 + .../CodeGenHLSL/builtins/QuadReadAcrossY.hlsl | 82 +++++++------- 4 files changed, 118 insertions(+), 98 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 73e189bec93ff..b698d4489b1cf 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5233,17 +5233,32 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { llvm::CallInst *call = Builder.CreateCall( callee, args, getBundlesForFunclet(callee.getCallee()), name); - // Intrinsics must use CallingConv::C; only apply the runtime CC to - // non-intrinsic callees. - if (auto *F = dyn_cast<llvm::Function>(callee.getCallee()); - !F || !F->isIntrinsic()) - call->setCallingConv(getRuntimeCC()); + call->setCallingConv(getRuntimeCC()); if (CGM.shouldEmitConvergenceTokens() && call->isConvergent()) return cast<llvm::CallInst>(addConvergenceControlToken(call)); return call; } +llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee, + const llvm::Twine &Name) { + return EmitIntrinsicCall(Callee, {}, Name); +} + +llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const llvm::Twine &Name) { + assert(dyn_cast<llvm::Function>(Callee.getCallee()) && + cast<llvm::Function>(Callee.getCallee())->isIntrinsic() && + "EmitIntrinsicCall called with non-intrinsic callee"); + llvm::CallInst *Call = Builder.CreateCall( + Callee, Args, getBundlesForFunclet(Callee.getCallee()), Name); + + if (CGM.shouldEmitConvergenceTokens() && Call->isConvergent()) + return cast<llvm::CallInst>(addConvergenceControlToken(Call)); + return Call; +} + /// Emits a call or invoke to the given noreturn runtime function. void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke( llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args) { diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 82b03d7d5f069..2e672442f281a 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -174,7 +174,7 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF, // Call DXIL intrinsic: returns { i32, i32, i32, i32 } llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32}); - Value *StructVal = CGF.EmitRuntimeCall(Fn, Cond); + Value *StructVal = CGF.EmitIntrinsicCall(Fn, Cond); assert(StructVal->getType() == Struct4I32 && "dx.wave.ballot must return {i32,i32,i32,i32}"); @@ -190,7 +190,7 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF, } if (CGF.CGM.getTarget().getTriple().isSPIRV()) - return CGF.EmitRuntimeCall( + return CGF.EmitIntrinsicCall( CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond); llvm_unreachable( @@ -1288,7 +1288,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID = getPrefixCountBitsIntrinsic(getTarget().getTriple().getArch()); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), IID), ArrayRef{Op}, "hlsl.wave.prefix.bit.count"); } @@ -1335,9 +1335,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Value *Op = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - {Op}); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), ID, {Op->getType()}), + {Op}); } case Builtin::BI__builtin_hlsl_wave_active_all_true: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1345,7 +1345,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "Intrinsic WaveActiveAllTrue operand must be a bool"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); } case Builtin::BI__builtin_hlsl_wave_active_any_true: { @@ -1354,7 +1354,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "Intrinsic WaveActiveAnyTrue operand must be a bool"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); } case Builtin::BI__builtin_hlsl_wave_active_bit_or: { @@ -1364,9 +1364,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "representation"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - ArrayRef{Op}, "hlsl.wave.active.bit.or"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), ID, {Op->getType()}), + ArrayRef{Op}, "hlsl.wave.active.bit.or"); } case Builtin::BI__builtin_hlsl_wave_active_bit_xor: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1375,9 +1375,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "representation"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - ArrayRef{Op}, "hlsl.wave.active.bit.xor"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), ID, {Op->getType()}), + ArrayRef{Op}, "hlsl.wave.active.bit.xor"); } case Builtin::BI__builtin_hlsl_wave_active_bit_and: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1386,9 +1386,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "representation"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - ArrayRef{Op}, "hlsl.wave.active.bit.and"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), ID, {Op->getType()}), + ArrayRef{Op}, "hlsl.wave.active.bit.and"); } case Builtin::BI__builtin_hlsl_wave_active_ballot: { [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1400,7 +1400,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, case Builtin::BI__builtin_hlsl_wave_active_count_bits: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), ArrayRef{OpExpr}); } @@ -1410,9 +1410,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID = getWaveActiveSumIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.sum"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), IID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.wave.active.sum"); } case Builtin::BI__builtin_hlsl_wave_active_product: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1420,9 +1420,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID = getWaveActiveProductIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.product"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), IID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.wave.active.product"); } case Builtin::BI__builtin_hlsl_wave_active_max: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1434,9 +1434,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, else IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.max"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), IID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.wave.active.max"); } case Builtin::BI__builtin_hlsl_wave_active_min: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1448,9 +1448,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, else IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.min"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), IID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.wave.active.min"); } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in @@ -1458,7 +1458,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, // for the DirectX intrinsic and the demangled builtin name switch (CGM.getTarget().getTriple().getArch()) { case llvm::Triple::dxil: - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( &CGM.getModule(), Intrinsic::dx_wave_getlaneindex)); case llvm::Triple::spirv: return EmitRuntimeCall(CGM.CreateRuntimeFunction( @@ -1471,12 +1471,12 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, } case Builtin::BI__builtin_hlsl_wave_is_first_lane: { Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_wave_get_lane_count: { Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_wave_read_lane_at: { @@ -1484,7 +1484,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, // create our function type. Value *OpExpr = EmitScalarExpr(E->getArg(0)); Value *OpIndex = EmitScalarExpr(E->getArg(1)); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration( &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(), {OpExpr->getType()}), @@ -1494,31 +1494,31 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID IID = getWavePrefixSumIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.prefix.sum"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), IID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.wave.prefix.sum"); } case Builtin::BI__builtin_hlsl_wave_prefix_product: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID IID = getWavePrefixProductIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.prefix.product"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), IID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.wave.prefix.product"); } case Builtin::BI__builtin_hlsl_quad_read_across_x: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.quad.read.across.x"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), ID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.quad.read.across.x"); } case Builtin::BI__builtin_hlsl_quad_read_across_y: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic(); - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.quad.read.across.y"); + return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), ID, {OpExpr->getType()}), + ArrayRef{OpExpr}, "hlsl.quad.read.across.y"); } case Builtin::BI__builtin_hlsl_elementwise_sign: { auto *Arg0 = E->getArg(0); @@ -1576,35 +1576,35 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return handleHlslClip(E, this); case Builtin::BI__builtin_hlsl_all_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: { Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_device_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: { Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_group_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: { Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic(); - return EmitRuntimeCall( + return EmitIntrinsicCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e0f8e62fb53af..7b2f0a5c5e3a2 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4610,6 +4610,11 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args, const Twine &name = ""); + llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee, + const Twine &Name = ""); + llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const Twine &Name = ""); llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name = ""); llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl index 95ecd575e56fc..9d70545f90a28 100644 --- a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl +++ b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl @@ -15,157 +15,157 @@ // RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV,CHECK-NO_HALF // Capture the expected interchange format so not every check needs to be duplicated -// CHECK-DXIL: %[[RET:.*]] = call [[CC:]]i32 @llvm.[[ICF:dx]].quad.read.across.y.i32(i32 %[[#]]) -// CHECK-SPIRV: %[[RET:.*]] = call [[CC:]]i32 @llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]]) +// CHECK-DXIL: %[[RET:.*]] = call i32 @llvm.[[ICF:dx]].quad.read.across.y.i32(i32 %[[#]]) +// CHECK-SPIRV: %[[RET:.*]] = call i32 @llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]]) // CHECK: ret i32 %[[RET]] int test_int(int expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> @llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i32> @llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]]) // CHECK: ret <2 x i32> %[[RET]] int2 test_int2(int2 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> @llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i32> @llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]]) // CHECK: ret <3 x i32> %[[RET]] int3 test_int3(int3 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> @llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i32> @llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]]) // CHECK: ret <4 x i32> %[[RET]] int4 test_int4(int4 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]i32 @llvm.[[ICF]].quad.read.across.y.i32(i32 %[[#]]) +// CHECK: %[[RET:.*]] = call i32 @llvm.[[ICF]].quad.read.across.y.i32(i32 %[[#]]) // CHECK: ret i32 %[[RET]] uint test_uint(uint expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> @llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i32> @llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]]) // CHECK: ret <2 x i32> %[[RET]] uint2 test_uint2(uint2 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> @llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i32> @llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]]) // CHECK: ret <3 x i32> %[[RET]] uint3 test_uint3(uint3 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> @llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i32> @llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]]) // CHECK: ret <4 x i32> %[[RET]] uint4 test_uint4(uint4 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]i64 @llvm.[[ICF]].quad.read.across.y.i64(i64 %[[#]]) +// CHECK: %[[RET:.*]] = call i64 @llvm.[[ICF]].quad.read.across.y.i64(i64 %[[#]]) // CHECK: ret i64 %[[RET]] int64_t test_int64_t(int64_t expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> @llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i64> @llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]]) // CHECK: ret <2 x i64> %[[RET]] int64_t2 test_int64_t2(int64_t2 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> @llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i64> @llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]]) // CHECK: ret <3 x i64> %[[RET]] int64_t3 test_int64_t3(int64_t3 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> @llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i64> @llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]]) // CHECK: ret <4 x i64> %[[RET]] int64_t4 test_int64_t4(int64_t4 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]i64 @llvm.[[ICF]].quad.read.across.y.i64(i64 %[[#]]) +// CHECK: %[[RET:.*]] = call i64 @llvm.[[ICF]].quad.read.across.y.i64(i64 %[[#]]) // CHECK: ret i64 %[[RET]] uint64_t test_uint64_t(uint64_t expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> @llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <2 x i64> @llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]]) // CHECK: ret <2 x i64> %[[RET]] uint64_t2 test_uint64_t2(uint64_t2 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> @llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <3 x i64> @llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]]) // CHECK: ret <3 x i64> %[[RET]] uint64_t3 test_uint64_t3(uint64_t3 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> @llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]]) +// CHECK: %[[RET:.*]] = call <4 x i64> @llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]]) // CHECK: ret <4 x i64> %[[RET]] uint64_t4 test_uint64_t4(uint64_t4 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float @llvm.[[ICF]].quad.read.across.y.f32(float %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].quad.read.across.y.f32(float %[[#]]) // CHECK: ret float %[[RET]] float test_float(float expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> @llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]]) // CHECK: ret <2 x float> %[[RET]] float2 test_float2(float2 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> @llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]]) // CHECK: ret <3 x float> %[[RET]] float3 test_float3(float3 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> @llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]]) // CHECK: ret <4 x float> %[[RET]] float4 test_float4(float4 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]double @llvm.[[ICF]].quad.read.across.y.f64(double %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn double @llvm.[[ICF]].quad.read.across.y.f64(double %[[#]]) // CHECK: ret double %[[RET]] double test_double(double expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x double> @llvm.[[ICF]].quad.read.across.y.v2f64(<2 x double> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.[[ICF]].quad.read.across.y.v2f64(<2 x double> %[[#]]) // CHECK: ret <2 x double> %[[RET]] double2 test_double2(double2 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x double> @llvm.[[ICF]].quad.read.across.y.v3f64(<3 x double> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.[[ICF]].quad.read.across.y.v3f64(<3 x double> %[[#]]) // CHECK: ret <3 x double> %[[RET]] double3 test_double3(double3 expr) { return QuadReadAcrossY(expr); } -// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x double> @llvm.[[ICF]].quad.read.across.y.v4f64(<4 x double> %[[#]]) +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[ICF]].quad.read.across.y.v4f64(<4 x double> %[[#]]) // CHECK: ret <4 x double> %[[RET]] double4 test_double4(double4 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]half @llvm.[[ICF]].quad.read.across.y.f16(half %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.[[ICF]].quad.read.across.y.f16(half %[[#]]) // CHECK-NATIVE_HALF: ret half %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float @llvm.[[ICF]].quad.read.across.y.f32(float %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.[[ICF]].quad.read.across.y.f32(float %[[#]]) // CHECK-NO_HALF: ret float %[[RET]] half test_half(half expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x half> @llvm.[[ICF]].quad.read.across.y.v2f16(<2 x half> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.[[ICF]].quad.read.across.y.v2f16(<2 x half> %[[#]]) // CHECK-NATIVE_HALF: ret <2 x half> %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> @llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]]) // CHECK-NO_HALF: ret <2 x float> %[[RET]] half2 test_half2(half2 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x half> @llvm.[[ICF]].quad.read.across.y.v3f16(<3 x half> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.[[ICF]].quad.read.across.y.v3f16(<3 x half> %[[#]]) // CHECK-NATIVE_HALF: ret <3 x half> %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> @llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]]) // CHECK-NO_HALF: ret <3 x float> %[[RET]] half3 test_half3(half3 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x half> @llvm.[[ICF]].quad.read.across.y.v4f16(<4 x half> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[ICF]].quad.read.across.y.v4f16(<4 x half> %[[#]]) // CHECK-NATIVE_HALF: ret <4 x half> %[[RET]] -// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> @llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]]) +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]]) // CHECK-NO_HALF: ret <4 x float> %[[RET]] half4 test_half4(half4 expr) { return QuadReadAcrossY(expr); } #ifdef __HLSL_ENABLE_16_BIT -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 @llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 @llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]]) // CHECK-NATIVE_HALF: ret i16 %[[RET]] int16_t test_int16_t(int16_t expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> @llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> @llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]] int16_t2 test_int16_t2(int16_t2 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> @llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> @llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]] int16_t3 test_int16_t3(int16_t3 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> @llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> @llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]] int16_t4 test_int16_t4(int16_t4 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 @llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 @llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]]) // CHECK-NATIVE_HALF: ret i16 %[[RET]] uint16_t test_uint16_t(uint16_t expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> @llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> @llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]] uint16_t2 test_uint16_t2(uint16_t2 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> @llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> @llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]] uint16_t3 test_uint16_t3(uint16_t3 expr) { return QuadReadAcrossY(expr); } -// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> @llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]]) +// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> @llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]]) // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]] uint16_t4 test_uint16_t4(uint16_t4 expr) { return QuadReadAcrossY(expr); } #endif >From 4d8d384168d31dc73ddecf1b0720434d5f4a9dda Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Wed, 13 May 2026 11:18:47 +0200 Subject: [PATCH 3/3] pass Intrinsic::ID to EmitIntrinsicCall --- clang/lib/CodeGen/CGCall.cpp | 21 +++-- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 116 ++++++++++----------------- clang/lib/CodeGen/CodeGenFunction.h | 8 +- 3 files changed, 61 insertions(+), 84 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b698d4489b1cf..1b420049fffc1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5240,20 +5240,25 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, return call; } -llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee, +llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::Intrinsic::ID ID, const llvm::Twine &Name) { - return EmitIntrinsicCall(Callee, {}, Name); + return EmitIntrinsicCall(ID, {}, {}, Name); } -llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee, +llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::Intrinsic::ID ID, ArrayRef<llvm::Value *> Args, const llvm::Twine &Name) { - assert(dyn_cast<llvm::Function>(Callee.getCallee()) && - cast<llvm::Function>(Callee.getCallee())->isIntrinsic() && - "EmitIntrinsicCall called with non-intrinsic callee"); - llvm::CallInst *Call = Builder.CreateCall( - Callee, Args, getBundlesForFunclet(Callee.getCallee()), Name); + return EmitIntrinsicCall(ID, {}, Args, Name); +} +llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::Intrinsic::ID ID, + ArrayRef<llvm::Type *> Types, + ArrayRef<llvm::Value *> Args, + const llvm::Twine &Name) { + llvm::Function *F = + llvm::Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID, Types); + llvm::CallInst *Call = + Builder.CreateCall(F, Args, getBundlesForFunclet(F), Name); if (CGM.shouldEmitConvergenceTokens() && Call->isConvergent()) return cast<llvm::CallInst>(addConvergenceControlToken(Call)); return Call; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 2e672442f281a..a4cd28f97b6d6 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -172,9 +172,8 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF, if (CGF.CGM.getTarget().getTriple().isDXIL()) { // Call DXIL intrinsic: returns { i32, i32, i32, i32 } - llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32}); - - Value *StructVal = CGF.EmitIntrinsicCall(Fn, Cond); + Value *StructVal = + CGF.EmitIntrinsicCall(Intrinsic::dx_wave_ballot, {I32}, {Cond}); assert(StructVal->getType() == Struct4I32 && "dx.wave.ballot must return {i32,i32,i32,i32}"); @@ -190,8 +189,7 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF, } if (CGF.CGM.getTarget().getTriple().isSPIRV()) - return CGF.EmitIntrinsicCall( - CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond); + return CGF.EmitIntrinsicCall(Intrinsic::spv_subgroup_ballot, {Cond}); llvm_unreachable( "WaveActiveBallot is only supported for DXIL and SPIRV targets"); @@ -1288,9 +1286,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID = getPrefixCountBitsIntrinsic(getTarget().getTriple().getArch()); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), IID), ArrayRef{Op}, - "hlsl.wave.prefix.bit.count"); + return EmitIntrinsicCall(IID, ArrayRef{Op}, "hlsl.wave.prefix.bit.count"); } case Builtin::BI__builtin_hlsl_select: { Value *OpCond = EmitScalarExpr(E->getArg(0)); @@ -1335,9 +1331,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Value *Op = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - {Op}); + return EmitIntrinsicCall(ID, {Op->getType()}, {Op}); } case Builtin::BI__builtin_hlsl_wave_active_all_true: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1345,8 +1339,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "Intrinsic WaveActiveAllTrue operand must be a bool"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); + return EmitIntrinsicCall(ID, {Op}); } case Builtin::BI__builtin_hlsl_wave_active_any_true: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1354,8 +1347,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "Intrinsic WaveActiveAnyTrue operand must be a bool"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); + return EmitIntrinsicCall(ID, {Op}); } case Builtin::BI__builtin_hlsl_wave_active_bit_or: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1364,9 +1356,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "representation"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - ArrayRef{Op}, "hlsl.wave.active.bit.or"); + return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op}, + "hlsl.wave.active.bit.or"); } case Builtin::BI__builtin_hlsl_wave_active_bit_xor: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1375,9 +1366,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "representation"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - ArrayRef{Op}, "hlsl.wave.active.bit.xor"); + return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op}, + "hlsl.wave.active.bit.xor"); } case Builtin::BI__builtin_hlsl_wave_active_bit_and: { Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1386,9 +1376,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "representation"); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {Op->getType()}), - ArrayRef{Op}, "hlsl.wave.active.bit.and"); + return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op}, + "hlsl.wave.active.bit.and"); } case Builtin::BI__builtin_hlsl_wave_active_ballot: { [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0)); @@ -1400,9 +1389,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, case Builtin::BI__builtin_hlsl_wave_active_count_bits: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), - ArrayRef{OpExpr}); + return EmitIntrinsicCall(ID, ArrayRef{OpExpr}); } case Builtin::BI__builtin_hlsl_wave_active_sum: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1410,9 +1397,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID = getWaveActiveSumIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.sum"); + return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.wave.active.sum"); } case Builtin::BI__builtin_hlsl_wave_active_product: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1420,9 +1406,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Intrinsic::ID IID = getWaveActiveProductIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.product"); + return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.wave.active.product"); } case Builtin::BI__builtin_hlsl_wave_active_max: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1434,9 +1419,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, else IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.max"); + return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.wave.active.max"); } case Builtin::BI__builtin_hlsl_wave_active_min: { // Due to the use of variadic arguments, explicitly retrieve argument @@ -1448,9 +1432,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, else IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.active.min"); + return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.wave.active.min"); } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in @@ -1458,8 +1441,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, // for the DirectX intrinsic and the demangled builtin name switch (CGM.getTarget().getTriple().getArch()) { case llvm::Triple::dxil: - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), Intrinsic::dx_wave_getlaneindex)); + return EmitIntrinsicCall(Intrinsic::dx_wave_getlaneindex); case llvm::Triple::spirv: return EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {}, false), @@ -1471,54 +1453,46 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, } case Builtin::BI__builtin_hlsl_wave_is_first_lane: { Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_wave_get_lane_count: { Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_wave_read_lane_at: { // Due to the use of variadic arguments we must explicitly retrieve them and // create our function type. Value *OpExpr = EmitScalarExpr(E->getArg(0)); Value *OpIndex = EmitScalarExpr(E->getArg(1)); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(), - {OpExpr->getType()}), - ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane"); + return EmitIntrinsicCall(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(), + {OpExpr->getType()}, ArrayRef{OpExpr, OpIndex}, + "hlsl.wave.readlane"); } case Builtin::BI__builtin_hlsl_wave_prefix_sum: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID IID = getWavePrefixSumIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.prefix.sum"); + return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.wave.prefix.sum"); } case Builtin::BI__builtin_hlsl_wave_prefix_product: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID IID = getWavePrefixProductIntrinsic( getTarget().getTriple().getArch(), E->getArg(0)->getType()); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), IID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.wave.prefix.product"); + return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.wave.prefix.product"); } case Builtin::BI__builtin_hlsl_quad_read_across_x: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.quad.read.across.x"); + return EmitIntrinsicCall(ID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.quad.read.across.x"); } case Builtin::BI__builtin_hlsl_quad_read_across_y: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic(); - return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), ID, {OpExpr->getType()}), - ArrayRef{OpExpr}, "hlsl.quad.read.across.y"); + return EmitIntrinsicCall(ID, {OpExpr->getType()}, ArrayRef{OpExpr}, + "hlsl.quad.read.across.y"); } case Builtin::BI__builtin_hlsl_elementwise_sign: { auto *Arg0 = E->getArg(0); @@ -1576,36 +1550,30 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return handleHlslClip(E, this); case Builtin::BI__builtin_hlsl_all_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: { Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_device_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: { Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_group_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: { Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic(); - return EmitIntrinsicCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + return EmitIntrinsicCall(ID); } case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: { Value *Op0 = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7b2f0a5c5e3a2..77ca3e0fee84f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4610,9 +4610,13 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args, const Twine &name = ""); - llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee, + llvm::CallInst *EmitIntrinsicCall(llvm::Intrinsic::ID ID, const Twine &Name = ""); - llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee, + llvm::CallInst *EmitIntrinsicCall(llvm::Intrinsic::ID ID, + ArrayRef<llvm::Value *> Args, + const Twine &Name = ""); + llvm::CallInst *EmitIntrinsicCall(llvm::Intrinsic::ID ID, + ArrayRef<llvm::Type *> Types, ArrayRef<llvm::Value *> Args, const Twine &Name = ""); llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
