[clang] [Clang][HLSL] Use EmitIntrinsicCall instead of EmitRuntimeCall for intrinsic (PR #197380)

Wenju He via cfe-commits Wed, 13 May 2026 06:32:53 -0700

https://github.com/wenju-he updated 
https://github.com/llvm/llvm-project/pull/197380


From d842dff0d6fc57a0e1f45609d3d368b34085b576 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Wed, 13 May 2026 08:51:29 +0200
Subject: [PATCH 1/3] [Clang][HLSL] Fix EmitRuntimeCall to use C calling
 convention for intrinsics

Fix HLSL builtin to SPIR-V intrinsic lowering: intrinsic calls must use
CallingConv::C.
---
 clang/lib/CodeGen/CGCall.cpp                  |  6 +-
 .../builtins/AllMemoryBarrier.hlsl            |  2 +-
 .../AllMemoryBarrierWithGroupSync.hlsl        |  2 +-
 .../builtins/DeviceMemoryBarrier.hlsl         |  2 +-
 .../DeviceMemoryBarrierWithGroupSync.hlsl     |  2 +-
 .../builtins/GroupMemoryBarrier.hlsl          |  2 +-
 .../GroupMemoryBarrierWithGroupSync.hlsl      |  2 +-
 .../CodeGenHLSL/builtins/QuadReadAcrossX.hlsl | 88 +++++++++----------
 .../CodeGenHLSL/builtins/QuadReadAcrossY.hlsl |  2 +-
 .../builtins/WaveActiveAllEqual.hlsl          |  6 +-
 .../builtins/WaveActiveAllTrue.hlsl           |  2 +-
 .../builtins/WaveActiveAnyTrue.hlsl           |  2 +-
 .../builtins/WaveActiveBallot.hlsl            |  2 +-
 .../builtins/WaveActiveBitAnd.hlsl            |  2 +-
 .../CodeGenHLSL/builtins/WaveActiveBitOr.hlsl |  2 +-
 .../builtins/WaveActiveBitXor.hlsl            |  2 +-
 .../CodeGenHLSL/builtins/WaveActiveMax.hlsl   |  6 +-
 .../CodeGenHLSL/builtins/WaveActiveMin.hlsl   |  6 +-
 .../builtins/WaveActiveProduct.hlsl           |  6 +-
 .../CodeGenHLSL/builtins/WaveActiveSum.hlsl   |  6 +-
 .../builtins/WavePrefixCountBits.hlsl         |  2 +-
 .../builtins/WavePrefixProduct.hlsl           |  6 +-
 .../CodeGenHLSL/builtins/WavePrefixSum.hlsl   |  6 +-
 .../CodeGenHLSL/builtins/WaveReadLaneAt.hlsl  | 18 ++--
 .../builtins/wave_get_lane_count.hlsl         |  4 +-
 .../builtins/wave_is_first_lane.hlsl          |  4 +-
 26 files changed, 97 insertions(+), 93 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a2b9c945788ee..73e189bec93ff 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5233,7 +5233,11 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
                                                  const llvm::Twine &name) {
   llvm::CallInst *call = Builder.CreateCall(
       callee, args, getBundlesForFunclet(callee.getCallee()), name);
-  call->setCallingConv(getRuntimeCC());
+  // Intrinsics must use CallingConv::C; only apply the runtime CC to
+  // non-intrinsic callees.
+  if (auto *F = dyn_cast<llvm::Function>(callee.getCallee());
+      !F || !F->isIntrinsic())
+    call->setCallingConv(getRuntimeCC());
 
   if (CGM.shouldEmitConvergenceTokens() && call->isConvergent())
     return cast<llvm::CallInst>(addConvergenceControlToken(call));
diff --git a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl
index 90d51c716c771..0fa798a16b805 100644
--- a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl
@@ -11,7 +11,7 @@
 // CHECK-SPIRV: define hidden spir_func void @
 void test_AllMemoryBarrier() {
 // CHECK-DXIL: call void @llvm.[[TARGET]].all.memory.barrier()
-// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].all.memory.barrier()
+// CHECK-SPIRV: call void @llvm.[[TARGET]].all.memory.barrier()
   AllMemoryBarrier();
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl
index 6ddb69671e094..b4a3371f7628f 100644
--- a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl
@@ -11,7 +11,7 @@
 // CHECK-SPIRV: define hidden spir_func void @
 void test_AllMemoryBarrierWithGroupSync() {
 // CHECK-DXIL: call void @llvm.[[TARGET]].all.memory.barrier.with.group.sync()
-// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].all.memory.barrier.with.group.sync()
+// CHECK-SPIRV: call void @llvm.[[TARGET]].all.memory.barrier.with.group.sync()
   AllMemoryBarrierWithGroupSync();
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl
index e2c08f7775c8c..d9613aedc1cc6 100644
--- a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl
@@ -11,7 +11,7 @@
 // CHECK-SPIRV: define hidden spir_func void @
 void test_DeviceMemoryBarrier() {
 // CHECK-DXIL: call void @llvm.[[TARGET]].device.memory.barrier()
-// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].device.memory.barrier()
+// CHECK-SPIRV: call void @llvm.[[TARGET]].device.memory.barrier()
   DeviceMemoryBarrier();
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl
index fa455f5f8338b..bea7d7391aec2 100644
--- a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl
@@ -11,7 +11,7 @@
 // CHECK-SPIRV: define hidden spir_func void @
 void test_DeviceMemoryBarrierWithGroupSync() {
 // CHECK-DXIL: call void @llvm.[[TARGET]].device.memory.barrier.with.group.sync()
-// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].device.memory.barrier.with.group.sync()
+// CHECK-SPIRV: call void @llvm.[[TARGET]].device.memory.barrier.with.group.sync()
   DeviceMemoryBarrierWithGroupSync();
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
index b52819973f677..d33baeac940b6 100644
--- a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
@@ -11,7 +11,7 @@
 // CHECK-SPIRV: define hidden spir_func void @
 void test_GroupMemoryBarrier() {
 // CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier()
-// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier()
+// CHECK-SPIRV: call void @llvm.[[TARGET]].group.memory.barrier()
   GroupMemoryBarrier();
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl
index e709ed3616f0d..b69f67cb8dfaa 100644
--- a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl
@@ -11,7 +11,7 @@
 // CHECK-SPIRV: define hidden spir_func void @
 void test_GroupMemoryBarrierWithGroupSync() {
 // CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier.with.group.sync()
-// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier.with.group.sync()
+// CHECK-SPIRV: call void @llvm.[[TARGET]].group.memory.barrier.with.group.sync()
   GroupMemoryBarrierWithGroupSync();
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl
index 54dd82b9fd485..f6bf05e524964 100644
--- a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossX.hlsl
@@ -1,169 +1,169 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-compute %s -fnative-half-type -fnative-int16-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=dx -DCC=""
+// RUN:   --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=dx -DCC=""
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=dx
 
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \
 // RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN:   --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=spv -DCC="spir_func "
+// RUN:   --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=spv
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=spv -DCC="spir_func " 
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=spv
 
-// CHECK: %[[RET:.*]] = call [[CC]]i32 
@llvm.[[TARGET]].quad.read.across.x.i32(i32 %[[#]])
+// CHECK: %[[RET:.*]] = call i32 @llvm.[[TARGET]].quad.read.across.x.i32(i32 
%[[#]])
 // CHECK: ret i32 %[[RET]]
 int test_int(int expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]])
 // CHECK: ret <2 x i32> %[[RET]]
 int2 test_int2(int2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]])
 // CHECK: ret <3 x i32> %[[RET]]
 int3 test_int3(int3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]])
 // CHECK: ret <4 x i32> %[[RET]]
 int4 test_int4(int4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]i32 
@llvm.[[TARGET]].quad.read.across.x.i32(i32 %[[#]])
+// CHECK: %[[RET:.*]] = call i32 @llvm.[[TARGET]].quad.read.across.x.i32(i32 
%[[#]])
 // CHECK: ret i32 %[[RET]]
 uint test_uint(uint expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v2i32(<2 x i32> %[[#]])
 // CHECK: ret <2 x i32> %[[RET]]
 uint2 test_uint2(uint2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v3i32(<3 x i32> %[[#]])
 // CHECK: ret <3 x i32> %[[RET]]
 uint3 test_uint3(uint3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i32> 
@llvm.[[TARGET]].quad.read.across.x.v4i32(<4 x i32> %[[#]])
 // CHECK: ret <4 x i32> %[[RET]]
 uint4 test_uint4(uint4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]i64 
@llvm.[[TARGET]].quad.read.across.x.i64(i64 %[[#]])
+// CHECK: %[[RET:.*]] = call i64 @llvm.[[TARGET]].quad.read.across.x.i64(i64 
%[[#]])
 // CHECK: ret i64 %[[RET]]
 int64_t test_int64_t(int64_t expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]])
 // CHECK: ret <2 x i64> %[[RET]]
 int64_t2 test_int64_t2(int64_t2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]])
 // CHECK: ret <3 x i64> %[[RET]]
 int64_t3 test_int64_t3(int64_t3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]])
 // CHECK: ret <4 x i64> %[[RET]]
 int64_t4 test_int64_t4(int64_t4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]i64 
@llvm.[[TARGET]].quad.read.across.x.i64(i64 %[[#]])
+// CHECK: %[[RET:.*]] = call i64 @llvm.[[TARGET]].quad.read.across.x.i64(i64 
%[[#]])
 // CHECK: ret i64 %[[RET]]
 uint64_t test_uint64_t(uint64_t expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v2i64(<2 x i64> %[[#]])
 // CHECK: ret <2 x i64> %[[RET]]
 uint64_t2 test_uint64_t2(uint64_t2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v3i64(<3 x i64> %[[#]])
 // CHECK: ret <3 x i64> %[[RET]]
 uint64_t3 test_uint64_t3(uint64_t3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i64> 
@llvm.[[TARGET]].quad.read.across.x.v4i64(<4 x i64> %[[#]])
 // CHECK: ret <4 x i64> %[[RET]]
 uint64_t4 test_uint64_t4(uint64_t4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float 
@llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float 
@llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]])
 // CHECK: ret float %[[RET]]
 float test_float(float expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> 
@llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> 
@llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]])
 // CHECK: ret <2 x float> %[[RET]]
 float2 test_float2(float2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> 
@llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> 
@llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]])
 // CHECK: ret <3 x float> %[[RET]]
 float3 test_float3(float3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> 
@llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> 
@llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]])
 // CHECK: ret <4 x float> %[[RET]]
 float4 test_float4(float4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]double 
@llvm.[[TARGET]].quad.read.across.x.f64(double %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn double 
@llvm.[[TARGET]].quad.read.across.x.f64(double %[[#]])
 // CHECK: ret double %[[RET]]
 double test_double(double expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x double> 
@llvm.[[TARGET]].quad.read.across.x.v2f64(<2 x double> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x double> 
@llvm.[[TARGET]].quad.read.across.x.v2f64(<2 x double> %[[#]])
 // CHECK: ret <2 x double> %[[RET]]
 double2 test_double2(double2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x double> 
@llvm.[[TARGET]].quad.read.across.x.v3f64(<3 x double> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x double> 
@llvm.[[TARGET]].quad.read.across.x.v3f64(<3 x double> %[[#]])
 // CHECK: ret <3 x double> %[[RET]]
 double3 test_double3(double3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x double> 
@llvm.[[TARGET]].quad.read.across.x.v4f64(<4 x double> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x double> 
@llvm.[[TARGET]].quad.read.across.x.v4f64(<4 x double> %[[#]])
 // CHECK: ret <4 x double> %[[RET]]
 double4 test_double4(double4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]half @llvm.[[TARGET]].quad.read.across.x.f16(half %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn half 
@llvm.[[TARGET]].quad.read.across.x.f16(half %[[#]])
 // CHECK-NATIVE_HALF: ret half %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]float @llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float 
@llvm.[[TARGET]].quad.read.across.x.f32(float %[[#]])
 // CHECK-NO_HALF: ret float %[[RET]]
 half test_half(half expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]<2 x half> @llvm.[[TARGET]].quad.read.across.x.v2f16(<2 x half> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x 
half> @llvm.[[TARGET]].quad.read.across.x.v2f16(<2 x half> %[[#]])
 // CHECK-NATIVE_HALF: ret <2 x half> %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x 
float> @llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x 
float> @llvm.[[TARGET]].quad.read.across.x.v2f32(<2 x float> %[[#]])
 // CHECK-NO_HALF: ret <2 x float> %[[RET]]
 half2 test_half2(half2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]<3 x half> @llvm.[[TARGET]].quad.read.across.x.v3f16(<3 x half> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x 
half> @llvm.[[TARGET]].quad.read.across.x.v3f16(<3 x half> %[[#]])
 // CHECK-NATIVE_HALF: ret <3 x half> %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x 
float> @llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x 
float> @llvm.[[TARGET]].quad.read.across.x.v3f32(<3 x float> %[[#]])
 // CHECK-NO_HALF: ret <3 x float> %[[RET]]
 half3 test_half3(half3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]<4 x half> @llvm.[[TARGET]].quad.read.across.x.v4f16(<4 x half> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x 
half> @llvm.[[TARGET]].quad.read.across.x.v4f16(<4 x half> %[[#]])
 // CHECK-NATIVE_HALF: ret <4 x half> %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x 
float> @llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x 
float> @llvm.[[TARGET]].quad.read.across.x.v4f32(<4 x float> %[[#]])
 // CHECK-NO_HALF: ret <4 x float> %[[RET]]
 half4 test_half4(half4 expr) { return QuadReadAcrossX(expr); }
 
 #ifdef __HLSL_ENABLE_16_BIT
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 
@llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 
@llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]])
 // CHECK-NATIVE_HALF: ret i16 %[[RET]]
 int16_t test_int16_t(int16_t expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]]
 int16_t2 test_int16_t2(int16_t2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]]
 int16_t3 test_int16_t3(int16_t3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]]
 int16_t4 test_int16_t4(int16_t4 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 
@llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 
@llvm.[[TARGET]].quad.read.across.x.i16(i16 %[[#]])
 // CHECK-NATIVE_HALF: ret i16 %[[RET]]
 uint16_t test_uint16_t(uint16_t expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v2i16(<2 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]]
 uint16_t2 test_uint16_t2(uint16_t2 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v3i16(<3 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]]
 uint16_t3 test_uint16_t3(uint16_t3 expr) { return QuadReadAcrossX(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> 
@llvm.[[TARGET]].quad.read.across.x.v4i16(<4 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]]
 uint16_t4 test_uint16_t4(uint16_t4 expr) { return QuadReadAcrossX(expr); }
 #endif
diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl
index 313c287dc1a7d..95ecd575e56fc 100644
--- a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl
@@ -16,7 +16,7 @@
 
 // Capture the expected interchange format so not every check needs to be duplicated
 // CHECK-DXIL: %[[RET:.*]] = call [[CC:]]i32 @llvm.[[ICF:dx]].quad.read.across.y.i32(i32 %[[#]])
-// CHECK-SPIRV: %[[RET:.*]] = call [[CC:spir_func ]]i32 @llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]])
+// CHECK-SPIRV: %[[RET:.*]] = call [[CC:]]i32 @llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]])
 // CHECK: ret i32 %[[RET]]
 int test_int(int expr) { return QuadReadAcrossY(expr); }
 
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl
index 323aa439984f9..f8bcdfdb3333f 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveAllEqual.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 bool test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func i1 
@llvm.spv.wave.all.equal.i32(i32
+  // CHECK-SPIRV:  %[[RET:.*]] = call i1 @llvm.spv.wave.all.equal.i32(i32
   // CHECK-DXIL:  %[[RET:.*]] = call i1 @llvm.dx.wave.all.equal.i32(i32
   // CHECK:  ret i1 %[[RET]]
   return WaveActiveAllEqual(expr);
@@ -20,7 +20,7 @@ bool test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 bool test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func i1 
@llvm.spv.wave.all.equal.i64(i64 
+  // CHECK-SPIRV:  %[[RET:.*]] = call i1 @llvm.spv.wave.all.equal.i64(i64 
   // CHECK-DXIL:  %[[RET:.*]] = call i1 @llvm.dx.wave.all.equal.i64(i64
   // CHECK:  ret i1 %[[RET]]
   return WaveActiveAllEqual(expr);
@@ -33,7 +33,7 @@ bool test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 bool4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call spir_func <4 x i1> 
@llvm.spv.wave.all.equal.v4f32(<4 x float> 
+  // CHECK-SPIRV:  %[[RET1:.*]] = call <4 x i1> 
@llvm.spv.wave.all.equal.v4f32(<4 x float> 
   // CHECK-DXIL:  %[[RET1:.*]] = call <4 x i1> 
@llvm.dx.wave.all.equal.v4f32(<4 x float> 
   // CHECK:  ret <4 x i1> %[[RET1]]
   return WaveActiveAllEqual(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl
index f499fc97f43fc..94060ceb97e66 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveAllTrue.hlsl
@@ -10,7 +10,7 @@
 // CHECK-LABEL: define {{.*}}test
 bool test(bool p1) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func i1 @llvm.spv.wave.all(i1 
%{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call i1 @llvm.spv.wave.all(i1 
%{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call i1 @llvm.dx.wave.all(i1 %{{[a-zA-Z0-9]+}})
   // CHECK:  ret i1 %[[RET]]
   return WaveActiveAllTrue(p1);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl
index 3655cdb443fa9..c4b8239448f2c 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveAnyTrue.hlsl
@@ -10,7 +10,7 @@
 // CHECK-LABEL: define {{.*}}test
 bool test(bool p1) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func i1 @llvm.spv.wave.any(i1 
%{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call i1 @llvm.spv.wave.any(i1 
%{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call i1 @llvm.dx.wave.any(i1 %{{[a-zA-Z0-9]+}})
   // CHECK:  ret i1 %[[RET]]
   return WaveActiveAnyTrue(p1);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
index df2d854a64247..4c7d5cd2a1c4a 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
@@ -10,7 +10,7 @@
 // CHECK-LABEL: define {{.*}}test
 uint4 test(bool p1) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV: %[[SPIRVRET:.*]] = call spir_func <4 x i32> 
@llvm.spv.subgroup.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token 
%[[#entry_tok0]]) ]
+  // CHECK-SPIRV: %[[SPIRVRET:.*]] = call <4 x i32> 
@llvm.spv.subgroup.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token 
%[[#entry_tok0]]) ]
   // CHECK-DXIL: %[[WAB:.*]] = call { i32, i32, i32, i32 } 
@llvm.dx.wave.ballot.i32(i1 %{{[a-zA-Z0-9]+}})
   // CHECK-DXIL-NEXT: extractvalue { i32, i32, i32, i32 } {{.*}} 0
   // CHECK-DXIL-NEXT: insertelement <4 x i32> poison, i32 {{.*}}, i32 0
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl
index a6da9678d7275..1e1801e49540a 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl
@@ -4,7 +4,7 @@
 
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
 // RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func"
+// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call"
 
 // Test basic lowering to runtime function call.
 
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl
index 80364724448fa..e7531b6013166 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitOr.hlsl
@@ -4,7 +4,7 @@
 
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
 // RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func"
+// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call"
 
 // Test basic lowering to runtime function call.
 
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
index 9c94663390843..b03cb51c6df04 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
@@ -4,7 +4,7 @@
 
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
 // RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
-// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func"
+// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call"
 
 // Test basic lowering to runtime function call.
 
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
index be05a17cc3692..a4628ad103e0d 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 int test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.reduce.max.i32([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.reduce.max.i32([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.reduce.max.i32([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveMax(expr);
@@ -20,7 +20,7 @@ int test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.reduce.umax.i64([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.reduce.umax.i64([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.reduce.umax.i64([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveMax(expr);
@@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.reduce.max.v4f32([[TY1]] %[[#]]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.reduce.max.v4f32([[TY1]] %[[#]]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.reduce.max.v4f32([[TY1]] %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WaveActiveMax(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl
index 1194f842deed6..f2e3686947f51 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 int test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.reduce.min.i32([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.reduce.min.i32([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.reduce.min.i32([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveMin(expr);
@@ -20,7 +20,7 @@ int test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.reduce.umin.i64([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.reduce.umin.i64([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.reduce.umin.i64([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveMin(expr);
@@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.reduce.min.v4f32([[TY1]] %[[#]]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.reduce.min.v4f32([[TY1]] %[[#]]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.reduce.min.v4f32([[TY1]] %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WaveActiveMin(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl 
b/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl
index 3a8320e7333fc..0247b7cbeb0f6 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveProduct.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 int test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.product.i32([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.product.i32([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.product.i32([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveProduct(expr);
@@ -20,7 +20,7 @@ int test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.product.i64([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.product.i64([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.uproduct.i64([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveProduct(expr);
@@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.product.v4f32([[TY1]] %[[#]]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.product.v4f32([[TY1]] %[[#]]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.product.v4f32([[TY1]] %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WaveActiveProduct(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl 
b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
index 1fc93c62c8db0..6caa3d775f0d2 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveSum.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 int test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.reduce.sum.i32([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.reduce.sum.i32([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.reduce.sum.i32([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveSum(expr);
@@ -20,7 +20,7 @@ int test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.reduce.sum.i64([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.reduce.sum.i64([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.reduce.usum.i64([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveActiveSum(expr);
@@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]] %[[#]]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.reduce.sum.v4f32([[TY1]] %[[#]]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.reduce.sum.v4f32([[TY1]] %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WaveActiveSum(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl 
b/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl
index 25d9074b08a68..bfd42740ac4ed 100644
--- a/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WavePrefixCountBits.hlsl
@@ -18,7 +18,7 @@ int test_int(bool expr) {
   // CHECK: %[[LOADEDVAL:.*]] = load i32, ptr %[[EXPRADDR]], align 4
   // CHECK: %[[TRUNCLOADEDVAL:.*]] = icmp ne i32 %[[LOADEDVAL]], 0
 
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.subgroup.prefix.bit.count(i1 %[[TRUNCLOADEDVAL]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.subgroup.prefix.bit.count(i1 %[[TRUNCLOADEDVAL]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.prefix.bit.count(i1 %[[TRUNCLOADEDVAL]])
   // CHECK: ret [[TY]] %[[RET]]
   return WavePrefixCountBits(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl 
b/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl
index a45cbf29b87f2..a4dc01527a7f2 100644
--- a/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WavePrefixProduct.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 int test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.prefix.product.i32([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.prefix.product.i32([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.prefix.product.i32([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WavePrefixProduct(expr);
@@ -20,7 +20,7 @@ int test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.prefix.product.i64([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.prefix.product.i64([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.prefix.uproduct.i64([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WavePrefixProduct(expr);
@@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.prefix.product.v4f32([[TY1]] %[[#]]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.prefix.product.v4f32([[TY1]] %[[#]]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.prefix.product.v4f32([[TY1]] %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WavePrefixProduct(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl 
b/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl
index f22aa69ba45d5..a1df3fe02c802 100644
--- a/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WavePrefixSum.hlsl
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: test_int
 int test_int(int expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.prefix.sum.i32([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.prefix.sum.i32([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.prefix.sum.i32([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WavePrefixSum(expr);
@@ -20,7 +20,7 @@ int test_int(int expr) {
 
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr) {
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.prefix.sum.i64([[TY]] %[[#]])
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.prefix.sum.i64([[TY]] %[[#]])
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.prefix.usum.i64([[TY]] %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WavePrefixSum(expr);
@@ -33,7 +33,7 @@ uint64_t test_uint64_t(uint64_t expr) {
 
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr) {
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.prefix.sum.v4f32([[TY1]] %[[#]]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.prefix.sum.v4f32([[TY1]] %[[#]]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.prefix.sum.v4f32([[TY1]] %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WavePrefixSum(expr);
diff --git a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl 
b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
index da6cbc40a79bb..24252f3fa3207 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveReadLaneAt.hlsl
@@ -10,7 +10,7 @@
 // CHECK-LABEL: test_int
 int test_int(int expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -22,7 +22,7 @@ int test_int(int expr, uint idx) {
 // CHECK-LABEL: test_uint
 uint test_uint(uint expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.readlane.i32([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -31,7 +31,7 @@ uint test_uint(uint expr, uint idx) {
 // CHECK-LABEL: test_int64_t
 int64_t test_int64_t(int64_t expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -43,7 +43,7 @@ int64_t test_int64_t(int64_t expr, uint idx) {
 // CHECK-LABEL: test_uint64_t
 uint64_t test_uint64_t(uint64_t expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok0:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok0]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.readlane.i64([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -53,7 +53,7 @@ uint64_t test_uint64_t(uint64_t expr, uint idx) {
 // CHECK-LABEL: test_int16
 int16_t test_int16(int16_t expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok1:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok1]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok1]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -65,7 +65,7 @@ int16_t test_int16(int16_t expr, uint idx) {
 // CHECK-LABEL: test_uint16
 uint16_t test_uint16(uint16_t expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok1:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] 
@llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok1]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.spv.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok1]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] 
@llvm.dx.wave.readlane.i16([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -77,7 +77,7 @@ uint16_t test_uint16(uint16_t expr, uint idx) {
 // CHECK-LABEL: test_half
 half test_half(half expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok2:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func 
[[TY:.*]] @llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok2]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] 
@llvm.spv.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok2]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] 
@llvm.dx.wave.readlane.f16([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -89,7 +89,7 @@ half test_half(half expr, uint idx) {
 // CHECK-LABEL: test_double
 double test_double(double expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok3:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn spir_func 
[[TY:.*]] @llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok3]]) ]
+  // CHECK-SPIRV:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] 
@llvm.spv.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok3]]) ]
   // CHECK-DXIL:  %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[TY:.*]] 
@llvm.dx.wave.readlane.f64([[TY]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY]] %[[RET]]
   return WaveReadLaneAt(expr, idx);
@@ -101,7 +101,7 @@ double test_double(double expr, uint idx) {
 // CHECK-LABEL: test_floatv4
 float4 test_floatv4(float4 expr, uint idx) {
   // CHECK-SPIRV: %[[#entry_tok4:]] = call token 
@llvm.experimental.convergence.entry()
-  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
spir_func [[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) 
[ "convergencectrl"(token %[[#entry_tok4]]) ]
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.spv.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]]) [ 
"convergencectrl"(token %[[#entry_tok4]]) ]
   // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn 
[[TY1:.*]] @llvm.dx.wave.readlane.v4f32([[TY1]] %[[#]], i32 %[[#]])
   // CHECK:  ret [[TY1]] %[[RET1]]
   return WaveReadLaneAt(expr, idx);
diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl 
b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
index 8072f6d4ea206..fdf019262d8cb 100644
--- a/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_count.hlsl
@@ -14,13 +14,13 @@ void main() {
   while (a) {
 
 // CHECK-DXIL:  %[[#]] = call i32 @llvm.dx.wave.get.lane.count()
-// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count()
+// CHECK-SPIRV: %[[#]] = call i32 @llvm.spv.wave.get.lane.count()
 // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ]
     a = WaveGetLaneCount();
   }
 
 // CHECK-DXIL:  %[[#]] = call i32 @llvm.dx.wave.get.lane.count()
-// CHECK-SPIRV: %[[#]] = call spir_func i32 @llvm.spv.wave.get.lane.count()
+// CHECK-SPIRV: %[[#]] = call i32 @llvm.spv.wave.get.lane.count()
 // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ]
   b = WaveGetLaneCount();
 }
diff --git a/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl 
b/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl
index 2fb6defb896f9..18860c321eb91 100644
--- a/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/wave_is_first_lane.hlsl
@@ -13,7 +13,7 @@ void main() {
   while (true) {
 
 // CHECK-DXIL:  %[[#]] = call i1 @llvm.dx.wave.is.first.lane()
-// CHECK-SPIRV: %[[#]] = call spir_func i1 @llvm.spv.wave.is.first.lane()
+// CHECK-SPIRV: %[[#]] = call i1 @llvm.spv.wave.is.first.lane()
 // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#loop_tok]]) ]
     if (WaveIsFirstLane()) {
       break;
@@ -21,7 +21,7 @@ void main() {
   }
 
 // CHECK-DXIL:  %[[#]] = call i1 @llvm.dx.wave.is.first.lane()
-// CHECK-SPIRV: %[[#]] = call spir_func i1 @llvm.spv.wave.is.first.lane()
+// CHECK-SPIRV: %[[#]] = call i1 @llvm.spv.wave.is.first.lane()
 // CHECK-SPIRV-SAME: [ "convergencectrl"(token %[[#entry_tok]]) ]
   if (WaveIsFirstLane()) {
     return;

>From ef4633d82c5ed0888841a4fbb97327e614e8d1d2 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Wed, 13 May 2026 10:11:02 +0200
Subject: [PATCH 2/3] Add new EmitIntrinsicCall and use in CGHLSLBuiltins

---
 clang/lib/CodeGen/CGCall.cpp                  |  25 ++++-
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 104 +++++++++---------
 clang/lib/CodeGen/CodeGenFunction.h           |   5 +
 .../CodeGenHLSL/builtins/QuadReadAcrossY.hlsl |  82 +++++++-------
 4 files changed, 118 insertions(+), 98 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 73e189bec93ff..b698d4489b1cf 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5233,17 +5233,32 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
                                                  const llvm::Twine &name) {
   llvm::CallInst *call = Builder.CreateCall(
       callee, args, getBundlesForFunclet(callee.getCallee()), name);
-  // Intrinsics must use CallingConv::C; only apply the runtime CC to
-  // non-intrinsic callees.
-  if (auto *F = dyn_cast<llvm::Function>(callee.getCallee());
-      !F || !F->isIntrinsic())
-    call->setCallingConv(getRuntimeCC());
+  call->setCallingConv(getRuntimeCC());
 
   if (CGM.shouldEmitConvergenceTokens() && call->isConvergent())
     return cast<llvm::CallInst>(addConvergenceControlToken(call));
   return call;
 }
 
+llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee,
+                                                   const llvm::Twine &Name) {
+  return EmitIntrinsicCall(Callee, {}, Name);
+}
+
+llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee,
+                                                   ArrayRef<llvm::Value *> Args,
+                                                   const llvm::Twine &Name) {
+  assert(dyn_cast<llvm::Function>(Callee.getCallee()) &&
+         cast<llvm::Function>(Callee.getCallee())->isIntrinsic() &&
+         "EmitIntrinsicCall called with non-intrinsic callee");
+  llvm::CallInst *Call = Builder.CreateCall(
+      Callee, Args, getBundlesForFunclet(Callee.getCallee()), Name);
+
+  if (CGM.shouldEmitConvergenceTokens() && Call->isConvergent())
+    return cast<llvm::CallInst>(addConvergenceControlToken(Call));
+  return Call;
+}
+
 /// Emits a call or invoke to the given noreturn runtime function.
 void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(
     llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args) {
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 82b03d7d5f069..2e672442f281a 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -174,7 +174,7 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction 
&CGF,
     // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
     llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32});
 
-    Value *StructVal = CGF.EmitRuntimeCall(Fn, Cond);
+    Value *StructVal = CGF.EmitIntrinsicCall(Fn, Cond);
     assert(StructVal->getType() == Struct4I32 &&
            "dx.wave.ballot must return {i32,i32,i32,i32}");
 
@@ -190,7 +190,7 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction 
&CGF,
   }
 
   if (CGF.CGM.getTarget().getTriple().isSPIRV())
-    return CGF.EmitRuntimeCall(
+    return CGF.EmitIntrinsicCall(
         CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond);
 
   llvm_unreachable(
@@ -1288,7 +1288,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Intrinsic::ID IID =
         getPrefixCountBitsIntrinsic(getTarget().getTriple().getArch());
 
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), IID), ArrayRef{Op},
         "hlsl.wave.prefix.bit.count");
   }
@@ -1335,9 +1335,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Value *Op = EmitScalarExpr(E->getArg(0));
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic();
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), ID, {Op->getType()}),
-                           {Op});
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), ID, {Op->getType()}),
+                             {Op});
   }
   case Builtin::BI__builtin_hlsl_wave_active_all_true: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1345,7 +1345,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "Intrinsic WaveActiveAllTrue operand must be a bool");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
   }
   case Builtin::BI__builtin_hlsl_wave_active_any_true: {
@@ -1354,7 +1354,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "Intrinsic WaveActiveAnyTrue operand must be a bool");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
   }
   case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
@@ -1364,9 +1364,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), ID, {Op->getType()}),
-                           ArrayRef{Op}, "hlsl.wave.active.bit.or");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), ID, {Op->getType()}),
+                             ArrayRef{Op}, "hlsl.wave.active.bit.or");
   }
   case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1375,9 +1375,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), ID, {Op->getType()}),
-                           ArrayRef{Op}, "hlsl.wave.active.bit.xor");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), ID, {Op->getType()}),
+                             ArrayRef{Op}, "hlsl.wave.active.bit.xor");
   }
   case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1386,9 +1386,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), ID, {Op->getType()}),
-                           ArrayRef{Op}, "hlsl.wave.active.bit.and");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), ID, {Op->getType()}),
+                             ArrayRef{Op}, "hlsl.wave.active.bit.and");
   }
   case Builtin::BI__builtin_hlsl_wave_active_ballot: {
     [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1400,7 +1400,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
         ArrayRef{OpExpr});
   }
@@ -1410,9 +1410,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Intrinsic::ID IID = getWaveActiveSumIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
 
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), IID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.wave.active.sum");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), IID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.wave.active.sum");
   }
   case Builtin::BI__builtin_hlsl_wave_active_product: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1420,9 +1420,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Intrinsic::ID IID = getWaveActiveProductIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
 
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), IID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.wave.active.product");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), IID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.wave.active.product");
   }
   case Builtin::BI__builtin_hlsl_wave_active_max: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1434,9 +1434,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     else
       IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();
 
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), IID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.wave.active.max");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), IID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.wave.active.max");
   }
   case Builtin::BI__builtin_hlsl_wave_active_min: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1448,9 +1448,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     else
       IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();
 
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), IID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.wave.active.min");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), IID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.wave.active.min");
   }
   case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
     // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
@@ -1458,7 +1458,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     // for the DirectX intrinsic and the demangled builtin name
     switch (CGM.getTarget().getTriple().getArch()) {
     case llvm::Triple::dxil:
-      return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+      return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
           &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
     case llvm::Triple::spirv:
       return EmitRuntimeCall(CGM.CreateRuntimeFunction(
@@ -1471,12 +1471,12 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   }
   case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
@@ -1484,7 +1484,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     // create our function type.
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Value *OpIndex = EmitScalarExpr(E->getArg(1));
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(
             &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
             {OpExpr->getType()}),
@@ -1494,31 +1494,31 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID IID = getWavePrefixSumIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), IID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.wave.prefix.sum");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), IID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.wave.prefix.sum");
   }
   case Builtin::BI__builtin_hlsl_wave_prefix_product: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID IID = getWavePrefixProductIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), IID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.wave.prefix.product");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), IID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.wave.prefix.product");
   }
   case Builtin::BI__builtin_hlsl_quad_read_across_x: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic();
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), ID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.quad.read.across.x");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), ID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.quad.read.across.x");
   }
   case Builtin::BI__builtin_hlsl_quad_read_across_y: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic();
-    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
-                               &CGM.getModule(), ID, {OpExpr->getType()}),
-                           ArrayRef{OpExpr}, "hlsl.quad.read.across.y");
+    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
+                                 &CGM.getModule(), ID, {OpExpr->getType()}),
+                             ArrayRef{OpExpr}, "hlsl.quad.read.across.y");
   }
   case Builtin::BI__builtin_hlsl_elementwise_sign: {
     auto *Arg0 = E->getArg(0);
@@ -1576,35 +1576,35 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     return handleHlslClip(E, this);
   case Builtin::BI__builtin_hlsl_all_memory_barrier: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: {
     Intrinsic::ID ID =
         CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_device_memory_barrier: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: {
     Intrinsic::ID ID =
         CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_group_memory_barrier: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
     Intrinsic::ID ID =
         CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
-    return EmitRuntimeCall(
+    return EmitIntrinsicCall(
         Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
   }
   case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e0f8e62fb53af..7b2f0a5c5e3a2 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4610,6 +4610,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee,
                                   ArrayRef<llvm::Value *> args,
                                   const Twine &name = "");
+  llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee,
+                                    const Twine &Name = "");
+  llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee,
+                                    ArrayRef<llvm::Value *> Args,
+                                    const Twine &Name = "");
   llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
                                           const Twine &name = "");
   llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl
index 95ecd575e56fc..9d70545f90a28 100644
--- a/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/QuadReadAcrossY.hlsl
@@ -15,157 +15,157 @@
 // RUN:   -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV,CHECK-NO_HALF
 
 // Capture the expected interchange format so not every check needs to be duplicated
-// CHECK-DXIL: %[[RET:.*]] = call [[CC:]]i32 
@llvm.[[ICF:dx]].quad.read.across.y.i32(i32 %[[#]])
-// CHECK-SPIRV: %[[RET:.*]] = call [[CC:]]i32 
@llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]])
+// CHECK-DXIL: %[[RET:.*]] = call i32 
@llvm.[[ICF:dx]].quad.read.across.y.i32(i32 %[[#]])
+// CHECK-SPIRV: %[[RET:.*]] = call i32 
@llvm.[[ICF:spv]].quad.read.across.y.i32(i32 %[[#]])
 // CHECK: ret i32 %[[RET]]
 int test_int(int expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> 
@llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i32> 
@llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]])
 // CHECK: ret <2 x i32> %[[RET]]
 int2 test_int2(int2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> 
@llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i32> 
@llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]])
 // CHECK: ret <3 x i32> %[[RET]]
 int3 test_int3(int3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> 
@llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i32> 
@llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]])
 // CHECK: ret <4 x i32> %[[RET]]
 int4 test_int4(int4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]i32 
@llvm.[[ICF]].quad.read.across.y.i32(i32 %[[#]])
+// CHECK: %[[RET:.*]] = call i32 @llvm.[[ICF]].quad.read.across.y.i32(i32 
%[[#]])
 // CHECK: ret i32 %[[RET]]
 uint test_uint(uint expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> 
@llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i32> 
@llvm.[[ICF]].quad.read.across.y.v2i32(<2 x i32> %[[#]])
 // CHECK: ret <2 x i32> %[[RET]]
 uint2 test_uint2(uint2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> 
@llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i32> 
@llvm.[[ICF]].quad.read.across.y.v3i32(<3 x i32> %[[#]])
 // CHECK: ret <3 x i32> %[[RET]]
 uint3 test_uint3(uint3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> 
@llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i32> 
@llvm.[[ICF]].quad.read.across.y.v4i32(<4 x i32> %[[#]])
 // CHECK: ret <4 x i32> %[[RET]]
 uint4 test_uint4(uint4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]i64 
@llvm.[[ICF]].quad.read.across.y.i64(i64 %[[#]])
+// CHECK: %[[RET:.*]] = call i64 @llvm.[[ICF]].quad.read.across.y.i64(i64 
%[[#]])
 // CHECK: ret i64 %[[RET]]
 int64_t test_int64_t(int64_t expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> 
@llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i64> 
@llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]])
 // CHECK: ret <2 x i64> %[[RET]]
 int64_t2 test_int64_t2(int64_t2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> 
@llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i64> 
@llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]])
 // CHECK: ret <3 x i64> %[[RET]]
 int64_t3 test_int64_t3(int64_t3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> 
@llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i64> 
@llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]])
 // CHECK: ret <4 x i64> %[[RET]]
 int64_t4 test_int64_t4(int64_t4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]i64 
@llvm.[[ICF]].quad.read.across.y.i64(i64 %[[#]])
+// CHECK: %[[RET:.*]] = call i64 @llvm.[[ICF]].quad.read.across.y.i64(i64 
%[[#]])
 // CHECK: ret i64 %[[RET]]
 uint64_t test_uint64_t(uint64_t expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> 
@llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <2 x i64> 
@llvm.[[ICF]].quad.read.across.y.v2i64(<2 x i64> %[[#]])
 // CHECK: ret <2 x i64> %[[RET]]
 uint64_t2 test_uint64_t2(uint64_t2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> 
@llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <3 x i64> 
@llvm.[[ICF]].quad.read.across.y.v3i64(<3 x i64> %[[#]])
 // CHECK: ret <3 x i64> %[[RET]]
 uint64_t3 test_uint64_t3(uint64_t3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> 
@llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]])
+// CHECK: %[[RET:.*]] = call <4 x i64> 
@llvm.[[ICF]].quad.read.across.y.v4i64(<4 x i64> %[[#]])
 // CHECK: ret <4 x i64> %[[RET]]
 uint64_t4 test_uint64_t4(uint64_t4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float 
@llvm.[[ICF]].quad.read.across.y.f32(float %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float 
@llvm.[[ICF]].quad.read.across.y.f32(float %[[#]])
 // CHECK: ret float %[[RET]]
 float test_float(float expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> 
@llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x float> 
@llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]])
 // CHECK: ret <2 x float> %[[RET]]
 float2 test_float2(float2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> 
@llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x float> 
@llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]])
 // CHECK: ret <3 x float> %[[RET]]
 float3 test_float3(float3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> 
@llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x float> 
@llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]])
 // CHECK: ret <4 x float> %[[RET]]
 float4 test_float4(float4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]double 
@llvm.[[ICF]].quad.read.across.y.f64(double %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn double 
@llvm.[[ICF]].quad.read.across.y.f64(double %[[#]])
 // CHECK: ret double %[[RET]]
 double test_double(double expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x double> 
@llvm.[[ICF]].quad.read.across.y.v2f64(<2 x double> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x double> 
@llvm.[[ICF]].quad.read.across.y.v2f64(<2 x double> %[[#]])
 // CHECK: ret <2 x double> %[[RET]]
 double2 test_double2(double2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x double> 
@llvm.[[ICF]].quad.read.across.y.v3f64(<3 x double> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x double> 
@llvm.[[ICF]].quad.read.across.y.v3f64(<3 x double> %[[#]])
 // CHECK: ret <3 x double> %[[RET]]
 double3 test_double3(double3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x double> 
@llvm.[[ICF]].quad.read.across.y.v4f64(<4 x double> %[[#]])
+// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x double> 
@llvm.[[ICF]].quad.read.across.y.v4f64(<4 x double> %[[#]])
 // CHECK: ret <4 x double> %[[RET]]
 double4 test_double4(double4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]half @llvm.[[ICF]].quad.read.across.y.f16(half %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn half 
@llvm.[[ICF]].quad.read.across.y.f16(half %[[#]])
 // CHECK-NATIVE_HALF: ret half %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]float @llvm.[[ICF]].quad.read.across.y.f32(float %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn float 
@llvm.[[ICF]].quad.read.across.y.f32(float %[[#]])
 // CHECK-NO_HALF: ret float %[[RET]]
 half test_half(half expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]<2 x half> @llvm.[[ICF]].quad.read.across.y.v2f16(<2 x half> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x 
half> @llvm.[[ICF]].quad.read.across.y.v2f16(<2 x half> %[[#]])
 // CHECK-NATIVE_HALF: ret <2 x half> %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x 
float> @llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <2 x 
float> @llvm.[[ICF]].quad.read.across.y.v2f32(<2 x float> %[[#]])
 // CHECK-NO_HALF: ret <2 x float> %[[RET]]
 half2 test_half2(half2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]<3 x half> @llvm.[[ICF]].quad.read.across.y.v3f16(<3 x half> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x 
half> @llvm.[[ICF]].quad.read.across.y.v3f16(<3 x half> %[[#]])
 // CHECK-NATIVE_HALF: ret <3 x half> %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x 
float> @llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <3 x 
float> @llvm.[[ICF]].quad.read.across.y.v3f32(<3 x float> %[[#]])
 // CHECK-NO_HALF: ret <3 x float> %[[RET]]
 half3 test_half3(half3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn 
[[CC]]<4 x half> @llvm.[[ICF]].quad.read.across.y.v4f16(<4 x half> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x 
half> @llvm.[[ICF]].quad.read.across.y.v4f16(<4 x half> %[[#]])
 // CHECK-NATIVE_HALF: ret <4 x half> %[[RET]]
-// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x 
float> @llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]])
+// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn <4 x 
float> @llvm.[[ICF]].quad.read.across.y.v4f32(<4 x float> %[[#]])
 // CHECK-NO_HALF: ret <4 x float> %[[RET]]
 half4 test_half4(half4 expr) { return QuadReadAcrossY(expr); }
 
 #ifdef __HLSL_ENABLE_16_BIT
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 
@llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 
@llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]])
 // CHECK-NATIVE_HALF: ret i16 %[[RET]]
 int16_t test_int16_t(int16_t expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> 
@llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> 
@llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]]
 int16_t2 test_int16_t2(int16_t2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> 
@llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> 
@llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]]
 int16_t3 test_int16_t3(int16_t3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> 
@llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> 
@llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]]
 int16_t4 test_int16_t4(int16_t4 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 
@llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call i16 
@llvm.[[ICF]].quad.read.across.y.i16(i16 %[[#]])
 // CHECK-NATIVE_HALF: ret i16 %[[RET]]
 uint16_t test_uint16_t(uint16_t expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> 
@llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <2 x i16> 
@llvm.[[ICF]].quad.read.across.y.v2i16(<2 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]]
 uint16_t2 test_uint16_t2(uint16_t2 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> 
@llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <3 x i16> 
@llvm.[[ICF]].quad.read.across.y.v3i16(<3 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]]
 uint16_t3 test_uint16_t3(uint16_t3 expr) { return QuadReadAcrossY(expr); }
 
-// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> 
@llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]])
+// CHECK-NATIVE_HALF: %[[RET:.*]] = call <4 x i16> 
@llvm.[[ICF]].quad.read.across.y.v4i16(<4 x i16> %[[#]])
 // CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]]
 uint16_t4 test_uint16_t4(uint16_t4 expr) { return QuadReadAcrossY(expr); }
 #endif

>From 4d8d384168d31dc73ddecf1b0720434d5f4a9dda Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Wed, 13 May 2026 11:18:47 +0200
Subject: [PATCH 3/3] pass Intrinsic::ID to EmitIntrinsicCall

---
 clang/lib/CodeGen/CGCall.cpp         |  21 +++--
 clang/lib/CodeGen/CGHLSLBuiltins.cpp | 116 ++++++++++-----------------
 clang/lib/CodeGen/CodeGenFunction.h  |   8 +-
 3 files changed, 61 insertions(+), 84 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b698d4489b1cf..1b420049fffc1 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5240,20 +5240,25 @@ llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
   return call;
 }
 
-llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee,
+llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::Intrinsic::ID ID,
                                                    const llvm::Twine &Name) {
-  return EmitIntrinsicCall(Callee, {}, Name);
+  return EmitIntrinsicCall(ID, {}, {}, Name);
 }
 
-llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::FunctionCallee Callee,
+llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::Intrinsic::ID ID,
                                                    ArrayRef<llvm::Value *> Args,
                                                    const llvm::Twine &Name) {
-  assert(dyn_cast<llvm::Function>(Callee.getCallee()) &&
-         cast<llvm::Function>(Callee.getCallee())->isIntrinsic() &&
-         "EmitIntrinsicCall called with non-intrinsic callee");
-  llvm::CallInst *Call = Builder.CreateCall(
-      Callee, Args, getBundlesForFunclet(Callee.getCallee()), Name);
+  return EmitIntrinsicCall(ID, {}, Args, Name);
+}
 
+llvm::CallInst *CodeGenFunction::EmitIntrinsicCall(llvm::Intrinsic::ID ID,
+                                                   ArrayRef<llvm::Type *> Types,
+                                                   ArrayRef<llvm::Value *> Args,
+                                                   const llvm::Twine &Name) {
+  llvm::Function *F =
+      llvm::Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID, Types);
+  llvm::CallInst *Call =
+      Builder.CreateCall(F, Args, getBundlesForFunclet(F), Name);
   if (CGM.shouldEmitConvergenceTokens() && Call->isConvergent())
     return cast<llvm::CallInst>(addConvergenceControlToken(Call));
   return Call;
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 2e672442f281a..a4cd28f97b6d6 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -172,9 +172,8 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
 
   if (CGF.CGM.getTarget().getTriple().isDXIL()) {
     // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
-    llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32});
-
-    Value *StructVal = CGF.EmitIntrinsicCall(Fn, Cond);
+    Value *StructVal =
+        CGF.EmitIntrinsicCall(Intrinsic::dx_wave_ballot, {I32}, {Cond});
     assert(StructVal->getType() == Struct4I32 &&
            "dx.wave.ballot must return {i32,i32,i32,i32}");
 
@@ -190,8 +189,7 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction 
&CGF,
   }
 
   if (CGF.CGM.getTarget().getTriple().isSPIRV())
-    return CGF.EmitIntrinsicCall(
-        CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond);
+    return CGF.EmitIntrinsicCall(Intrinsic::spv_subgroup_ballot, {Cond});
 
   llvm_unreachable(
       "WaveActiveBallot is only supported for DXIL and SPIRV targets");
@@ -1288,9 +1286,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Intrinsic::ID IID =
         getPrefixCountBitsIntrinsic(getTarget().getTriple().getArch());
 
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), IID), ArrayRef{Op},
-        "hlsl.wave.prefix.bit.count");
+    return EmitIntrinsicCall(IID, ArrayRef{Op}, "hlsl.wave.prefix.bit.count");
   }
   case Builtin::BI__builtin_hlsl_select: {
     Value *OpCond = EmitScalarExpr(E->getArg(0));
@@ -1335,9 +1331,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Value *Op = EmitScalarExpr(E->getArg(0));
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic();
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), ID, {Op->getType()}),
-                             {Op});
+    return EmitIntrinsicCall(ID, {Op->getType()}, {Op});
   }
   case Builtin::BI__builtin_hlsl_wave_active_all_true: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1345,8 +1339,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "Intrinsic WaveActiveAllTrue operand must be a bool");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
+    return EmitIntrinsicCall(ID, {Op});
   }
   case Builtin::BI__builtin_hlsl_wave_active_any_true: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1354,8 +1347,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "Intrinsic WaveActiveAnyTrue operand must be a bool");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
+    return EmitIntrinsicCall(ID, {Op});
   }
   case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1364,9 +1356,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), ID, {Op->getType()}),
-                             ArrayRef{Op}, "hlsl.wave.active.bit.or");
+    return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
+                             "hlsl.wave.active.bit.or");
   }
   case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1375,9 +1366,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), ID, {Op->getType()}),
-                             ArrayRef{Op}, "hlsl.wave.active.bit.xor");
+    return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
+                             "hlsl.wave.active.bit.xor");
   }
   case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
     Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1386,9 +1376,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), ID, {Op->getType()}),
-                             ArrayRef{Op}, "hlsl.wave.active.bit.and");
+    return EmitIntrinsicCall(ID, {Op->getType()}, ArrayRef{Op},
+                             "hlsl.wave.active.bit.and");
   }
   case Builtin::BI__builtin_hlsl_wave_active_ballot: {
     [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
@@ -1400,9 +1389,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
-        ArrayRef{OpExpr});
+    return EmitIntrinsicCall(ID, ArrayRef{OpExpr});
   }
   case Builtin::BI__builtin_hlsl_wave_active_sum: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1410,9 +1397,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Intrinsic::ID IID = getWaveActiveSumIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
 
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), IID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.wave.active.sum");
+    return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.wave.active.sum");
   }
   case Builtin::BI__builtin_hlsl_wave_active_product: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1420,9 +1406,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     Intrinsic::ID IID = getWaveActiveProductIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
 
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), IID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.wave.active.product");
+    return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.wave.active.product");
   }
   case Builtin::BI__builtin_hlsl_wave_active_max: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1434,9 +1419,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     else
       IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();
 
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), IID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.wave.active.max");
+    return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.wave.active.max");
   }
   case Builtin::BI__builtin_hlsl_wave_active_min: {
     // Due to the use of variadic arguments, explicitly retrieve argument
@@ -1448,9 +1432,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     else
       IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();
 
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), IID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.wave.active.min");
+    return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.wave.active.min");
   }
   case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
     // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
@@ -1458,8 +1441,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     // for the DirectX intrinsic and the demangled builtin name
     switch (CGM.getTarget().getTriple().getArch()) {
     case llvm::Triple::dxil:
-      return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-          &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
+      return EmitIntrinsicCall(Intrinsic::dx_wave_getlaneindex);
     case llvm::Triple::spirv:
       return EmitRuntimeCall(CGM.CreateRuntimeFunction(
           llvm::FunctionType::get(IntTy, {}, false),
@@ -1471,54 +1453,46 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
   }
   case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
     // Due to the use of variadic arguments we must explicitly retrieve them and
     // create our function type.
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Value *OpIndex = EmitScalarExpr(E->getArg(1));
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(
-            &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
-            {OpExpr->getType()}),
-        ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
+    return EmitIntrinsicCall(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
+                             {OpExpr->getType()}, ArrayRef{OpExpr, OpIndex},
+                             "hlsl.wave.readlane");
   }
   case Builtin::BI__builtin_hlsl_wave_prefix_sum: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID IID = getWavePrefixSumIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), IID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.wave.prefix.sum");
+    return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.wave.prefix.sum");
   }
   case Builtin::BI__builtin_hlsl_wave_prefix_product: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID IID = getWavePrefixProductIntrinsic(
         getTarget().getTriple().getArch(), E->getArg(0)->getType());
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), IID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.wave.prefix.product");
+    return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.wave.prefix.product");
   }
   case Builtin::BI__builtin_hlsl_quad_read_across_x: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic();
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), ID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.quad.read.across.x");
+    return EmitIntrinsicCall(ID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.quad.read.across.x");
   }
   case Builtin::BI__builtin_hlsl_quad_read_across_y: {
     Value *OpExpr = EmitScalarExpr(E->getArg(0));
     Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic();
-    return EmitIntrinsicCall(Intrinsic::getOrInsertDeclaration(
-                                 &CGM.getModule(), ID, {OpExpr->getType()}),
-                             ArrayRef{OpExpr}, "hlsl.quad.read.across.y");
+    return EmitIntrinsicCall(ID, {OpExpr->getType()}, ArrayRef{OpExpr},
+                             "hlsl.quad.read.across.y");
   }
   case Builtin::BI__builtin_hlsl_elementwise_sign: {
     auto *Arg0 = E->getArg(0);
@@ -1576,36 +1550,30 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     return handleHlslClip(E, this);
   case Builtin::BI__builtin_hlsl_all_memory_barrier: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: {
     Intrinsic::ID ID =
         CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_device_memory_barrier: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: {
     Intrinsic::ID ID =
         CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_group_memory_barrier: {
     Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
     Intrinsic::ID ID =
         CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
-    return EmitIntrinsicCall(
-        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+    return EmitIntrinsicCall(ID);
   }
   case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 7b2f0a5c5e3a2..77ca3e0fee84f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4610,9 +4610,13 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee,
                                   ArrayRef<llvm::Value *> args,
                                   const Twine &name = "");
-  llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee,
+  llvm::CallInst *EmitIntrinsicCall(llvm::Intrinsic::ID ID,
                                     const Twine &Name = "");
-  llvm::CallInst *EmitIntrinsicCall(llvm::FunctionCallee Callee,
+  llvm::CallInst *EmitIntrinsicCall(llvm::Intrinsic::ID ID,
+                                    ArrayRef<llvm::Value *> Args,
+                                    const Twine &Name = "");
+  llvm::CallInst *EmitIntrinsicCall(llvm::Intrinsic::ID ID,
+                                    ArrayRef<llvm::Type *> Types,
                                     ArrayRef<llvm::Value *> Args,
                                     const Twine &Name = "");
   llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee,

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HLSL] Use EmitIntrinsicCall instead of EmitRuntimeCall for intrinsic (PR #197380)

Reply via email to