[clang] [llvm] [HLSL] Add WaveActiveBitAnd builtin function (PR #187149)

Joshua Batista via cfe-commits Tue, 17 Mar 2026 16:08:54 -0700

https://github.com/bob80905 created 
https://github.com/llvm/llvm-project/pull/187149


This PR adds the WaveActiveBitAnd HLSL function.
Fixes https://github.com/llvm/llvm-project/issues/99166

>From 19dd48a5484dbdfece4f0325ba19b6000de95f9f Mon Sep 17 00:00:00 2001
From: Joshua Batista <[email protected]>
Date: Tue, 17 Mar 2026 16:07:14 -0700
Subject: [PATCH] add wave active bit and functions

---
 clang/include/clang/Basic/Builtins.td         |  6 ++
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 11 +++
 clang/lib/CodeGen/CGHLSLRuntime.h             |  1 +
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  | 34 ++++++++
 clang/lib/Sema/SemaHLSL.cpp                   |  1 +
 .../builtins/WaveActiveBitAnd-errors.hlsl     | 23 ++++++
 .../builtins/WaveActiveBitAnd.hlsl            | 82 +++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  1 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |  1 +
 llvm/lib/Target/DirectX/DXIL.td               |  4 +
 llvm/lib/Target/DirectX/DXILShaderFlags.cpp   |  1 +
 .../DirectX/DirectXTargetTransformInfo.cpp    |  1 +
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  3 +
 .../CodeGen/DirectX/ShaderFlags/wave-ops.ll   |  8 ++
 llvm/test/CodeGen/DirectX/WaveActiveBitAnd.ll | 19 +++++
 .../SPIRV/hlsl-intrinsics/WaveActiveBitAnd.ll | 32 ++++++++
 16 files changed, 228 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd-errors.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/WaveActiveBitAnd.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitAnd.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index a1c99ccba7676..d4b6fbaa7640d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5192,6 +5192,12 @@ def HLSLWaveActiveBitXor : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void (...)";
 }
 
+def HLSLWaveActiveBitAnd : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_wave_active_bit_and"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void (...)";
+}
+
 def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_wave_active_ballot"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 80c590437309d..d36525c0b0a37 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1279,6 +1279,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                &CGM.getModule(), ID, {Op->getType()}),
                            ArrayRef{Op}, "hlsl.wave.active.bit.xor");
   }
+  case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
+    Value *Op = EmitScalarExpr(E->getArg(0));
+    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
+           "Intrinsic WaveActiveBitAnd operand must have an unsigned integer "
+           "representation");
+
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
+    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+                               &CGM.getModule(), ID, {Op->getType()}),
+                           ArrayRef{Op}, "hlsl.wave.active.bit.and");
+  }
   case Builtin::BI__builtin_hlsl_wave_active_ballot: {
     [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
     assert(Op->getType()->isIntegerTy(1) &&
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index b1c24ab23fea0..53e670a8a1db6 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -149,6 +149,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitOr, wave_reduce_or)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitXor, wave_reduce_xor)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitAnd, wave_reduce_and)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMax, wave_reduce_max)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveUMax, wave_reduce_umax)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMin, wave_reduce_min)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index dd46c94d8b5ba..ae96170948b4a 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2863,6 +2863,40 @@ _HLSL_AVAILABILITY(shadermodel, 6.0)
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
 __attribute__((convergent)) uint64_t4 WaveActiveBitXor(uint64_t4);
 
+//===----------------------------------------------------------------------===//
+// WaveActiveBitAnd builtins
+//===----------------------------------------------------------------------===//
+
+// \brief Returns the bitwise AND of all the values of <expr> across all active
+// non-helper lanes in the current wave, and replicates it back to
+// all active non-helper lanes.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint WaveActiveBitAnd(uint);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint2 WaveActiveBitAnd(uint2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint3 WaveActiveBitAnd(uint3);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint4 WaveActiveBitAnd(uint4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint64_t WaveActiveBitAnd(uint64_t);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint64_t2 WaveActiveBitAnd(uint64_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint64_t3 WaveActiveBitAnd(uint64_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_and)
+__attribute__((convergent)) uint64_t4 WaveActiveBitAnd(uint64_t4);
+
 //===----------------------------------------------------------------------===//
 // WaveActiveMax builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 3babf64822f10..93deff188d774 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4125,6 +4125,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyExpr);
     break;
   }
+  case Builtin::BI__builtin_hlsl_wave_active_bit_and:
   case Builtin::BI__builtin_hlsl_wave_active_bit_xor:
   case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
     if (SemaRef.checkArgCount(TheCall, 1))
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd-errors.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd-errors.hlsl
new file mode 100644
index 0000000000000..bd38558ef8d9c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd-errors.hlsl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+uint test_too_few_arg() {
+  return __builtin_hlsl_wave_active_bit_and();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+uint test_too_many_arg(uint p0) {
+  return __builtin_hlsl_wave_active_bit_and(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+struct S { uint x; };
+
+uint test_expr_struct_type_check(S p0) {
+  return __builtin_hlsl_wave_active_bit_and(p0);
+  // expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}}
+}
+
+bool test_expr_bool_type_check(bool p0) {
+  return __builtin_hlsl_wave_active_bit_and(p0);
+  // expected-error@-1 {{invalid operand of type 'bool'}}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl
new file mode 100644
index 0000000000000..704cab1a30da6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitAnd.hlsl
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,DXCHECK -DCALL="call"
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func"
+
+// Test basic lowering to runtime function call.
+
+// CHECK-LABEL: test_uint
+uint test_uint(uint expr) {
+  // DXCHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF:dx]].wave.reduce.and.i32([[TY]] %[[#]])
+  // SPVCHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF:spv]].wave.reduce.and.i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint2
+uint2 test_uint2(uint2 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.v2i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.v2i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint3
+uint3 test_uint3(uint3 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.v3i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.v3i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint4
+uint4 test_uint4(uint4 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.v4i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.v4i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t
+uint64_t test_uint64_t(uint64_t expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t2
+uint64_t2 test_uint64_t2(uint64_t2 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.v2i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.v2i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t3
+uint64_t3 test_uint64_t3(uint64_t3 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.v3i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.v3i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t4
+uint64_t4 test_uint64_t4(uint64_t4 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.and.v4i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitAnd(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.and.v4i64([[TY]]) #[[#attr:]]
\ No newline at end of file
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index a65c5e17754e1..04caf3e570b7e 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -238,6 +238,7 @@ def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
 def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_reduce_and : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index facea14cea9b1..2ed5d13eb36af 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -133,6 +133,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
  def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
  def int_spv_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
  def int_spv_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
+  def int_spv_wave_reduce_and : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
  def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
    DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
  def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 9bc26a79d2d26..edc5f672e9e32 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1158,6 +1158,10 @@ def WaveActiveBit : DXILOp<120, waveActiveBit> {
                  [
                    IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Xor>,
                  ]>,
+    IntrinSelect<int_dx_wave_reduce_and,
+                 [
+                   IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_And>,
+                 ]>
   ];
 
   let arguments = [OverloadTy, Int8Ty];
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
index 50d08b3a66dc1..fa2141fd23062 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
@@ -93,6 +93,7 @@ static bool checkWaveOps(Intrinsic::ID IID) {
   // Wave Active Op Variants
   case Intrinsic::dx_wave_reduce_or:
   case Intrinsic::dx_wave_reduce_xor:
+  case Intrinsic::dx_wave_reduce_and:
   case Intrinsic::dx_wave_reduce_sum:
   case Intrinsic::dx_wave_reduce_usum:
   case Intrinsic::dx_wave_product:
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 7c1ef63a2d21c..8f0d0dbd2a0c9 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -64,6 +64,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_wave_readlane:
   case Intrinsic::dx_wave_reduce_or:
   case Intrinsic::dx_wave_reduce_xor:
+  case Intrinsic::dx_wave_reduce_and:
   case Intrinsic::dx_wave_reduce_max:
   case Intrinsic::dx_wave_reduce_min:
   case Intrinsic::dx_wave_reduce_sum:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 29beb0c3c3653..6045b0629e827 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4408,6 +4408,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_wave_reduce_xor:
     return selectWaveReduceOp(ResVReg, ResType, I,
                               SPIRV::OpGroupNonUniformBitwiseXor);
+  case Intrinsic::spv_wave_reduce_and:
+    return selectWaveReduceOp(ResVReg, ResType, I,
+                              SPIRV::OpGroupNonUniformBitwiseAnd);
   case Intrinsic::spv_wave_reduce_umax:
     return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ true);
   case Intrinsic::spv_wave_reduce_max:
diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
index 55db9a391a2bc..c5a3f4e853d76 100644
--- a/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
+++ b/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
@@ -56,6 +56,14 @@ entry:
   ret i32 %ret
 }
 
+define noundef i32 @wave_bit_and(i32 %x) {
+entry:
+  ; CHECK: Function wave_bit_and : [[WAVE_FLAG]]
+  %ret = call i32 @llvm.dx.wave.reduce.and(i32 %x)
+  ret i32 %ret
+}
+
+
 define noundef i1 @wave_all_equal(i1 %x) {
 entry:
   ; CHECK: Function wave_all_equal : [[WAVE_FLAG]]
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBitAnd.ll b/llvm/test/CodeGen/DirectX/WaveActiveBitAnd.ll
new file mode 100644
index 0000000000000..7399415c09605
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBitAnd.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
+
+define noundef i32 @wave_bitand_simple(i32 noundef %p1) {
+entry:
+; CHECK: call i32 @dx.op.waveActiveBit.i32(i32 120, i32 %p1, i8 0){{$}}
+  %ret = call i32 @llvm.dx.wave.reduce.and.i32(i32 %p1)
+  ret i32 %ret
+}
+
+declare i32 @llvm.dx.wave.reduce.and.i32(i32)
+
+define noundef i64 @wave_bitand_simple64(i64 noundef %p1) {
+entry:
+; CHECK: call i64 @dx.op.waveActiveBit.i64(i32 120, i64 %p1, i8 0){{$}}
+  %ret = call i64 @llvm.dx.wave.reduce.and.i64(i64 %p1)
+  ret i64 %ret
+}
+
+declare i64 @llvm.dx.wave.reduce.and.i64(i64)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitAnd.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitAnd.ll
new file mode 100644
index 0000000000000..e5626fd612e2a
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitAnd.ll
@@ -0,0 +1,32 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; Test lowering to the SPIR-V backend for 32-bit and 64-bit scalar integer operands.
+
+; CHECK: OpCapability GroupNonUniformArithmetic
+
+; CHECK-DAG:   %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG:   %[[#uint64:]] = OpTypeInt 64 0
+; CHECK-DAG:   %[[#scope:]] = OpConstant %[[#uint]] 3
+
+; CHECK-LABEL: Begin function test_uint
+; CHECK:   %[[#iexpr:]] = OpFunctionParameter %[[#uint]]
+define i32 @test_uint(i32 %iexpr) {
+entry:
+; CHECK:   %[[#iret:]] = OpGroupNonUniformBitwiseAnd %[[#uint]] %[[#scope]] Reduce %[[#iexpr]]
+  %0 = call i32 @llvm.spv.wave.reduce.and.i32(i32 %iexpr)
+  ret i32 %0
+}
+
+declare i32 @llvm.spv.wave.reduce.and.i32(i32)
+
+; CHECK-LABEL: Begin function test_uint64
+; CHECK:   %[[#iexpr64:]] = OpFunctionParameter %[[#uint64]]
+define i64 @test_uint64(i64 %iexpr64) {
+entry:
+; CHECK:   %[[#iret:]] = OpGroupNonUniformBitwiseAnd %[[#uint64]] %[[#scope]] Reduce %[[#iexpr64]]
+  %0 = call i64 @llvm.spv.wave.reduce.and.i64(i64 %iexpr64)
+  ret i64 %0
+}
+
+declare i64 @llvm.spv.wave.reduce.and.i64(i64)
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [HLSL] Add WaveActiveBitAnd builtin function (PR #187149)

Reply via email to