================
@@ -26,3 +26,42 @@ __m512bh test_mm512_undefined_pbh(void) {
   // OGCG: ret <32 x bfloat> zeroinitializer
   return _mm512_undefined_pbh();
 }
+
+__mmask32 test_mm512_mask_fpclass_pbh_mask(__mmask32 __U, __m512bh __A) {
+  // CIR-LABEL: _mm512_mask_fpclass_pbh_mask
+  // CIR: %[[A:.*]] = cir.call_llvm_intrinsic "x86.avx10.fpclass.bf16.512"
+  // CIR: %[[B:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x 
!cir.bool>
+  // CIR: %[[C:.*]] = cir.binop(and, %[[A]], %[[B]]) : !cir.vector<32 x 
!cir.bool>
+  // CIR: cir.cast bitcast %[[C]] : !cir.vector<32 x !cir.bool> -> !u32i
+
+  // LLVM-LABEL: test_mm512_mask_fpclass_pbh_mask
+  // LLVM: %[[A:.*]] = call <32 x i1> @llvm.x86.avx10.fpclass.bf16.512
+  // LLVM: %[[B:.*]] = bitcast i32 {{.*}} to <32 x i1>
+  // LLVM: %[[C:.*]] = and <32 x i1> %[[A]], %[[B]]
+  // LLVM: bitcast <32 x i1> %[[C]] to i32
+
+  // OGCG-LABEL: test_mm512_mask_fpclass_pbh_mask
+  // OGCG: %[[A:.*]] = call <32 x i1> @llvm.x86.avx10.fpclass.bf16.512
+  // OGCG: %[[B:.*]] = bitcast i32 {{.*}} to <32 x i1>
+  // OGCG: %[[C:.*]] = and <32 x i1> %[[A]], %[[B]]
+  // OGCG: bitcast <32 x i1> %[[C]] to i32
+  return _mm512_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+__mmask32 test_mm512_fpclass_pbh_mask(__m512bh __A) {
+  // CIR-LABEL: _mm512_fpclass_pbh_mask
+  // CIR: %[[A:.*]] = cir.call_llvm_intrinsic "x86.avx10.fpclass.bf16.512"
+  // CIR: %[[B:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x 
!cir.bool>
+  // CIR: %[[C:.*]] = cir.binop(and, %[[A]], %[[B]]) : !cir.vector<32 x 
!cir.bool>
+  // CIR: cir.cast bitcast %[[C]] : !cir.vector<32 x !cir.bool> -> !u32i
+
+  // LLVM-LABEL: test_mm512_fpclass_pbh_mask
+  // LLVM: %[[A:.*]] = call <32 x i1> @llvm.x86.avx10.fpclass.bf16.512
+  // LLVM: %[[B:.*]] = and <32 x i1> %[[A]], splat (i1 true)
----------------
andykaylor wrote:

It wasn't clear to me at first where the `-1` was coming from, but I see that 
it is from this definition in the header file:

```
#define _mm512_fpclass_pbh_mask(__A, imm)                                      \
  ((__mmask32)__builtin_ia32_vfpclassbf16512_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1))
```

We should fold the -1. I'm looking into a couple of possible approaches to 
doing that.


https://github.com/llvm/llvm-project/pull/172813
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to