llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Julian Pokrovsky (raventid)

<details>
<summary>Changes</summary>

This implementation is adapted from the existing handling of
`X86::BI__builtin_ia32_shuf_i*` and `X86::BI__builtin_ia32_shuf_f*` in
`clang/lib/CodeGen/TargetBuiltins/X86.cpp`.

It adds CIR codegen support for the following X86 builtins:
- __builtin_ia32_shuf_f32x4
- __builtin_ia32_shuf_f64x2
- __builtin_ia32_shuf_i32x4
- __builtin_ia32_shuf_i64x2
- __builtin_ia32_shuf_f32x4_256
- __builtin_ia32_shuf_f64x2_256
- __builtin_ia32_shuf_i32x4_256
- __builtin_ia32_shuf_i64x2_256

Part of https://github.com/llvm/llvm-project/issues/167765

---
Full diff: https://github.com/llvm/llvm-project/pull/172960.diff


3 Files Affected:

- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+28-1) 
- (modified) clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c (+53) 
- (modified) clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c (+52) 


``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 32ae1957d3c6a..cd12b37d51029 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1247,7 +1247,34 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_shuf_f32x4:
   case X86::BI__builtin_ia32_shuf_f64x2:
   case X86::BI__builtin_ia32_shuf_i32x4:
-  case X86::BI__builtin_ia32_shuf_i64x2:
+  case X86::BI__builtin_ia32_shuf_i64x2: {
+    mlir::Value src1 = ops[0];
+    mlir::Value src2 = ops[1];
+
+    unsigned imm =
+        ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
+
+    unsigned numElems = cast<cir::VectorType>(src1.getType()).getSize();
+    unsigned totalBits = getContext().getTypeSize(expr->getArg(0)->getType());
+    unsigned numLanes = totalBits == 512 ? 4 : 2;
+    unsigned numElemsPerLane = numElems / numLanes;
+
+    SmallVector<mlir::Attribute, 16> indices;
+    mlir::Type i32Ty = builder.getSInt32Ty();
+
+    for (unsigned l = 0; l != numElems; l += numElemsPerLane) {
+      unsigned index = (imm % numLanes) * numElemsPerLane;
+      imm /= numLanes;
+      if (l >= (numElems / 2))
+        index += numElems;
+      for (unsigned i = 0; i != numElemsPerLane; ++i) {
+        indices.push_back(cir::IntAttr::get(i32Ty, index + i));
+      }
+    }
+
+    return builder.createVecShuffle(getLoc(expr->getExprLoc()), src1, src2,
+                                    indices);
+  }
   case X86::BI__builtin_ia32_vperm2f128_pd256:
   case X86::BI__builtin_ia32_vperm2f128_ps256:
   case X86::BI__builtin_ia32_vperm2f128_si256:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index c7443c82546ae..fc4a65136df42 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -982,3 +982,56 @@ int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
   // OGCG: zext i1 %[[CMP]] to i32
   return _mm512_kortestz(__A,__B);
 }
+
+__m512 test_mm512_shuffle_f32x4(__m512 a, __m512 b) {
+  // CIR-LABEL: test_mm512_shuffle_f32x4
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<16 x !cir.float>)
+  // CIR-SAME: [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : 
!s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, 
#cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> 
: !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, 
#cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i]
+
+  // LLVM-LABEL: test_mm512_shuffle_f32x4
+  // LLVM: shufflevector <16 x float> %{{.+}}, <16 x float> %{{.+}}, <16 x 
i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 
17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  // OGCG-LABEL: test_mm512_shuffle_f32x4
+  // OGCG: shufflevector <16 x float> %{{.+}}, <16 x float> %{{.+}}, <16 x 
i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 
17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_shuffle_f32x4(a, b, 0x4E); // 2, 3, 0, 1
+}
+
+__m512i test_mm512_shuffle_i32x4(__m512i a, __m512i b) {
+  // CIR-LABEL: test_mm512_shuffle_i32x4
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<16 x !s32i>)
+  // CIR-SAME: [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : 
!s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, 
#cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> 
: !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, 
#cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i]
+
+  // LLVM-LABEL: test_mm512_shuffle_i32x4
+  // LLVM: shufflevector <16 x i32> %{{.+}}, <16 x i32> %{{.+}}, <16 x i32> 
<i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, 
i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  // OGCG-LABEL: test_mm512_shuffle_i32x4
+  // OGCG: shufflevector <16 x i32> %{{.+}}, <16 x i32> %{{.+}}, <16 x i32> 
<i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, 
i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_shuffle_i32x4(a, b, 0x4E);
+}
+
+__m512d test_mm512_shuffle_f64x2(__m512d a, __m512d b) {
+  // CIR-LABEL: test_mm512_shuffle_f64x2
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.double>)
+  // CIR-SAME: [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : 
!s32i, #cir.int<11> : !s32i]
+
+  // LLVM-LABEL: test_mm512_shuffle_f64x2
+  // LLVM: shufflevector <8 x double> %{{.+}}, <8 x double> %{{.+}}, <8 x i32> 
<i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: test_mm512_shuffle_f64x2
+  // OGCG: shufflevector <8 x double> %{{.+}}, <8 x double> %{{.+}}, <8 x i32> 
<i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  return _mm512_shuffle_f64x2(a, b, 0x4E);
+}
+
+__m512i test_mm512_shuffle_i64x2(__m512i a, __m512i b) {
+  // CIR-LABEL: test_mm512_shuffle_i64x2
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !s64i>)
+  // CIR-SAME: [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : 
!s32i, #cir.int<11> : !s32i]
+
+  // LLVM-LABEL: test_mm512_shuffle_i64x2
+  // LLVM: shufflevector <8 x i64> %{{.+}}, <8 x i64> %{{.+}}, <8 x i32> <i32 
4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+
+  // OGCG-LABEL: test_mm512_shuffle_i64x2
+  // OGCG: shufflevector <8 x i64> %{{.+}}, <8 x i64> %{{.+}}, <8 x i32> <i32 
4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  return _mm512_shuffle_i64x2(a, b, 0x4E);
+}
+
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
index 9ba3e19d41566..29363d6c2d23e 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
@@ -232,3 +232,55 @@ __m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) 
{
   return _mm_maskz_expand_pd(__U,__A);
 }
 
+
+__m256 test_mm256_shuffle_f32x4(__m256 a, __m256 b) {
+  // CIR-LABEL: test_mm256_shuffle_f32x4
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.float>)
+  // CIR-SAME: [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : 
!s32i, #cir.int<15> : !s32i]
+
+  // LLVM-LABEL: test_mm256_shuffle_f32x4
+  // LLVM: shufflevector <8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x i32> 
<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm256_shuffle_f32x4
+  // OGCG: shufflevector <8 x float> %{{.+}}, <8 x float> %{{.+}}, <8 x i32> 
<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+  return _mm256_shuffle_f32x4(a, b, 0x03); // 1, 1
+}
+
+__m256d test_mm256_shuffle_f64x2(__m256d a, __m256d b) {
+  // CIR-LABEL: test_mm256_shuffle_f64x2
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<4 x !cir.double>)
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i]
+
+  // LLVM-LABEL: test_mm256_shuffle_f64x2
+  // LLVM: shufflevector <4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x i32> 
<i32 2, i32 3, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_shuffle_f64x2
+  // OGCG: shufflevector <4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x i32> 
<i32 2, i32 3, i32 6, i32 7>
+  return _mm256_shuffle_f64x2(a, b, 0x03);
+}
+
+__m256i test_mm256_shuffle_i32x4(__m256i a, __m256i b) {
+  // CIR-LABEL: test_mm256_shuffle_i32x4
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !s32i>)
+  // CIR-SAME: [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : 
!s32i, #cir.int<15> : !s32i]
+
+  // LLVM-LABEL: test_mm256_shuffle_i32x4
+  // LLVM: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> <i32 
4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm256_shuffle_i32x4
+  // OGCG: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> <i32 
4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+  return _mm256_shuffle_i32x4(a, b, 0x03);
+}
+
+__m256i test_mm256_shuffle_i64x2(__m256i a, __m256i b) {
+  // CIR-LABEL: test_mm256_shuffle_i64x2
+  // CIR:   cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<4 x !s64i>)
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i]
+
+  // LLVM-LABEL: test_mm256_shuffle_i64x2
+  // LLVM: shufflevector <4 x i64> %{{.+}}, <4 x i64> %{{.+}}, <4 x i32> <i32 
2, i32 3, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_shuffle_i64x2
+  // OGCG: shufflevector <4 x i64> %{{.+}}, <4 x i64> %{{.+}}, <4 x i32> <i32 
2, i32 3, i32 6, i32 7>
+  return _mm256_shuffle_i64x2(a, b, 0x03);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/172960
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to