[clang] [llvm] [RISCV][P-ext] Support Packed Merge. (PR #207110)

via cfe-commits Wed, 01 Jul 2026 19:31:44 -0700

llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: TelGome

<details>
<summary>Changes</summary>

This PR adds support for the RISC-V P extension 
[Packed Merge](https://github.com/riscv/riscv-p-spec/blob/master/P-ext-intrinsics.adoc#packed-merge) 
intrinsics.

---

Patch is 45.43 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/207110.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsRISCV.td (+14) 
- (modified) clang/lib/CodeGen/TargetBuiltins/RISCV.cpp (+24-1) 
- (modified) clang/lib/Headers/riscv_packed_simd.h (+21) 
- (modified) clang/test/CodeGen/RISCV/rvp-intrinsics.c (+244) 
- (modified) cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c 
(+198) 
- (modified) llvm/include/llvm/IR/IntrinsicsRISCV.td (+8) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+48-4) 
- (modified) llvm/test/CodeGen/RISCV/rvp-simd-32.ll (+109) 
- (modified) llvm/test/CodeGen/RISCV/rvp-simd-64.ll (+271) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td 
b/clang/include/clang/Basic/BuiltinsRISCV.td
index 3f84528fdca70..785e41324ad78 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -233,6 +233,20 @@ def predsumu_u16x4_u64 : RISCVBuiltin<"uint64_t(_Vector<4, 
unsigned short>, uint
 def predsum_i32x2_i64 : RISCVBuiltin<"int64_t(_Vector<2, int>, int64_t)">;
 def predsumu_u32x2_u64 : RISCVBuiltin<"uint64_t(_Vector<2, unsigned int>, 
uint64_t)">;
 
+// Packed Merge (32-bit)
+def pmerge_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned 
char>, _Vector<4, unsigned char>, _Vector<4, unsigned char>)">;
+def pmerge_i8x4 : RISCVBuiltin<"_Vector<4, signed char>(_Vector<4, signed 
char>, _Vector<4, signed char>, _Vector<4, unsigned char>)">;
+def pmerge_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, 
unsigned short>, _Vector<2, unsigned short>, _Vector<2, unsigned short>)">;
+def pmerge_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, 
_Vector<2, short>, _Vector<2, unsigned short>)">;
+
+// Packed Merge (64-bit)
+def pmerge_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned 
char>, _Vector<8, unsigned char>, _Vector<8, unsigned char>)">;
+def pmerge_i8x8 : RISCVBuiltin<"_Vector<8, signed char>(_Vector<8, signed 
char>, _Vector<8, signed char>, _Vector<8, unsigned char>)">;
+def pmerge_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, 
unsigned short>, _Vector<4, unsigned short>, _Vector<4, unsigned short>)">;
+def pmerge_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, 
_Vector<4, short>, _Vector<4, unsigned short>)">;
+def pmerge_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned 
int>, _Vector<2, unsigned int>, _Vector<2, unsigned int>)">;
+def pmerge_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, 
int>, _Vector<2, unsigned int>)">;
+
 } // Features = "experimental-p"
 
 
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp 
b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index bb8fa86e7a564..588e229499082 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1247,7 +1247,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned 
BuiltinID,
   case RISCV::BI__builtin_riscv_pabdu_u8x4:
   case RISCV::BI__builtin_riscv_pabdu_u16x2:
   case RISCV::BI__builtin_riscv_pabdu_u8x8:
-  case RISCV::BI__builtin_riscv_pabdu_u16x4: {
+  case RISCV::BI__builtin_riscv_pabdu_u16x4:
+  // Packed Merge
+  case RISCV::BI__builtin_riscv_pmerge_u8x4:
+  case RISCV::BI__builtin_riscv_pmerge_i8x4:
+  case RISCV::BI__builtin_riscv_pmerge_u16x2:
+  case RISCV::BI__builtin_riscv_pmerge_i16x2:
+  case RISCV::BI__builtin_riscv_pmerge_u8x8:
+  case RISCV::BI__builtin_riscv_pmerge_i8x8:
+  case RISCV::BI__builtin_riscv_pmerge_u16x4:
+  case RISCV::BI__builtin_riscv_pmerge_i16x4:
+  case RISCV::BI__builtin_riscv_pmerge_u32x2:
+  case RISCV::BI__builtin_riscv_pmerge_i32x2: {
     switch (BuiltinID) {
     default:
       llvm_unreachable("unexpected builtin ID");
@@ -1321,6 +1332,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned 
BuiltinID,
     case RISCV::BI__builtin_riscv_pabdu_u16x4:
       ID = Intrinsic::riscv_pabdu;
       break;
+    case RISCV::BI__builtin_riscv_pmerge_u8x4:
+    case RISCV::BI__builtin_riscv_pmerge_i8x4:
+    case RISCV::BI__builtin_riscv_pmerge_u16x2:
+    case RISCV::BI__builtin_riscv_pmerge_i16x2:
+    case RISCV::BI__builtin_riscv_pmerge_u8x8:
+    case RISCV::BI__builtin_riscv_pmerge_i8x8:
+    case RISCV::BI__builtin_riscv_pmerge_u16x4:
+    case RISCV::BI__builtin_riscv_pmerge_i16x4:
+    case RISCV::BI__builtin_riscv_pmerge_u32x2:
+    case RISCV::BI__builtin_riscv_pmerge_i32x2:
+      ID = Intrinsic::riscv_pmerge;
+      break;
     }
 
     IntrinsicTypes = {ResultType};
diff --git a/clang/lib/Headers/riscv_packed_simd.h 
b/clang/lib/Headers/riscv_packed_simd.h
index c61e156ca6a7f..cdfa7ec5812ba 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -109,6 +109,12 @@ typedef uint32_t uint32x2_t 
__attribute__((__vector_size__(8)));
     return builtin(__rs1, __rs2);                                              
\
   }
 
+#define __packed_merge_builtin(name, ty, mask_ty, builtin)                     
\
+  static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(                      
\
+      ty __rs1, ty __rs2, mask_ty __rd) {                                      
\
+    return (ty)builtin(__rs1, __rs2, __rd);                                    
\
+  }
+
 // clang-format off: macro call sites have no trailing semicolons, which
 // confuses clang-format into a deeply nested expression.
 
@@ -448,6 +454,20 @@ __packed_reduction(predsumu_u16x4_u64, uint64_t, 
uint16x4_t, __builtin_riscv_pre
 __packed_reduction(predsum_i32x2_i64, int64_t, int32x2_t, 
__builtin_riscv_predsum_i32x2_i64)
 __packed_reduction(predsumu_u32x2_u64, uint64_t, uint32x2_t, 
__builtin_riscv_predsumu_u32x2_u64)
 
+/* Packed Merge (32-bit) */
+__packed_merge_builtin(pmerge_u8x4, uint8x4_t, uint8x4_t, 
__builtin_riscv_pmerge_u8x4)
+__packed_merge_builtin(pmerge_i8x4, int8x4_t, uint8x4_t, 
__builtin_riscv_pmerge_i8x4)
+__packed_merge_builtin(pmerge_u16x2, uint16x2_t, uint16x2_t, 
__builtin_riscv_pmerge_u16x2)
+__packed_merge_builtin(pmerge_i16x2, int16x2_t, uint16x2_t, 
__builtin_riscv_pmerge_i16x2)
+
+/* Packed Merge (64-bit) */
+__packed_merge_builtin(pmerge_u8x8, uint8x8_t, uint8x8_t, 
__builtin_riscv_pmerge_u8x8)
+__packed_merge_builtin(pmerge_i8x8, int8x8_t, uint8x8_t, 
__builtin_riscv_pmerge_i8x8)
+__packed_merge_builtin(pmerge_u16x4, uint16x4_t, uint16x4_t, 
__builtin_riscv_pmerge_u16x4)
+__packed_merge_builtin(pmerge_i16x4, int16x4_t, uint16x4_t, 
__builtin_riscv_pmerge_i16x4)
+__packed_merge_builtin(pmerge_u32x2, uint32x2_t, uint32x2_t, 
__builtin_riscv_pmerge_u32x2)
+__packed_merge_builtin(pmerge_i32x2, int32x2_t, uint32x2_t, 
__builtin_riscv_pmerge_i32x2)
+
 // clang-format on
 
 #undef __packed_splat2
@@ -468,6 +488,7 @@ __packed_reduction(predsumu_u32x2_u64, uint64_t, 
uint32x2_t, __builtin_riscv_pre
 #undef __packed_pabs
 #undef __packed_binary_builtin_cast
 #undef __packed_reduction
+#undef __packed_merge_builtin
 #undef __DEFAULT_FN_ATTRS
 
 #if defined(__cplusplus)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c 
b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 290f61787ceff..ec555e52d62fe 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -6143,3 +6143,247 @@ int64_t test_predsum_i32x2_i64(int32x2_t rs1, int64_t 
rs2) {
 uint64_t test_predsumu_u32x2_u64(uint32x2_t rs1, uint64_t rs2) {
   return __riscv_predsumu_u32x2_u64(rs1, rs2);
 }
+
+// Packed Merge (32-bit)
+
+// RV32-LABEL: define dso_local i32 @test_pmerge_u8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32
+// RV32-NEXT:    ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmerge_u8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32
+// RV64-NEXT:    ret i32 [[TMP4]]
+//
+uint8x4_t test_pmerge_u8x4(uint8x4_t rs1, uint8x4_t rs2, uint8x4_t rd) {
+  return __riscv_pmerge_u8x4(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmerge_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32
+// RV32-NEXT:    ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmerge_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP3:%.*]] = call <4 x i8> @llvm.riscv.pmerge.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]], <4 x i8> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <4 x i8> [[TMP3]] to i32
+// RV64-NEXT:    ret i32 [[TMP4]]
+//
+int8x4_t test_pmerge_i8x4(int8x4_t rs1, int8x4_t rs2, uint8x4_t rd) {
+  return __riscv_pmerge_i8x4(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmerge_u16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32
+// RV32-NEXT:    ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmerge_u16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32
+// RV64-NEXT:    ret i32 [[TMP4]]
+//
+uint16x2_t test_pmerge_u16x2(uint16x2_t rs1, uint16x2_t rs2, uint16x2_t rd) {
+  return __riscv_pmerge_u16x2(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmerge_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32
+// RV32-NEXT:    ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmerge_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]], 
i32 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i32 [[RD_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP3:%.*]] = call <2 x i16> @llvm.riscv.pmerge.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <2 x i16> [[TMP3]] to i32
+// RV64-NEXT:    ret i32 [[TMP4]]
+//
+int16x2_t test_pmerge_i16x2(int16x2_t rs1, int16x2_t rs2, uint16x2_t rd) {
+  return __riscv_pmerge_i16x2(rs1, rs2, rd);
+}
+
+// Packed Merge (64-bit)
+
+// RV32-LABEL: define dso_local i64 @test_pmerge_u8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64
+// RV32-NEXT:    ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmerge_u8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[TMP4]]
+//
+uint8x8_t test_pmerge_u8x8(uint8x8_t rs1, uint8x8_t rs2, uint8x8_t rd) {
+  return __riscv_pmerge_u8x8(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmerge_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64
+// RV32-NEXT:    ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmerge_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.riscv.pmerge.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[TMP4]]
+//
+int8x8_t test_pmerge_i8x8(int8x8_t rs1, int8x8_t rs2, uint8x8_t rd) {
+  return __riscv_pmerge_i8x8(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmerge_u16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64
+// RV32-NEXT:    ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmerge_u16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[TMP4]]
+//
+uint16x4_t test_pmerge_u16x4(uint16x4_t rs1, uint16x4_t rs2, uint16x4_t rd) {
+  return __riscv_pmerge_u16x4(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmerge_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64
+// RV32-NEXT:    ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmerge_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.riscv.pmerge.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[TMP4]]
+//
+int16x4_t test_pmerge_i16x4(int16x4_t rs1, int16x4_t rs2, uint16x4_t rd) {
+  return __riscv_pmerge_i16x4(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmerge_u32x2(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.riscv.pmerge.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+// RV32-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64
+// RV32-NEXT:    ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmerge_u32x2(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.riscv.pmerge.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+// RV64-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64
+// RV64-NEXT:    ret i64 [[TMP4]]
+//
+uint32x2_t test_pmerge_u32x2(uint32x2_t rs1, uint32x2_t rs2, uint32x2_t rd) {
+  return __riscv_pmerge_u32x2(rs1, rs2, rd);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmerge_i32x2(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]], 
i64 noundef [[RD_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = bitcast i64 [[RD_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.riscv.pmerge.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]], <2...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/207110
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [RISCV][P-ext] Support Packed Merge. (PR #207110)

Reply via email to