Author: TelGome
Date: 2026-06-12T10:59:32+08:00
New Revision: 56f8fbb0149146f50aa8c905467b9384e8cb7bad

URL: 
https://github.com/llvm/llvm-project/commit/56f8fbb0149146f50aa8c905467b9384e8cb7bad
DIFF: 
https://github.com/llvm/llvm-project/commit/56f8fbb0149146f50aa8c905467b9384e8cb7bad.diff

LOG: [RISCV][P-ext] Support Packed Averaging Addition and Subtraction 
intrinsics (#203147)

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsRISCV.td
    clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
    clang/lib/Headers/riscv_packed_simd.h
    clang/test/CodeGen/RISCV/rvp-intrinsics.c
    cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
    llvm/include/llvm/IR/IntrinsicsRISCV.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoP.td
    llvm/test/CodeGen/RISCV/rvp-simd-32.ll
    llvm/test/CodeGen/RISCV/rvp-simd-64.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsRISCV.td 
b/clang/include/clang/Basic/BuiltinsRISCV.td
index 2dad5ede2d64b..185269bfc6d85 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -153,6 +153,36 @@ def ntl_store : RISCVBuiltin<"void(...)">;
 let Features = "zihintpause", Attributes = [NoThrow] in
 def pause : RISCVBuiltin<"void()">;
 
+//===----------------------------------------------------------------------===//
+// P extension.
+//===----------------------------------------------------------------------===//
+let Features = "experimental-p", Attributes = [NoThrow, Const] in {
+// Packed Averaging Addition and Subtraction (32-bit)
+def paadd_i8x4 : RISCVBuiltin<"_Vector<4, signed char>(_Vector<4, signed 
char>, _Vector<4, signed char>)">;
+def paadd_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, 
_Vector<2, short>)">;
+def paaddu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned 
char>, _Vector<4, unsigned char>)">;
+def paaddu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, 
unsigned short>, _Vector<2, unsigned short>)">;
+def pasub_i8x4 : RISCVBuiltin<"_Vector<4, signed char>(_Vector<4, signed 
char>, _Vector<4, signed char>)">;
+def pasub_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, 
_Vector<2, short>)">;
+def pasubu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned 
char>, _Vector<4, unsigned char>)">;
+def pasubu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, 
unsigned short>, _Vector<2, unsigned short>)">;
+
+// Packed Averaging Addition and Subtraction (64-bit)
+def paadd_i8x8 : RISCVBuiltin<"_Vector<8, signed char>(_Vector<8, signed 
char>, _Vector<8, signed char>)">;
+def paadd_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, 
_Vector<4, short>)">;
+def paadd_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, 
int>)">;
+def paaddu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned 
char>, _Vector<8, unsigned char>)">;
+def paaddu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, 
unsigned short>, _Vector<4, unsigned short>)">;
+def paaddu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned 
int>, _Vector<2, unsigned int>)">;
+def pasub_i8x8 : RISCVBuiltin<"_Vector<8, signed char>(_Vector<8, signed 
char>, _Vector<8, signed char>)">;
+def pasub_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, 
_Vector<4, short>)">;
+def pasub_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, 
int>)">;
+def pasubu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned 
char>, _Vector<8, unsigned char>)">;
+def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, 
unsigned short>, _Vector<4, unsigned short>)">;
+def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned 
int>, _Vector<2, unsigned int>)">;
+
+} // Features = "experimental-p"
+
 
//===----------------------------------------------------------------------===//
 // XCV extensions.
 
//===----------------------------------------------------------------------===//

diff  --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp 
b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 3bf7dd07d54d3..8c0684110dad7 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1199,6 +1199,64 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned 
BuiltinID,
     break;
   }
 
+  // Packed Averaging Addition and Subtraction
+  case RISCV::BI__builtin_riscv_paadd_i8x4:
+  case RISCV::BI__builtin_riscv_paadd_i16x2:
+  case RISCV::BI__builtin_riscv_paadd_i8x8:
+  case RISCV::BI__builtin_riscv_paadd_i16x4:
+  case RISCV::BI__builtin_riscv_paadd_i32x2:
+  case RISCV::BI__builtin_riscv_paaddu_u8x4:
+  case RISCV::BI__builtin_riscv_paaddu_u16x2:
+  case RISCV::BI__builtin_riscv_paaddu_u8x8:
+  case RISCV::BI__builtin_riscv_paaddu_u16x4:
+  case RISCV::BI__builtin_riscv_paaddu_u32x2:
+  case RISCV::BI__builtin_riscv_pasub_i8x4:
+  case RISCV::BI__builtin_riscv_pasub_i16x2:
+  case RISCV::BI__builtin_riscv_pasub_i8x8:
+  case RISCV::BI__builtin_riscv_pasub_i16x4:
+  case RISCV::BI__builtin_riscv_pasub_i32x2:
+  case RISCV::BI__builtin_riscv_pasubu_u8x4:
+  case RISCV::BI__builtin_riscv_pasubu_u16x2:
+  case RISCV::BI__builtin_riscv_pasubu_u8x8:
+  case RISCV::BI__builtin_riscv_pasubu_u16x4:
+  case RISCV::BI__builtin_riscv_pasubu_u32x2: {
+    switch (BuiltinID) {
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    case RISCV::BI__builtin_riscv_paadd_i8x4:
+    case RISCV::BI__builtin_riscv_paadd_i16x2:
+    case RISCV::BI__builtin_riscv_paadd_i8x8:
+    case RISCV::BI__builtin_riscv_paadd_i16x4:
+    case RISCV::BI__builtin_riscv_paadd_i32x2:
+      ID = Intrinsic::riscv_paadd;
+      break;
+    case RISCV::BI__builtin_riscv_paaddu_u8x4:
+    case RISCV::BI__builtin_riscv_paaddu_u16x2:
+    case RISCV::BI__builtin_riscv_paaddu_u8x8:
+    case RISCV::BI__builtin_riscv_paaddu_u16x4:
+    case RISCV::BI__builtin_riscv_paaddu_u32x2:
+      ID = Intrinsic::riscv_paaddu;
+      break;
+    case RISCV::BI__builtin_riscv_pasub_i8x4:
+    case RISCV::BI__builtin_riscv_pasub_i16x2:
+    case RISCV::BI__builtin_riscv_pasub_i8x8:
+    case RISCV::BI__builtin_riscv_pasub_i16x4:
+    case RISCV::BI__builtin_riscv_pasub_i32x2:
+      ID = Intrinsic::riscv_pasub;
+      break;
+    case RISCV::BI__builtin_riscv_pasubu_u8x4:
+    case RISCV::BI__builtin_riscv_pasubu_u16x2:
+    case RISCV::BI__builtin_riscv_pasubu_u8x8:
+    case RISCV::BI__builtin_riscv_pasubu_u16x4:
+    case RISCV::BI__builtin_riscv_pasubu_u32x2:
+      ID = Intrinsic::riscv_pasubu;
+      break;
+    }
+
+    IntrinsicTypes = {ResultType};
+    break;
+  }
+
   // Zk builtins
 
   // Zknh

diff  --git a/clang/lib/Headers/riscv_packed_simd.h 
b/clang/lib/Headers/riscv_packed_simd.h
index 39f2f4cd2c704..7e981c91ec3e1 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -351,6 +351,30 @@ __packed_unary_op(pnot_u16x4, uint16x4_t, ~)
 __packed_unary_op(pnot_i32x2, int32x2_t, ~)
 __packed_unary_op(pnot_u32x2, uint32x2_t, ~)
 
+/* Packed Averaging Addition and Subtraction (32-bit) */
+__packed_binary_builtin(paadd_i8x4, int8x4_t, __builtin_riscv_paadd_i8x4)
+__packed_binary_builtin(paadd_i16x2, int16x2_t, __builtin_riscv_paadd_i16x2)
+__packed_binary_builtin(paaddu_u8x4, uint8x4_t, __builtin_riscv_paaddu_u8x4)
+__packed_binary_builtin(paaddu_u16x2, uint16x2_t, __builtin_riscv_paaddu_u16x2)
+__packed_binary_builtin(pasub_i8x4, int8x4_t, __builtin_riscv_pasub_i8x4)
+__packed_binary_builtin(pasub_i16x2, int16x2_t, __builtin_riscv_pasub_i16x2)
+__packed_binary_builtin(pasubu_u8x4, uint8x4_t, __builtin_riscv_pasubu_u8x4)
+__packed_binary_builtin(pasubu_u16x2, uint16x2_t, __builtin_riscv_pasubu_u16x2)
+
+/* Packed Averaging Addition and Subtraction (64-bit) */
+__packed_binary_builtin(paadd_i8x8, int8x8_t, __builtin_riscv_paadd_i8x8)
+__packed_binary_builtin(paadd_i16x4, int16x4_t, __builtin_riscv_paadd_i16x4)
+__packed_binary_builtin(paadd_i32x2, int32x2_t, __builtin_riscv_paadd_i32x2)
+__packed_binary_builtin(paaddu_u8x8, uint8x8_t, __builtin_riscv_paaddu_u8x8)
+__packed_binary_builtin(paaddu_u16x4, uint16x4_t, __builtin_riscv_paaddu_u16x4)
+__packed_binary_builtin(paaddu_u32x2, uint32x2_t, __builtin_riscv_paaddu_u32x2)
+__packed_binary_builtin(pasub_i8x8, int8x8_t, __builtin_riscv_pasub_i8x8)
+__packed_binary_builtin(pasub_i16x4, int16x4_t, __builtin_riscv_pasub_i16x4)
+__packed_binary_builtin(pasub_i32x2, int32x2_t, __builtin_riscv_pasub_i32x2)
+__packed_binary_builtin(pasubu_u8x8, uint8x8_t, __builtin_riscv_pasubu_u8x8)
+__packed_binary_builtin(pasubu_u16x4, uint16x4_t, __builtin_riscv_pasubu_u16x4)
+__packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2)
+
 // clang-format on
 
 #undef __packed_splat2

diff  --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c 
b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 71fb5eb1f0e25..363bfa5f5d995 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -4791,3 +4791,447 @@ int32x2_t test_pnot_i32x2(int32x2_t a) {
 uint32x2_t test_pnot_u32x2(uint32x2_t a) {
   return __riscv_pnot_u32x2(a);
 }
+
+// RV32-LABEL: define dso_local i32 @test_paadd_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paadd.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_paadd_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paadd.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+int8x4_t test_paadd_i8x4(int8x4_t rs1, int8x4_t rs2) {
+  return __riscv_paadd_i8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_paadd_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paadd.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_paadd_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paadd.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+int16x2_t test_paadd_i16x2(int16x2_t rs1, int16x2_t rs2) {
+  return __riscv_paadd_i16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_paaddu_u8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paaddu.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_paaddu_u8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.paaddu.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint8x4_t test_paaddu_u8x4(uint8x4_t rs1, uint8x4_t rs2) {
+  return __riscv_paaddu_u8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_paaddu_u16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paaddu.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_paaddu_u16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paaddu.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint16x2_t test_paaddu_u16x2(uint16x2_t rs1, uint16x2_t rs2) {
+  return __riscv_paaddu_u16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pasub_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasub.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pasub_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasub.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+int8x4_t test_pasub_i8x4(int8x4_t rs1, int8x4_t rs2) {
+  return __riscv_pasub_i8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pasub_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasub.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pasub_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasub.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+int16x2_t test_pasub_i16x2(int16x2_t rs1, int16x2_t rs2) {
+  return __riscv_pasub_i16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pasubu_u8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasubu.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pasubu_u8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pasubu.v4i8(<4 x i8> 
[[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint8x4_t test_pasubu_u8x4(uint8x4_t rs1, uint8x4_t rs2) {
+  return __riscv_pasubu_u8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pasubu_u16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT:    ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pasubu_u16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x 
i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT:    ret i32 [[TMP3]]
+//
+uint16x2_t test_pasubu_u16x2(uint16x2_t rs1, uint16x2_t rs2) {
+  return __riscv_pasubu_u16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_paadd_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paadd.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_paadd_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paadd.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+int8x8_t test_paadd_i8x8(int8x8_t rs1, int8x8_t rs2) {
+  return __riscv_paadd_i8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_paadd_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paadd.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_paadd_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paadd.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+int16x4_t test_paadd_i16x4(int16x4_t rs1, int16x4_t rs2) {
+  return __riscv_paadd_i16x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_paadd_i32x2(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paadd.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_paadd_i32x2(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paadd.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+int32x2_t test_paadd_i32x2(int32x2_t rs1, int32x2_t rs2) {
+  return __riscv_paadd_i32x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_paaddu_u8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paaddu.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_paaddu_u8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.paaddu.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint8x8_t test_paaddu_u8x8(uint8x8_t rs1, uint8x8_t rs2) {
+  return __riscv_paaddu_u8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_paaddu_u16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paaddu.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_paaddu_u16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paaddu.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint16x4_t test_paaddu_u16x4(uint16x4_t rs1, uint16x4_t rs2) {
+  return __riscv_paaddu_u16x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_paaddu_u32x2(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paaddu.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_paaddu_u32x2(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paaddu.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint32x2_t test_paaddu_u32x2(uint32x2_t rs1, uint32x2_t rs2) {
+  return __riscv_paaddu_u32x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pasub_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasub.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pasub_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasub.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+int8x8_t test_pasub_i8x8(int8x8_t rs1, int8x8_t rs2) {
+  return __riscv_pasub_i8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pasub_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasub.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pasub_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasub.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+int16x4_t test_pasub_i16x4(int16x4_t rs1, int16x4_t rs2) {
+  return __riscv_pasub_i16x4(rs1, rs2);
+}
+
+
+// RV32-LABEL: define dso_local i64 @test_pasub_i32x2(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasub.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pasub_i32x2(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasub.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+int32x2_t test_pasub_i32x2(int32x2_t rs1, int32x2_t rs2) {
+  return __riscv_pasub_i32x2(rs1, rs2);
+}
+
+
+// RV32-LABEL: define dso_local i64 @test_pasubu_u8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasubu.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pasubu_u8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pasubu.v8i8(<8 x i8> 
[[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint8x8_t test_pasubu_u8x8(uint8x8_t rs1, uint8x8_t rs2) {
+  return __riscv_pasubu_u8x8(rs1, rs2);
+}
+
+
+// RV32-LABEL: define dso_local i64 @test_pasubu_u16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasubu.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pasubu_u16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasubu.v4i16(<4 x 
i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint16x4_t test_pasubu_u16x4(uint16x4_t rs1, uint16x4_t rs2) {
+  return __riscv_pasubu_u16x4(rs1, rs2);
+}
+
+
+// RV32-LABEL: define dso_local i64 @test_pasubu_u32x2(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV32-NEXT:  [[ENTRY:.*:]]
+// RV32-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV32-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT:    ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pasubu_u32x2(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) 
#[[ATTR0]] {
+// RV64-NEXT:  [[ENTRY:.*:]]
+// RV64-NEXT:    [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <2 x i32>
+// RV64-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x 
i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT:    ret i64 [[TMP3]]
+//
+uint32x2_t test_pasubu_u32x2(uint32x2_t rs1, uint32x2_t rs2) {
+  return __riscv_pasubu_u32x2(rs1, rs2);
+}

diff  --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c 
b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index 971795c877198..edbc56ce0e199 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -1640,3 +1640,135 @@ uint32x2_t test_pmsle_u32x2(int32x2_t a, int32x2_t b) {
 uint32x2_t test_pmsleu_u32x2(uint32x2_t a, uint32x2_t b) {
   return __riscv_pmsleu_u32x2(a, b);
 }
+
+// CHECK-LABEL: test_paadd_i8x4:
+// CHECK:       paadd.b
+int8x4_t test_paadd_i8x4(int8x4_t a, int8x4_t b) {
+  return __riscv_paadd_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_paadd_i16x2:
+// CHECK:       paadd.h
+int16x2_t test_paadd_i16x2(int16x2_t a, int16x2_t b) {
+  return __riscv_paadd_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_paaddu_u8x4:
+// CHECK:       paaddu.b
+uint8x4_t test_paaddu_u8x4(uint8x4_t a, uint8x4_t b) {
+  return __riscv_paaddu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_paaddu_u16x2:
+// CHECK:       paaddu.h
+uint16x2_t test_paaddu_u16x2(uint16x2_t a, uint16x2_t b) {
+  return __riscv_paaddu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pasub_i8x4:
+// CHECK:       pasub.b
+int8x4_t test_pasub_i8x4(int8x4_t a, int8x4_t b) {
+  return __riscv_pasub_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pasub_i16x2:
+// CHECK:       pasub.h
+int16x2_t test_pasub_i16x2(int16x2_t a, int16x2_t b) {
+  return __riscv_pasub_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pasubu_u8x4:
+// CHECK:       pasubu.b
+uint8x4_t test_pasubu_u8x4(uint8x4_t a, uint8x4_t b) {
+  return __riscv_pasubu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pasubu_u16x2:
+// CHECK:       pasubu.h
+uint16x2_t test_pasubu_u16x2(uint16x2_t a, uint16x2_t b) {
+  return __riscv_pasubu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_paadd_i8x8:
+// RV32:        paadd.db
+// RV64:        paadd.b
+int8x8_t test_paadd_i8x8(int8x8_t a, int8x8_t b) {
+  return __riscv_paadd_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_paadd_i16x4:
+// RV32:        paadd.dh
+// RV64:        paadd.h
+int16x4_t test_paadd_i16x4(int16x4_t a, int16x4_t b) {
+  return __riscv_paadd_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_paadd_i32x2:
+// RV32:        paadd.dw
+// RV64:        paadd.w
+int32x2_t test_paadd_i32x2(int32x2_t a, int32x2_t b) {
+  return __riscv_paadd_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_paaddu_u8x8:
+// RV32:        paaddu.db
+// RV64:        paaddu.b
+uint8x8_t test_paaddu_u8x8(uint8x8_t a, uint8x8_t b) {
+  return __riscv_paaddu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_paaddu_u16x4:
+// RV32:        paaddu.dh
+// RV64:        paaddu.h
+uint16x4_t test_paaddu_u16x4(uint16x4_t a, uint16x4_t b) {
+  return __riscv_paaddu_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_paaddu_u32x2:
+// RV32:        paaddu.dw
+// RV64:        paaddu.w
+uint32x2_t test_paaddu_u32x2(uint32x2_t a, uint32x2_t b) {
+  return __riscv_paaddu_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pasub_i8x8:
+// RV32:        pasub.db
+// RV64:        pasub.b
+int8x8_t test_pasub_i8x8(int8x8_t a, int8x8_t b) {
+  return __riscv_pasub_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pasub_i16x4:
+// RV32:        pasub.dh
+// RV64:        pasub.h
+int16x4_t test_pasub_i16x4(int16x4_t a, int16x4_t b) {
+  return __riscv_pasub_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pasub_i32x2:
+// RV32:        pasub.dw
+// RV64:        pasub.w
+int32x2_t test_pasub_i32x2(int32x2_t a, int32x2_t b) {
+  return __riscv_pasub_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_pasubu_u8x8:
+// RV32:        pasubu.db
+// RV64:        pasubu.b
+uint8x8_t test_pasubu_u8x8(uint8x8_t a, uint8x8_t b) {
+  return __riscv_pasubu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pasubu_u16x4:
+// RV32:        pasubu.dh
+// RV64:        pasubu.h
+uint16x4_t test_pasubu_u16x4(uint16x4_t a, uint16x4_t b) {
+  return __riscv_pasubu_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_pasubu_u32x2:
+// RV32:        pasubu.dw
+// RV64:        pasubu.w
+uint32x2_t test_pasubu_u32x2(uint32x2_t a, uint32x2_t b) {
+  return __riscv_pasubu_u32x2(a, b);
+}

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index f53f752c25c30..b2add44b19a5e 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2043,6 +2043,20 @@ let TargetPrefix = "riscv" in {
 let TargetPrefix = "riscv" in
 def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
 
+// Packed SIMD extensions
+//===----------------------------------------------------------------------===//
+let TargetPrefix = "riscv" in {
+// Packed Averaging Addition and Subtraction.
+class RVPBinaryIntrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                            [LLVMMatchType<0>, LLVMMatchType<0>],
+                            [IntrNoMem, IntrSpeculatable]>;
+
+  def int_riscv_paadd : RVPBinaryIntrinsic;
+  def int_riscv_paaddu : RVPBinaryIntrinsic;
+  def int_riscv_pasub : RVPBinaryIntrinsic;
+  def int_riscv_pasubu : RVPBinaryIntrinsic;
+} // TargetPrefix = "riscv"
 
 //===----------------------------------------------------------------------===//
 // Zvfofp8min - OFP8 conversion extension

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 753901d71baca..57af056c2d3c0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -556,6 +556,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       // widen for those operations that will be unrolled.
       setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA},
                          {MVT::v2i16, MVT::v4i8}, Custom);
+      setOperationAction(ISD::INTRINSIC_WO_CHAIN, {MVT::v2i16, MVT::v4i8},
+                         Custom);
     } else {
       VTs = P32VecVTs;
     }
@@ -633,6 +635,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(
           {ISD::UADDSAT, ISD::SADDSAT, ISD::USUBSAT, ISD::SSUBSAT}, P64VecVTs,
           Legal);
+      setOperationAction(ISD::INTRINSIC_WO_CHAIN, P64VecVTs, Legal);
       setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, P64VecVTs, Legal);
       setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
                          P64VecVTs, Legal);
@@ -15637,6 +15640,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
       return;
     }
+    case Intrinsic::riscv_paadd:
+    case Intrinsic::riscv_paaddu:
+    case Intrinsic::riscv_pasub:
+    case Intrinsic::riscv_pasubu: {
+      EVT VT = N->getValueType(0);
+      if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
+        return;
+
+      EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;
+      SDValue Undef = DAG.getUNDEF(VT);
+      SDValue Op0 =
+          DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef);
+      SDValue Op1 =
+          DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(2), Undef);
+      SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WideVT,
+                                N->getOperand(0), Op0, Op1);
+      Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+                                    DAG.getVectorIdxConstant(0, DL)));
+      return;
+    }
     case Intrinsic::riscv_orc_b:
     case Intrinsic::riscv_brev8:
     case Intrinsic::riscv_sha256sig0:

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 79ff90545b68a..fe1e459967e92 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1990,12 +1990,22 @@ let Predicates = [HasStdExtP] in {
   def : PatGprGpr<riscv_asub, PASUB_B, XLenVecI8VT>;
   def : PatGprGpr<riscv_asubu, PASUBU_B, XLenVecI8VT>;
 
+  def : PatGprGpr<int_riscv_paadd, PAADD_B, XLenVecI8VT>;
+  def : PatGprGpr<int_riscv_paaddu, PAADDU_B, XLenVecI8VT>;
+  def : PatGprGpr<int_riscv_pasub, PASUB_B, XLenVecI8VT>;
+  def : PatGprGpr<int_riscv_pasubu, PASUBU_B, XLenVecI8VT>;
+
   // 16-bit averaging patterns
   def : PatGprGpr<avgfloors, PAADD_H, XLenVecI16VT>;
   def : PatGprGpr<avgflooru, PAADDU_H, XLenVecI16VT>;
   def : PatGprGpr<riscv_asub, PASUB_H, XLenVecI16VT>;
   def : PatGprGpr<riscv_asubu, PASUBU_H, XLenVecI16VT>;
 
+  def : PatGprGpr<int_riscv_paadd, PAADD_H, XLenVecI16VT>;
+  def : PatGprGpr<int_riscv_paaddu, PAADDU_H, XLenVecI16VT>;
+  def : PatGprGpr<int_riscv_pasub, PASUB_H, XLenVecI16VT>;
+  def : PatGprGpr<int_riscv_pasubu, PASUBU_H, XLenVecI16VT>;
+
  // 8-bit absolute difference patterns
   def : Pat<(XLenVecI8VT (abs GPR:$rs1)), (PABD_B GPR:$rs1, (XLenVecI8VT X0))>;
   def : PatGprGpr<abds, PABD_B, XLenVecI8VT>;
@@ -2270,6 +2280,19 @@ let append Predicates = [IsRV32] in {
   def : PatGprPairGprPair<riscv_asub, PASUB_DW, v2i32>;
   def : PatGprPairGprPair<riscv_asubu, PASUBU_DW, v2i32>;
 
+  def : PatGprPairGprPair<int_riscv_paadd, PAADD_DB, v8i8>;
+  def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DB, v8i8>;
+  def : PatGprPairGprPair<int_riscv_pasub, PASUB_DB, v8i8>;
+  def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DB, v8i8>;
+  def : PatGprPairGprPair<int_riscv_paadd, PAADD_DH, v4i16>;
+  def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DH, v4i16>;
+  def : PatGprPairGprPair<int_riscv_pasub, PASUB_DH, v4i16>;
+  def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DH, v4i16>;
+  def : PatGprPairGprPair<int_riscv_paadd, PAADD_DW, v2i32>;
+  def : PatGprPairGprPair<int_riscv_paaddu, PAADDU_DW, v2i32>;
+  def : PatGprPairGprPair<int_riscv_pasub, PASUB_DW, v2i32>;
+  def : PatGprPairGprPair<int_riscv_pasubu, PASUBU_DW, v2i32>;
+
  // 8-bit absolute difference patterns
   def : Pat<(v8i8 (abs GPRPair:$rs1)), (PABD_DB GPRPair:$rs1, (v8i8 X0_Pair))>;
   def : PatGprPairGprPair<abds, PABD_DB, v8i8>;
@@ -2478,10 +2501,14 @@ let append Predicates = [IsRV64] in {
   // 32-bit averaging patterns
   def : PatGprGpr<avgfloors, PAADD_W, v2i32>;
   def : PatGprGpr<avgflooru, PAADDU_W, v2i32>;
+  def : PatGprGpr<int_riscv_paadd, PAADD_W, v2i32>;
+  def : PatGprGpr<int_riscv_paaddu, PAADDU_W, v2i32>;
 
   // 32-bit averaging-sub patterns
   def : PatGprGpr<riscv_asub, PASUB_W, v2i32>;
   def : PatGprGpr<riscv_asubu, PASUBU_W, v2i32>;
+  def : PatGprGpr<int_riscv_pasub, PASUB_W, v2i32>;
+  def : PatGprGpr<int_riscv_pasubu, PASUBU_W, v2i32>;
 
   // 32-bit multiply high patterns
   def : PatGprGpr<mulhs, PMULH_W, v2i32>;

diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
index 9292450d51b19..cddea9ae32d25 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
@@ -2686,3 +2686,75 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) {
   %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
   ret <2 x i16> %res
 }
+
+define <4 x i8> @test_paadd_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_paadd_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    paadd.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.paadd.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <4 x i8> @test_paaddu_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_paaddu_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    paaddu.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.paaddu.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <4 x i8> @test_pasub_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_pasub_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pasub.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.pasub.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <4 x i8> @test_pasubu_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_pasubu_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pasubu.b a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <4 x i8> @llvm.riscv.pasubu.v4i8(<4 x i8> %a, <4 x i8> %b)
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_paadd_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_paadd_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    paadd.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.paadd.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}
+
+define <2 x i16> @test_paaddu_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_paaddu_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    paaddu.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.paaddu.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}
+
+define <2 x i16> @test_pasub_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_pasub_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pasub.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.pasub.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}
+
+define <2 x i16> @test_pasubu_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_pasubu_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pasubu.h a0, a0, a1
+; CHECK-NEXT:    ret
+  %res = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> %a, <2 x i16> %b)
+  ret <2 x i16> %res
+}

diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
index 79ae7f43a45d9..2fcbc7bfec7f2 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
@@ -4720,3 +4720,171 @@ define <2 x i32> @test_sext_v2i16_to_v2i32(<2 x i16> %a) {
   %res = sext <2 x i16> %a to <2 x i32>
   ret <2 x i32> %res
 }
+
+define <8 x i8> @test_paadd_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_paadd_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    paadd.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_paadd_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    paadd.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.paadd.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @test_paaddu_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_paaddu_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    paaddu.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_paaddu_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    paaddu.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.paaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @test_pasub_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_pasub_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pasub.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pasub_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pasub.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.pasub.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @test_pasubu_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_pasubu_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pasubu.db a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pasubu_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pasubu.b a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <8 x i8> @llvm.riscv.pasubu.v8i8(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_paadd_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_paadd_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    paadd.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_paadd_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    paadd.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.paadd.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @test_paaddu_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_paaddu_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    paaddu.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_paaddu_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    paaddu.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.paaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @test_pasub_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_pasub_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pasub.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pasub_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pasub.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.pasub.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @test_pasubu_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_pasubu_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pasubu.dh a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pasubu_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pasubu.h a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <4 x i16> @llvm.riscv.pasubu.v4i16(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_paadd_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; RV32-LABEL: test_paadd_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    paadd.dw a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_paadd_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    paadd.w a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <2 x i32> @llvm.riscv.paadd.v2i32(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test_paaddu_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; RV32-LABEL: test_paaddu_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    paaddu.dw a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_paaddu_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    paaddu.w a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <2 x i32> @llvm.riscv.paaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test_pasub_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; RV32-LABEL: test_pasub_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pasub.dw a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pasub_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pasub.w a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <2 x i32> @llvm.riscv.pasub.v2i32(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test_pasubu_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; RV32-LABEL: test_pasubu_v2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    pasubu.dw a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_pasubu_v2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    pasubu.w a0, a0, a1
+; RV64-NEXT:    ret
+  %res = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %res
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to