[clang] adcad45 - [CIR] Vector saturating rounding shift right and narrow intrinsics (#198947)

via cfe-commits Tue, 26 May 2026 03:05:50 -0700

Author: Kartik Ohlan
Date: 2026-05-26T11:05:37+01:00
New Revision: adcad45cbdf804c18ba691d879df98a9e3e22fc2


URL: 
https://github.com/llvm/llvm-project/commit/adcad45cbdf804c18ba691d879df98a9e3e22fc2
DIFF: 
https://github.com/llvm/llvm-project/commit/adcad45cbdf804c18ba691d879df98a9e3e22fc2.diff

LOG: [CIR] Vector saturating rounding shift right and narrow intrinsics  
(#198947)

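This PR skips the `_h` and `_s` scalar (SISD) variants that we had in #198216; only the 64-bit `_d` scalar variants are handled here, alongside the vector forms.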

Part of https://github.com/llvm/llvm-project/issues/185382

Moved the test cases to
[intrinsics.c](https://github.com/llvm/llvm-project/blob/main/clang/test/CodeGen/AArch64/neon/intrinsics.c)
and removed them from
[neon-intrinsics.c](https://github.com/llvm/llvm-project/blob/main/clang/test/CodeGen/AArch64/neon-intrinsics.c).


Variants that are skipped / not covered in this PR (SISD):

```

1.  vqrshrunh_n_s16 (uint8_t, _h scalar)
2.  vqrshruns_n_s32 (uint16_t, _s scalar)
3.  vqrshrnh_n_s16 (int8_t, _h scalar)
4.  vqrshrns_n_s32 (int16_t, _s scalar)
5.  vqrshrnh_n_u16 (uint8_t, _h scalar)
6.  vqrshrns_n_u32 (uint16_t, _s scalar)


```

Added: 
    

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
    clang/test/CodeGen/AArch64/neon-intrinsics.c
    clang/test/CodeGen/AArch64/neon/intrinsics.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 8a3465b6d879b..ca5a4680ee79f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -369,6 +369,9 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
   case NEON::BI__builtin_neon_vmaxv_f32:
   case NEON::BI__builtin_neon_vmaxvq_f32:
   case NEON::BI__builtin_neon_vmaxvq_f64:
+  case NEON::BI__builtin_neon_vqrshrund_n_s64:
+  case NEON::BI__builtin_neon_vqrshrnd_n_s64:
+  case NEON::BI__builtin_neon_vqrshrnd_n_u64:
     return emitNeonCall(cgf.cgm, cgf.getBuilder(),
                         {cgf.convertType(expr->getArg(0)->getType())}, ops,
                         llvmIntrName, cgf.convertType(expr->getType()), loc);
@@ -2674,10 +2677,34 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vrecpsd_f64:
   case NEON::BI__builtin_neon_vrecpsh_f16:
   case NEON::BI__builtin_neon_vqshrun_n_v:
-  case NEON::BI__builtin_neon_vqrshrun_n_v:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  case NEON::BI__builtin_neon_vqrshrun_n_v: {
+    cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
+        ty, /*isExtended=*/true, /*isSigned=*/true);
+    return emitNeonCall(cgm, builder, {argTy, sInt32Ty}, ops,
+                        "aarch64.neon.sqrshrun", ty, loc);
+  }
   case NEON::BI__builtin_neon_vqshrn_n_v:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vrshrn_n_v:
-  case NEON::BI__builtin_neon_vqrshrn_n_v:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
+  case NEON::BI__builtin_neon_vqrshrn_n_v: {
+    cir::VectorType argTy = builder.getExtendedOrTruncatedElementVectorType(
+        ty, /*isExtended=*/true, /*isSigned=*/!usgn);
+    llvm::StringRef intrName =
+        usgn ? "aarch64.neon.uqrshrn" : "aarch64.neon.sqrshrn";
+    return emitNeonCall(cgm, builder, {argTy, sInt32Ty}, ops, intrName, ty,
+                        loc);
+  }
   case NEON::BI__builtin_neon_vrndah_f16:
   case NEON::BI__builtin_neon_vrnda_v:
   case NEON::BI__builtin_neon_vrndaq_v:

diff  --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c 
b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 480d77723a7c2..35b886c53a5ec 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -5996,81 +5996,6 @@ uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, 
uint64x2_t b) {
   return vrshrn_high_n_u64(a, b, 19);
 }
 
-// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshrun_n_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VQRSHRUN_N1:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// CHECK-NEXT:    ret <8 x i8> [[VQRSHRUN_N1]]
-//
-uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
-  return vqrshrun_n_s16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshrun_n_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VQRSHRUN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// CHECK-NEXT:    ret <4 x i16> [[VQRSHRUN_N1]]
-//
-uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
-  return vqrshrun_n_s32(a, 9);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshrun_n_s64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VQRSHRUN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// CHECK-NEXT:    ret <2 x i32> [[VQRSHRUN_N1]]
-//
-uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
-  return vqrshrun_n_s64(a, 19);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshrun_high_n_s16(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VQRSHRUN_N3:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[VQRSHRUN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK-NEXT:    ret <16 x i8> [[SHUFFLE_I]]
-//
-uint8x16_t test_vqrshrun_high_n_s16(uint8x8_t a, int16x8_t b) {
-  return vqrshrun_high_n_s16(a, b, 3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshrun_high_n_s32(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VQRSHRUN_N3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> 
[[VQRSHRUN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
-// CHECK-NEXT:    ret <8 x i16> [[SHUFFLE_I]]
-//
-uint16x8_t test_vqrshrun_high_n_s32(uint16x4_t a, int32x4_t b) {
-  return vqrshrun_high_n_s32(a, b, 9);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshrun_high_n_s64(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VQRSHRUN_N3:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> 
[[VQRSHRUN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK-NEXT:    ret <4 x i32> [[SHUFFLE_I]]
-//
-uint32x4_t test_vqrshrun_high_n_s64(uint32x2_t a, int64x2_t b) {
-  return vqrshrun_high_n_s64(a, b, 19);
-}
-
 // CHECK-LABEL: define dso_local <8 x i8> @test_vqshrn_n_s16(
 // CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -6221,156 +6146,6 @@ uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, 
uint64x2_t b) {
   return vqshrn_high_n_u64(a, b, 19);
 }
 
-// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshrn_n_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VQRSHRN_N1:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK-NEXT:    ret <8 x i8> [[VQRSHRN_N1]]
-//
-int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
-  return vqrshrn_n_s16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshrn_n_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VQRSHRN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK-NEXT:    ret <4 x i16> [[VQRSHRN_N1]]
-//
-int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
-  return vqrshrn_n_s32(a, 9);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshrn_n_s64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VQRSHRN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK-NEXT:    ret <2 x i32> [[VQRSHRN_N1]]
-//
-int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
-  return vqrshrn_n_s64(a, 19);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vqrshrn_n_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VQRSHRN_N1:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK-NEXT:    ret <8 x i8> [[VQRSHRN_N1]]
-//
-uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
-  return vqrshrn_n_u16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vqrshrn_n_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VQRSHRN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK-NEXT:    ret <4 x i16> [[VQRSHRN_N1]]
-//
-uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
-  return vqrshrn_n_u32(a, 9);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vqrshrn_n_u64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VQRSHRN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK-NEXT:    ret <2 x i32> [[VQRSHRN_N1]]
-//
-uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
-  return vqrshrn_n_u64(a, 19);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshrn_high_n_s16(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VQRSHRN_N3:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[VQRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK-NEXT:    ret <16 x i8> [[SHUFFLE_I]]
-//
-int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
-  return vqrshrn_high_n_s16(a, b, 3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshrn_high_n_s32(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VQRSHRN_N3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> 
[[VQRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7>
-// CHECK-NEXT:    ret <8 x i16> [[SHUFFLE_I]]
-//
-int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
-  return vqrshrn_high_n_s32(a, b, 9);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshrn_high_n_s64(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VQRSHRN_N3:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> 
[[VQRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK-NEXT:    ret <4 x i32> [[SHUFFLE_I]]
-//
-int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
-  return vqrshrn_high_n_s64(a, b, 19);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vqrshrn_high_n_u16(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VQRSHRN_N3:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[VQRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-// CHECK-NEXT:    ret <16 x i8> [[SHUFFLE_I]]
-//
-uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
-  return vqrshrn_high_n_u16(a, b, 3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vqrshrn_high_n_u32(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VQRSHRN_N3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> 
[[VQRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7>
-// CHECK-NEXT:    ret <8 x i16> [[SHUFFLE_I]]
-//
-uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
-  return vqrshrn_high_n_u32(a, b, 9);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vqrshrn_high_n_u64(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VQRSHRN_N3:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> 
[[VQRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK-NEXT:    ret <4 x i32> [[SHUFFLE_I]]
-//
-uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
-  return vqrshrn_high_n_u64(a, b, 19);
-}
-
 // CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_s8(
 // CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -15407,16 +15182,6 @@ int16_t test_vqrshrns_n_s32(int32_t a) {
   return (int16_t)vqrshrns_n_s32(a, 16);
 }
 
-// CHECK-LABEL: define dso_local i32 @test_vqrshrnd_n_s64(
-// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VQRSHRND_N_S64:%.*]] = call i32 
@llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 32)
-// CHECK-NEXT:    ret i32 [[VQRSHRND_N_S64]]
-//
-int32_t test_vqrshrnd_n_s64(int64_t a) {
-  return (int32_t)vqrshrnd_n_s64(a, 32);
-}
-
 // CHECK-LABEL: define dso_local i8 @test_vqrshrnh_n_u16(
 // CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -15441,16 +15206,6 @@ uint16_t test_vqrshrns_n_u32(uint32_t a) {
   return (uint16_t)vqrshrns_n_u32(a, 16);
 }
 
-// CHECK-LABEL: define dso_local i32 @test_vqrshrnd_n_u64(
-// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VQRSHRND_N_U64:%.*]] = call i32 
@llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 32)
-// CHECK-NEXT:    ret i32 [[VQRSHRND_N_U64]]
-//
-uint32_t test_vqrshrnd_n_u64(uint64_t a) {
-  return (uint32_t)vqrshrnd_n_u64(a, 32);
-}
-
 // CHECK-LABEL: define dso_local i8 @test_vqshrunh_n_s16(
 // CHECK-SAME: i16 noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -15509,16 +15264,6 @@ uint16_t test_vqrshruns_n_s32(int32_t a) {
   return (uint16_t)vqrshruns_n_s32(a, 16);
 }
 
-// CHECK-LABEL: define dso_local i32 @test_vqrshrund_n_s64(
-// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VQRSHRUND_N_S64:%.*]] = call i32 
@llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 32)
-// CHECK-NEXT:    ret i32 [[VQRSHRUND_N_S64]]
-//
-uint32_t test_vqrshrund_n_s64(int64_t a) {
-  return (uint32_t)vqrshrund_n_s64(a, 32);
-}
-
 // CHECK-LABEL: define dso_local <8 x i8> @test_vreinterpret_s8_s16(
 // CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]

diff  --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c 
b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 58f37e76a95eb..b07f61a712f90 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -24,6 +24,266 @@
 
 #include <arm_neon.h>
 
+//===------------------------------------------------------===//
+// 2.1.3.2.7  Vector saturating rounding shift right and narrow
+// TODO: Implement SISD variants
+//===------------------------------------------------------===//
+
+// ALL-LABEL: @test_vqrshrun_n_s16(
+uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
+  // CIR:   cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+
+  // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]])
+  // LLVM:      {{.*}} = bitcast <8 x i16> [[A]] to <16 x i8>
+  // LLVM:      [[TMP:%.*]] = bitcast <16 x i8> {{.*}} to <8 x i16>
+  // LLVM:      [[VQRSHRUN_N1:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP]], i32 3)
+  // LLVM:      ret <8 x i8> [[VQRSHRUN_N1]]
+  return vqrshrun_n_s16(a, 3);
+}
+
+// ALL-LABEL: @test_vqrshrun_n_s32(
+uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
+  // CIR:   cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+
+  // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]])
+  // LLVM:      [[BC1:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+  // LLVM:      [[BC2:%.*]] = bitcast <16 x i8> [[BC1]] to <4 x i32>
+  // LLVM:      [[VQRSHRUN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[BC2]], i32 9)
+  // LLVM:      ret <4 x i16> [[VQRSHRUN_N1]]
+  return vqrshrun_n_s32(a, 9);
+}
+
+// ALL-LABEL: @test_vqrshrun_n_s64(
+uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
+  // CIR:   cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+
+  // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]])
+  // LLVM:      [[BC1:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+  // LLVM:      [[BC2:%.*]] = bitcast <16 x i8> [[BC1]] to <2 x i64>
+  // LLVM:      [[VQRSHRUN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[BC2]], i32 19)
+  // LLVM:      ret <2 x i32> [[VQRSHRUN_N1]]
+  return vqrshrun_n_s64(a, 19);
+}
+
+//ALL-LABEL:  @test_vqrshrund_n_s64(
+uint32_t test_vqrshrund_n_s64(int64_t a) {
+  //CIR:  cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+
+  // LLVM-SAME: i64 {{.*}} [[A:%.*]])
+  // LLVM:   [[VQRSHRUND_N_S64:%.*]] = call i32 
@llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 32)
+  // LLVM:  ret i32 [[VQRSHRUND_N_S64]]
+  return (uint32_t)vqrshrund_n_s64(a, 32);
+}
+
+// ALL-LABEL: @test_vqrshrn_n_s16(
+int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]])
+  // LLVM:      [[BC1:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+  // LLVM:      [[BC2:%.*]] = bitcast <16 x i8> [[BC1]] to <8 x i16>
+  // LLVM:      [[VQRSHRN_N1:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[BC2]], i32 3)
+  // LLVM:      ret <8 x i8> [[VQRSHRN_N1]]
+  return vqrshrn_n_s16(a, 3);
+}
+
+// ALL-LABEL: @test_vqrshrn_n_s32(
+int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]])
+  // LLVM:      [[BC1:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+  // LLVM:      [[BC2:%.*]] = bitcast <16 x i8> [[BC1]] to <4 x i32>
+  // LLVM:      [[VQRSHRN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[BC2]], i32 9)
+  // LLVM:      ret <4 x i16> [[VQRSHRN_N1]]
+  return vqrshrn_n_s32(a, 9);
+}
+
+// ALL-LABEL: @test_vqrshrn_n_s64(
+int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]])
+  // LLVM:      [[BC1:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+  // LLVM:      [[BC2:%.*]] = bitcast <16 x i8> [[BC1]] to <2 x i64>
+  // LLVM:      [[VQRSHRN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[BC2]], i32 19)
+  // LLVM:      ret <2 x i32> [[VQRSHRN_N1]]
+  return vqrshrn_n_s64(a, 19);
+}
+
+// ALL-LABEL: @test_vqrshrun_high_n_s16(
+uint8x16_t test_vqrshrun_high_n_s16(uint8x8_t a, int16x8_t b) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+
+  // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i16> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM:      [[VQRSHRUN_N3:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[VQRSHRUN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM:      ret <16 x i8> [[SHUFFLE_I]]
+  return vqrshrun_high_n_s16(a, b, 3);
+}
+
+// ALL-LABEL: @test_vqrshrun_high_n_s32(
+uint16x8_t test_vqrshrun_high_n_s32(uint16x4_t a, int32x4_t b) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+  
+  // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i32> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM:      [[VQRSHRUN_N3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> 
[[VQRSHRUN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // LLVM:      ret <8 x i16> [[SHUFFLE_I]]
+  return vqrshrun_high_n_s32(a, b, 9);
+}
+
+// ALL-LABEL: @test_vqrshrun_high_n_s64(
+uint32x4_t test_vqrshrun_high_n_s64(uint32x2_t a, int64x2_t b) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.sqrshrun" 
+
+  // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i64> {{.*}} [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM:      [[VQRSHRUN_N3:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> 
[[VQRSHRUN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM:      ret <4 x i32> [[SHUFFLE_I]]
+  return vqrshrun_high_n_s64(a, b, 19);
+}
+
+// ALL-LABEL: @test_vqrshrn_n_u16(
+uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM:      [[VQRSHRN_N1:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+  // LLVM:      ret <8 x i8> [[VQRSHRN_N1]]
+  return vqrshrn_n_u16(a, 3);
+}
+
+// ALL-LABEL: @test_vqrshrn_n_u32(
+uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM:      [[VQRSHRN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+  // LLVM:      ret <4 x i16> [[VQRSHRN_N1]]
+  return vqrshrn_n_u32(a, 9);
+}
+
+// ALL-LABEL: @test_vqrshrn_n_u64(
+uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
+  // CIR: cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM:      [[VQRSHRN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+  // LLVM:      ret <2 x i32> [[VQRSHRN_N1]]
+  return vqrshrn_n_u64(a, 19);
+}
+
+// ALL-LABEL: @test_vqrshrnd_n_s64(
+int32_t test_vqrshrnd_n_s64(int64_t a) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: i64 noundef [[A:%.*]])
+  // LLVM:      [[VQRSHRND_N_S64:%.*]] = call i32 
@llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 32)
+  // LLVM:      ret i32 [[VQRSHRND_N_S64]]
+  return (int32_t)vqrshrnd_n_s64(a, 32);
+}
+
+// ALL-LABEL: @test_vqrshrnd_n_u64(
+uint32_t test_vqrshrnd_n_u64(uint64_t a) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: i64 noundef [[A:%.*]])
+  // LLVM:      [[VQRSHRND_N_U64:%.*]] = call i32 
@llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 32)
+  // LLVM:      ret i32 [[VQRSHRND_N_U64]]
+  return (uint32_t)vqrshrnd_n_u64(a, 32);
+}
+
+// ALL-LABEL: @test_vqrshrn_high_n_s16(
+int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i16> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM:      [[VQRSHRN_N3:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[VQRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM:      ret <16 x i8> [[SHUFFLE_I]]
+  return vqrshrn_high_n_s16(a, b, 3);
+}
+
+// ALL-LABEL: @test_vqrshrn_high_n_s32(
+int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i32> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM:      [[VQRSHRN_N3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> 
[[VQRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7>
+  // LLVM:      ret <8 x i16> [[SHUFFLE_I]]
+  return vqrshrn_high_n_s32(a, b, 9);
+}
+
+// ALL-LABEL:  @test_vqrshrn_high_n_s64(
+int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
+  // CIR:  cir.call_llvm_intrinsic "aarch64.neon.sqrshrn" 
+
+  // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i64> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM:      [[VQRSHRN_N3:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> 
[[VQRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM:      ret <4 x i32> [[SHUFFLE_I]]
+  return vqrshrn_high_n_s64(a, b, 19);
+}
+
+// ALL-LABEL:  @test_vqrshrn_high_n_u16(
+uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i16> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM:      [[VQRSHRN_N3:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[VQRSHRN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM:      ret <16 x i8> [[SHUFFLE_I]]
+  return vqrshrn_high_n_u16(a, b, 3);
+}
+
+// ALL-LABEL: @test_vqrshrn_high_n_u32(
+uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
+  // CIR:   cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i32> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM:      [[VQRSHRN_N3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> 
[[VQRSHRN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7>
+  // LLVM:      ret <8 x i16> [[SHUFFLE_I]]
+  return vqrshrn_high_n_u32(a, b, 9);
+}
+
+// ALL-LABEL: @test_vqrshrn_high_n_u64(
+uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
+  // CIR:    cir.call_llvm_intrinsic "aarch64.neon.uqrshrn" 
+
+  // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i64> {{.*}}  [[B:%.*]])
+  // LLVM:      [[TMP0:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
+  // LLVM:      [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM:      [[VQRSHRN_N3:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+  // LLVM:      [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> 
[[VQRSHRN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM:      ret <4 x i32> [[SHUFFLE_I]]
+  return vqrshrn_high_n_u64(a, b, 19);
+}
+
 //===------------------------------------------------------===//
 // 2.1.1.13.3 Maximum across vector
 //===------------------------------------------------------===//


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] adcad45 - [CIR] Vector saturating rounding shift right and narrow intrinsics (#198947)

Reply via email to