https://github.com/albertbolt1 updated 
https://github.com/llvm/llvm-project/pull/187516

>From 33792d29d3b9e80876a1b3bfdd50b2612d493e5c Mon Sep 17 00:00:00 2001
From: albertbolt <[email protected]>
Date: Thu, 19 Mar 2026 19:33:16 +0530
Subject: [PATCH 1/4] adding vector intrinsics for shift left

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  14 +-
 clang/test/CodeGen/AArch64/neon/intrinsics.c  | 163 ++++++++++++++++++
 2 files changed, 175 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5d7b8d839fa84..c9960be7b0b6b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -860,6 +860,7 @@ static mlir::Value emitCommonNeonBuiltinExpr(
   cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType,
                                     false, allowBFloatArgsAndRet);
   mlir::Type ty = vTy;
+
   if (!ty)
     return nullptr;
 
@@ -1053,8 +1054,17 @@ static mlir::Value emitCommonNeonBuiltinExpr(
   case NEON::BI__builtin_neon_vsha512h2q_u64:
   case NEON::BI__builtin_neon_vsha512su0q_u64:
   case NEON::BI__builtin_neon_vsha512su1q_u64:
+    cgf.cgm.errorNYI(expr->getSourceRange(),
+                     std::string("unimplemented AArch64 builtin call: ") +
+                         ctx.BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vshl_n_v:
-  case NEON::BI__builtin_neon_vshlq_n_v:
+  case NEON::BI__builtin_neon_vshlq_n_v: {
+    auto rhsScalar = cgf.getBuilder().createIntCast(ops[1], 
vTy.getElementType());
+    auto rhsVec = cir::VecSplatOp::create(cgf.getBuilder(),loc, vTy, 
rhsScalar);
+    auto lhsVec = cgf.getBuilder().createBitcast(ops[0], vTy);
+    return cgf.getBuilder().createShiftLeft(loc, lhsVec, rhsVec);
+  }
   case NEON::BI__builtin_neon_vshll_n_v:
   case NEON::BI__builtin_neon_vshrn_n_v:
   case NEON::BI__builtin_neon_vshr_n_v:
@@ -2593,7 +2603,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   // defer to common code if it's been added to our special map.
   builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
                                         aarch64SIMDIntrinsicsProvenSorted);
-
+  
   if (builtin)
     return emitCommonNeonBuiltinExpr(
         *this, builtin->builtinID, builtin->llvmIntrinsic,
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c 
b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index bf8e62feda8da..af2341659f437 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -982,3 +982,166 @@ int64_t test_vshld_u64(int64_t a,int64_t b) {
   return (int64_t)vshld_u64(a, b);
 }
 
+// ALL-LABEL: test_vshlq_n_s8
+int8x16_t test_vshlq_n_s8(int8x16_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !s8i>, 
%{{.*}} : !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
+  
+ // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <16 x i8> {{.*}}, splat (i8 1)
+ // LLVM: ret <16 x i8> [[SHL]]
+ return vshlq_n_s8(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_s16
+int16x8_t test_vshlq_n_s16(int16x8_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s16i>, 
%{{.*}} : !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
+  
+ // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <8 x i16> {{.*}}, splat (i16 1)
+ // LLVM: ret <8 x i16> [[SHL]]
+ return vshlq_n_s16(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_s32
+int32x4_t test_vshlq_n_s32(int32x4_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s32i>, 
%{{.*}} : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
+  
+ // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <4 x i32> {{.*}}, splat (i32 1)
+ // LLVM: ret <4 x i32> [[SHL]]
+ return vshlq_n_s32(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_s64
+int64x2_t test_vshlq_n_s64(int64x2_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s64i>, 
%{{.*}} : !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i>
+  
+ // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <2 x i64> {{.*}}, splat (i64 1)
+ // LLVM: ret <2 x i64> [[SHL]]
+ return vshlq_n_s64(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_u8
+uint8x16_t test_vshlq_n_u8(uint8x16_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !u8i>, 
%{{.*}} : !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
+  
+ // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <16 x i8> {{.*}}, splat (i8 1)
+ // LLVM: ret <16 x i8> [[SHL]]
+ return vshlq_n_u8(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_u16
+uint16x8_t test_vshlq_n_u16(uint16x8_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u16i>, 
%{{.*}} : !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i>
+  
+ // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <8 x i16> {{.*}}, splat (i16 1)
+ // LLVM: ret <8 x i16> [[SHL]]
+ return vshlq_n_u16(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_u32
+uint32x4_t test_vshlq_n_u32(uint32x4_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u32i>, 
%{{.*}} : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
+  
+ // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <4 x i32> {{.*}}, splat (i32 1)
+ // LLVM: ret <4 x i32> [[SHL]]
+ return vshlq_n_u32(a, 1);
+}
+
+// ALL-LABEL: test_vshlq_n_u64
+uint64x2_t test_vshlq_n_u64(uint64x2_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u64i>, 
%{{.*}} : !cir.vector<2 x !u64i>) -> !cir.vector<2 x !u64i>
+  
+ // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <2 x i64> {{.*}}, splat (i64 1)
+ // LLVM: ret <2 x i64> [[SHL]]
+ return vshlq_n_u64(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_s8
+int8x8_t test_vshl_n_s8(int8x8_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s8i>, 
%{{.*}} : !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
+  
+ // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <8 x i8> {{.*}}, splat (i8 1)
+ // LLVM: ret <8 x i8> [[SHL]]
+ return vshl_n_s8(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_s16
+int16x4_t test_vshl_n_s16(int16x4_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s16i>, 
%{{.*}} : !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i>
+  
+ // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <4 x i16> {{.*}}, splat (i16 1)
+ // LLVM: ret <4 x i16> [[SHL]]
+ return vshl_n_s16(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_s32
+int32x2_t test_vshl_n_s32(int32x2_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s32i>, 
%{{.*}} : !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i>
+  
+ // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <2 x i32> {{.*}}, splat (i32 1)
+ // LLVM: ret <2 x i32> [[SHL]]
+ return vshl_n_s32(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_s64
+int64x1_t test_vshl_n_s64(int64x1_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !s64i>, 
%{{.*}} : !cir.vector<1 x !s64i>) -> !cir.vector<1 x !s64i>
+  
+ // LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <1 x i64> {{.*}}, splat (i64 1)
+ // LLVM: ret <1 x i64> [[SHL]]
+ return vshl_n_s64(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_u8
+uint8x8_t test_vshl_n_u8(uint8x8_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u8i>, 
%{{.*}} : !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
+  
+ // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <8 x i8> {{.*}}, splat (i8 1)
+ // LLVM: ret <8 x i8> [[SHL]]
+ return vshl_n_u8(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_u16
+uint16x4_t test_vshl_n_u16(uint16x4_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u16i>, 
%{{.*}} : !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i>
+
+  
+ // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <4 x i16> {{.*}}, splat (i16 1)
+ // LLVM: ret <4 x i16> [[SHL]]
+ return vshl_n_u16(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_u32
+uint32x2_t test_vshl_n_u32(uint32x2_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u32i>, 
%{{.*}} : !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i>
+  
+ // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <2 x i32> {{.*}}, splat (i32 1)
+ // LLVM: ret <2 x i32> [[SHL]]
+ return vshl_n_u32(a, 1);
+}
+
+// ALL-LABEL: test_vshl_n_u64
+uint64x1_t test_vshl_n_u64(uint64x1_t a, int64_t b) {
+ // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !u64i>, 
%{{.*}} : !cir.vector<1 x !u64i>) -> !cir.vector<1 x !u64i>
+  
+ // LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
+ // LLVM: [[SHL:%.*]] = shl <1 x i64> {{.*}}, splat (i64 1)
+ // LLVM: ret <1 x i64> [[SHL]]
+ return vshl_n_u64(a, 1);
+}
+
+
+

>From 2048fbf4ad51311dc672350d25ee7f41a08ee61f Mon Sep 17 00:00:00 2001
From: albertbolt <[email protected]>
Date: Sat, 21 Mar 2026 11:00:47 +0530
Subject: [PATCH 2/4] used code from incubator, ported test cases

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  42 +++-
 clang/test/CodeGen/AArch64/neon-intrinsics.c  | 208 +++---------------
 clang/test/CodeGen/AArch64/neon/intrinsics.c  | 186 +++++++++-------
 3 files changed, 172 insertions(+), 264 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index c9960be7b0b6b..d634cb2fe21f4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -831,6 +831,40 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, 
NeonTypeFlags typeFlags,
   llvm_unreachable("Unknown vector element type!");
 }
 
+/// Get the integer value from an mlir::Value that is an int constant op.
+static int64_t getIntValueFromConstOp(mlir::Value val) {
+  return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
+}
+
+/// Build a constant shift-amount vector of `vecTy` to shift a vector.
+/// Here `shiftVal` is a constant integer that will be splatted into a
+/// const vector of `vecTy`, which is the return value of this function.
+static mlir::Value emitNeonShiftVector(CIRGenBuilderTy &builder,
+                                       mlir::Value shiftVal,
+                                       cir::VectorType vecTy,
+                                       mlir::Location loc, bool neg) {
+  int shiftAmt = getIntValueFromConstOp(shiftVal);
+  if (neg)
+    shiftAmt = -shiftAmt;
+  llvm::SmallVector<mlir::Attribute> vecAttr{
+      vecTy.getSize(),
+      // ConstVectorAttr requires cir::IntAttr
+      cir::IntAttr::get(vecTy.getElementType(), shiftAmt)};
+  cir::ConstVectorAttr constVecAttr = cir::ConstVectorAttr::get(
+      vecTy, mlir::ArrayAttr::get(builder.getContext(), vecAttr));
+  return cir::ConstantOp::create(builder, loc, constVecAttr);
+}
+
+static mlir::Value
+emitCommonNeonShift(CIRGenBuilderTy &builder, mlir::Location loc,
+                    cir::VectorType resTy, mlir::Value shifTgt,
+                    mlir::Value shiftAmt, bool shiftLeft, bool negAmt = false) 
{
+  shiftAmt = emitNeonShiftVector(builder, shiftAmt, resTy, loc, negAmt);
+  return cir::ShiftOp::create(builder, loc, resTy,
+                              builder.createBitcast(shifTgt, resTy), shiftAmt,
+                              shiftLeft);
+}
+
 static mlir::Value emitCommonNeonBuiltinExpr(
     CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
     unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
@@ -1059,12 +1093,8 @@ static mlir::Value emitCommonNeonBuiltinExpr(
                          ctx.BuiltinInfo.getName(builtinID));
     return mlir::Value{};
   case NEON::BI__builtin_neon_vshl_n_v:
-  case NEON::BI__builtin_neon_vshlq_n_v: {
-    auto rhsScalar = cgf.getBuilder().createIntCast(ops[1], 
vTy.getElementType());
-    auto rhsVec = cir::VecSplatOp::create(cgf.getBuilder(),loc, vTy, 
rhsScalar);
-    auto lhsVec = cgf.getBuilder().createBitcast(ops[0], vTy);
-    return cgf.getBuilder().createShiftLeft(loc, lhsVec, rhsVec);
-  }
+  case NEON::BI__builtin_neon_vshlq_n_v:
+    return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1], 
true);
   case NEON::BI__builtin_neon_vshll_n_v:
   case NEON::BI__builtin_neon_vshrn_n_v:
   case NEON::BI__builtin_neon_vshr_n_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c 
b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 8eb6cd86339d6..84e91832cf648 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -6610,166 +6610,6 @@ float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t 
b) {
   return vmulxq_f64(a, b);
 }
 
-// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_n_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
-// CHECK-NEXT:    ret <8 x i8> [[VSHL_N]]
-//
-int8x8_t test_vshl_n_s8(int8x8_t a) {
-  return vshl_n_s8(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_n_s16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
-// CHECK-NEXT:    ret <4 x i16> [[VSHL_N]]
-//
-int16x4_t test_vshl_n_s16(int16x4_t a) {
-  return vshl_n_s16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_n_s32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
-// CHECK-NEXT:    ret <2 x i32> [[VSHL_N]]
-//
-int32x2_t test_vshl_n_s32(int32x2_t a) {
-  return vshl_n_s32(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_n_s8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
-// CHECK-NEXT:    ret <16 x i8> [[VSHL_N]]
-//
-int8x16_t test_vshlq_n_s8(int8x16_t a) {
-  return vshlq_n_s8(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_n_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK-NEXT:    ret <8 x i16> [[VSHL_N]]
-//
-int16x8_t test_vshlq_n_s16(int16x8_t a) {
-  return vshlq_n_s16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_n_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
-// CHECK-NEXT:    ret <4 x i32> [[VSHL_N]]
-//
-int32x4_t test_vshlq_n_s32(int32x4_t a) {
-  return vshlq_n_s32(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_n_s64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
-// CHECK-NEXT:    ret <2 x i64> [[VSHL_N]]
-//
-int64x2_t test_vshlq_n_s64(int64x2_t a) {
-  return vshlq_n_s64(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vshl_n_u8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
-// CHECK-NEXT:    ret <8 x i8> [[VSHL_N]]
-//
-uint8x8_t test_vshl_n_u8(uint8x8_t a) {
-  return vshl_n_u8(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vshl_n_u16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
-// CHECK-NEXT:    ret <4 x i16> [[VSHL_N]]
-//
-uint16x4_t test_vshl_n_u16(uint16x4_t a) {
-  return vshl_n_u16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vshl_n_u32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
-// CHECK-NEXT:    ret <2 x i32> [[VSHL_N]]
-//
-uint32x2_t test_vshl_n_u32(uint32x2_t a) {
-  return vshl_n_u32(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vshlq_n_u8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
-// CHECK-NEXT:    ret <16 x i8> [[VSHL_N]]
-//
-uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
-  return vshlq_n_u8(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vshlq_n_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
-// CHECK-NEXT:    ret <8 x i16> [[VSHL_N]]
-//
-uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
-  return vshlq_n_u16(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vshlq_n_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
-// CHECK-NEXT:    ret <4 x i32> [[VSHL_N]]
-//
-uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
-  return vshlq_n_u32(a, 3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vshlq_n_u64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
-// CHECK-NEXT:    ret <2 x i64> [[VSHL_N]]
-//
-uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
-  return vshlq_n_u64(a, 3);
-}
-
 // CHECK-LABEL: define dso_local <8 x i8> @test_vshr_n_s8(
 // CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -12052,6 +11892,26 @@ uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
   return vqsubd_u64(a, b);
 }
 
+// CHECK-LABEL: define dso_local i64 @test_vshld_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VSHLD_S64_I:%.*]] = call i64 
@llvm.aarch64.neon.sshl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT:    ret i64 [[VSHLD_S64_I]]
+//
+int64_t test_vshld_s64(int64_t a, int64_t b) {
+  return vshld_s64(a, b);
+}
+
+// CHECK-LABEL: define dso_local i64 @test_vshld_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VSHLD_U64_I:%.*]] = call i64 
@llvm.aarch64.neon.ushl.i64(i64 [[A]], i64 [[B]])
+// CHECK-NEXT:    ret i64 [[VSHLD_U64_I]]
+//
+uint64_t test_vshld_u64(uint64_t a, int64_t b) {
+  return vshld_u64(a, b);
+}
+
 // CHECK-LABEL: define dso_local i8 @test_vqshlb_s8(
 // CHECK-SAME: i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -17867,28 +17727,24 @@ uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t 
b) {
   return vrsra_n_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_n_s64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define dso_local i64 @test_vshld_n_s64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK-NEXT:    ret <1 x i64> [[VSHL_N]]
+// CHECK-NEXT:    [[SHLD_N:%.*]] = shl i64 [[A]], 1
+// CHECK-NEXT:    ret i64 [[SHLD_N]]
 //
-int64x1_t test_vshl_n_s64(int64x1_t a) {
-  return vshl_n_s64(a, 1);
+int64_t test_vshld_n_s64(int64_t a) {
+  return (int64_t)vshld_n_s64(a, 1);
 }
 
-// CHECK-LABEL: define dso_local <1 x i64> @test_vshl_n_u64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define dso_local i64 @test_vshld_n_u64(
+// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT:    [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
-// CHECK-NEXT:    ret <1 x i64> [[VSHL_N]]
+// CHECK-NEXT:    [[SHLD_N:%.*]] = shl i64 [[A]], 63
+// CHECK-NEXT:    ret i64 [[SHLD_N]]
 //
-uint64x1_t test_vshl_n_u64(uint64x1_t a) {
-  return vshl_n_u64(a, 1);
+uint64_t test_vshld_n_u64(uint64_t a) {
+  return (uint64_t)vshld_n_u64(a, 63);
 }
 
 // CHECK-LABEL: define dso_local i8 @test_vqshlb_n_s8(
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c 
b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index af2341659f437..4fa83e9b0b7d9 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -983,165 +983,187 @@ int64_t test_vshld_u64(int64_t a,int64_t b) {
 }
 
 // ALL-LABEL: test_vshlq_n_s8
-int8x16_t test_vshlq_n_s8(int8x16_t a, int64_t b) {
- // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !s8i>, 
%{{.*}} : !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
+int8x16_t test_vshlq_n_s8(int8x16_t a) {
+// CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !s8i>, 
%{{.*}} : !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
   
- // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <16 x i8> {{.*}}, splat (i8 1)
- // LLVM: ret <16 x i8> [[SHL]]
- return vshlq_n_s8(a, 1);
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
+// LLVM:    ret <16 x i8> [[VSHL_N]]
+//
+ return vshlq_n_s8(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_s16
-int16x8_t test_vshlq_n_s16(int16x8_t a, int64_t b) {
+int16x8_t test_vshlq_n_s16(int16x8_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s16i>, 
%{{.*}} : !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
   
- // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <8 x i16> {{.*}}, splat (i16 1)
- // LLVM: ret <8 x i16> [[SHL]]
- return vshlq_n_s16(a, 1);
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
+// LLVM:    ret <8 x i16> [[VSHL_N]]
+ return vshlq_n_s16(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_s32
-int32x4_t test_vshlq_n_s32(int32x4_t a, int64_t b) {
+int32x4_t test_vshlq_n_s32(int32x4_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s32i>, 
%{{.*}} : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
   
- // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <4 x i32> {{.*}}, splat (i32 1)
- // LLVM: ret <4 x i32> [[SHL]]
- return vshlq_n_s32(a, 1);
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
+// LLVM:    ret <4 x i32> [[VSHL_N]]
+ return vshlq_n_s32(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_s64
-int64x2_t test_vshlq_n_s64(int64x2_t a, int64_t b) {
+int64x2_t test_vshlq_n_s64(int64x2_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s64i>, 
%{{.*}} : !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i>
   
- // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <2 x i64> {{.*}}, splat (i64 1)
- // LLVM: ret <2 x i64> [[SHL]]
- return vshlq_n_s64(a, 1);
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
+// LLVM:    ret <2 x i64> [[VSHL_N]]
+ return vshlq_n_s64(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_u8
-uint8x16_t test_vshlq_n_u8(uint8x16_t a, int64_t b) {
+uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !u8i>, 
%{{.*}} : !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
   
- // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <16 x i8> {{.*}}, splat (i8 1)
- // LLVM: ret <16 x i8> [[SHL]]
- return vshlq_n_u8(a, 1);
+// LLVM-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[VSHL_N:%.*]] = shl <16 x i8> [[A]], splat (i8 3)
+// LLVM:    ret <16 x i8> [[VSHL_N]]
+ return vshlq_n_u8(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_u16
-uint16x8_t test_vshlq_n_u16(uint16x8_t a, int64_t b) {
+uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u16i>, 
%{{.*}} : !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i>
   
- // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <8 x i16> {{.*}}, splat (i16 1)
- // LLVM: ret <8 x i16> [[SHL]]
- return vshlq_n_u16(a, 1);
+// LLVM-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
+// LLVM:    ret <8 x i16> [[VSHL_N]]
+ return vshlq_n_u16(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_u32
-uint32x4_t test_vshlq_n_u32(uint32x4_t a, int64_t b) {
+uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u32i>, 
%{{.*}} : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
   
- // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <4 x i32> {{.*}}, splat (i32 1)
- // LLVM: ret <4 x i32> [[SHL]]
- return vshlq_n_u32(a, 1);
+// LLVM-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
+// LLVM:    ret <4 x i32> [[VSHL_N]]
+ return vshlq_n_u32(a, 3);
 }
 
 // ALL-LABEL: test_vshlq_n_u64
-uint64x2_t test_vshlq_n_u64(uint64x2_t a, int64_t b) {
+uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u64i>, 
%{{.*}} : !cir.vector<2 x !u64i>) -> !cir.vector<2 x !u64i>
   
- // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <2 x i64> {{.*}}, splat (i64 1)
- // LLVM: ret <2 x i64> [[SHL]]
- return vshlq_n_u64(a, 1);
+// LLVM-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
+// LLVM:    ret <2 x i64> [[VSHL_N]]
+ return vshlq_n_u64(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_s8
-int8x8_t test_vshl_n_s8(int8x8_t a, int64_t b) {
+int8x8_t test_vshl_n_s8(int8x8_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s8i>, 
%{{.*}} : !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
   
- // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <8 x i8> {{.*}}, splat (i8 1)
- // LLVM: ret <8 x i8> [[SHL]]
- return vshl_n_s8(a, 1);
+// LLVM-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
+// LLVM:    ret <8 x i8> [[VSHL_N]]
+ return vshl_n_s8(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_s16
-int16x4_t test_vshl_n_s16(int16x4_t a, int64_t b) {
+int16x4_t test_vshl_n_s16(int16x4_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s16i>, 
%{{.*}} : !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i>
   
- // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <4 x i16> {{.*}}, splat (i16 1)
- // LLVM: ret <4 x i16> [[SHL]]
- return vshl_n_s16(a, 1);
+// LLVM-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
+// LLVM:    ret <4 x i16> [[VSHL_N]]
+ return vshl_n_s16(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_s32
-int32x2_t test_vshl_n_s32(int32x2_t a, int64_t b) {
+int32x2_t test_vshl_n_s32(int32x2_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s32i>, 
%{{.*}} : !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i>
   
- // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <2 x i32> {{.*}}, splat (i32 1)
- // LLVM: ret <2 x i32> [[SHL]]
- return vshl_n_s32(a, 1);
+// LLVM-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
+// LLVM:    ret <2 x i32> [[VSHL_N]]
+ return vshl_n_s32(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_s64
-int64x1_t test_vshl_n_s64(int64x1_t a, int64_t b) {
+int64x1_t test_vshl_n_s64(int64x1_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !s64i>, 
%{{.*}} : !cir.vector<1 x !s64i>) -> !cir.vector<1 x !s64i>
   
- // LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <1 x i64> {{.*}}, splat (i64 1)
- // LLVM: ret <1 x i64> [[SHL]]
+// LLVM-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
+// LLVM:    ret <1 x i64> [[VSHL_N]]
  return vshl_n_s64(a, 1);
 }
 
 // ALL-LABEL: test_vshl_n_u8
-uint8x8_t test_vshl_n_u8(uint8x8_t a, int64_t b) {
+uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u8i>, 
%{{.*}} : !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
   
- // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <8 x i8> {{.*}}, splat (i8 1)
- // LLVM: ret <8 x i8> [[SHL]]
- return vshl_n_u8(a, 1);
+// LLVM-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[VSHL_N:%.*]] = shl <8 x i8> [[A]], splat (i8 3)
+// LLVM:    ret <8 x i8> [[VSHL_N]]
+ return vshl_n_u8(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_u16
-uint16x4_t test_vshl_n_u16(uint16x4_t a, int64_t b) {
+uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u16i>, 
%{{.*}} : !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i>
 
-  
- // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <4 x i16> {{.*}}, splat (i16 1)
- // LLVM: ret <4 x i16> [[SHL]]
- return vshl_n_u16(a, 1);
+// LLVM-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
+// LLVM:    ret <4 x i16> [[VSHL_N]]
+ return vshl_n_u16(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_u32
-uint32x2_t test_vshl_n_u32(uint32x2_t a, int64_t b) {
+uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u32i>, 
%{{.*}} : !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i>
   
- // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <2 x i32> {{.*}}, splat (i32 1)
- // LLVM: ret <2 x i32> [[SHL]]
- return vshl_n_u32(a, 1);
+// LLVM-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
+// LLVM:    ret <2 x i32> [[VSHL_N]]
+ return vshl_n_u32(a, 3);
 }
 
 // ALL-LABEL: test_vshl_n_u64
-uint64x1_t test_vshl_n_u64(uint64x1_t a, int64_t b) {
+uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !u64i>, 
%{{.*}} : !cir.vector<1 x !u64i>) -> !cir.vector<1 x !u64i>
   
- // LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
- // LLVM: [[SHL:%.*]] = shl <1 x i64> {{.*}}, splat (i64 1)
- // LLVM: ret <1 x i64> [[SHL]]
+// LLVM-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// LLVM:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT:    [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
+// LLVM:    ret <1 x i64> [[VSHL_N]]
  return vshl_n_u64(a, 1);
 }
 
-
-

>From eac40dffd347dac4b2e3c880c328f166d0af6aa2 Mon Sep 17 00:00:00 2001
From: albertbolt <[email protected]>
Date: Sat, 21 Mar 2026 11:09:25 +0530
Subject: [PATCH 3/4] cleaning

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index d634cb2fe21f4..a1c751a9c4c11 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -1094,7 +1094,8 @@ static mlir::Value emitCommonNeonBuiltinExpr(
     return mlir::Value{};
   case NEON::BI__builtin_neon_vshl_n_v:
   case NEON::BI__builtin_neon_vshlq_n_v:
-    return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1], 
true);
+    return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1],
+                               true);
   case NEON::BI__builtin_neon_vshll_n_v:
   case NEON::BI__builtin_neon_vshrn_n_v:
   case NEON::BI__builtin_neon_vshr_n_v:

>From 5894ce30e1d62de7f9f53a5d5fe011fd0ff8ca4f Mon Sep 17 00:00:00 2001
From: albertbolt <[email protected]>
Date: Sat, 21 Mar 2026 11:13:34 +0530
Subject: [PATCH 4/4] cleaning

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index a1c751a9c4c11..971f8219e00bd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2634,7 +2634,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   // defer to common code if it's been added to our special map.
   builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
                                         aarch64SIMDIntrinsicsProvenSorted);
-  
   if (builtin)
     return emitCommonNeonBuiltinExpr(
         *this, builtin->builtinID, builtin->llvmIntrinsic,

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to