labrinea created this revision.
labrinea added reviewers: jmolloy, rengolin, echristo, cfe-commits.
Herald added subscribers: rengolin, aemerson.

Existing tests currently check only the generated backend assembly. We want to
test the generated frontend IR as well. As discussed on the list, we would like
to keep both: ASM checks for integration testing and IR checks for unit testing.

http://reviews.llvm.org/D15223

Files:
  test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
  test/CodeGen/arm-v8.1a-neon-intrinsics.c

Index: test/CodeGen/arm-v8.1a-neon-intrinsics.c
===================================================================
--- test/CodeGen/arm-v8.1a-neon-intrinsics.c
+++ test/CodeGen/arm-v8.1a-neon-intrinsics.c
@@ -1,122 +1,244 @@
 // RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-feature +neon \
 // RUN:  -O3 -S -o - %s \
-// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM-ASM
+
+// RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-feature +neon \
+// RUN:  -O1 -S -emit-llvm -o - %s \
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM-IR
+
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
 // RUN:  -target-feature +v8.1a -O3 -S -o - %s \
-// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64-ASM
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN:  -target-feature +v8.1a -O1 -S -emit-llvm -o - %s \
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64-IR
+
 // REQUIRES: arm-registered-target,aarch64-registered-target
 
 #include <arm_neon.h>
 
 // CHECK-LABEL: test_vqrdmlah_s16
 int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+// CHECK-ARM-ASM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %b, <4 x i16> %c)
+// CHECK-ARM-IR: tail call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %b, <4 x i16> %c)
+// CHECK-AARCH64-IR: tail call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
   return vqrdmlah_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlah_s32
 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+// CHECK-ARM-ASM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %b, <2 x i32> %c)
+// CHECK-ARM-IR: tail call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %b, <2 x i32> %c)
+// CHECK-AARCH64-IR: tail call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
   return vqrdmlah_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_s16
 int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
-// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+// CHECK-ARM-ASM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %b, <8 x i16> %c)
+// CHECK-ARM-IR: tail call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %b, <8 x i16> %c)
+// CHECK-AARCH64-IR: tail call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
   return vqrdmlahq_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_s32
 int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
-// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+// CHECK-ARM-ASM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %b, <4 x i32> %c)
+// CHECK-ARM-IR: tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %b, <4 x i32> %c)
+// CHECK-AARCH64-IR: tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
   return vqrdmlahq_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlah_lane_s16
 int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+// CHECK-ARM-ASM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %b, <4 x i16> [[shuffle]])
+// CHECK-ARM-IR: tail call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %b, <4 x i16> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
   return vqrdmlah_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlah_lane_s32
 int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+// CHECK-ARM-ASM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %b, <2 x i32> [[shuffle]])
+// CHECK-ARM-IR: tail call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %b, <2 x i32> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
   return vqrdmlah_lane_s32(a, b, c, 1);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_lane_s16
 int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+// CHECK-ARM-ASM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %b, <8 x i16> [[shuffle]])
+// CHECK-ARM-IR: tail call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %b, <8 x i16> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
   return vqrdmlahq_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlahq_lane_s32
 int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+// CHECK-ARM-ASM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64-ASM: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %b, <4 x i32> [[shuffle]])
+// CHECK-ARM-IR: tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %b, <4 x i32> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
   return vqrdmlahq_lane_s32(a, b, c, 1);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_s16
 int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+// CHECK-ARM-ASM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %b, <4 x i16> %c)
+// CHECK-ARM-IR: tail call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %b, <4 x i16> %c)
+// CHECK-AARCH64-IR: tail call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
   return vqrdmlsh_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_s32
 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+// CHECK-ARM-ASM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %b, <2 x i32> %c)
+// CHECK-ARM-IR: tail call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %b, <2 x i32> %c)
+// CHECK-AARCH64-IR: tail call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
   return vqrdmlsh_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_s16
 int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
-// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+// CHECK-ARM-ASM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %b, <8 x i16> %c)
+// CHECK-ARM-IR: tail call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %b, <8 x i16> %c)
+// CHECK-AARCH64-IR: tail call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
   return vqrdmlshq_s16(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_s32
 int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
-// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+// CHECK-ARM-ASM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %b, <4 x i32> %c)
+// CHECK-ARM-IR: tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %b, <4 x i32> %c)
+// CHECK-AARCH64-IR: tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
   return vqrdmlshq_s32(a, b, c);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_lane_s16
 int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+// CHECK-ARM-ASM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %b, <4 x i16> [[shuffle]])
+// CHECK-ARM-IR: tail call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %b, <4 x i16> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
   return vqrdmlsh_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlsh_lane_s32
 int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+// CHECK-ARM-ASM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %b, <2 x i32> [[shuffle]])
+// CHECK-ARM-IR: tail call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %b, <2 x i32> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
   return vqrdmlsh_lane_s32(a, b, c, 1);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_lane_s16
 int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
-// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+// CHECK-ARM-ASM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %b, <8 x i16> [[shuffle]])
+// CHECK-ARM-IR: tail call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %b, <8 x i16> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
   return vqrdmlshq_lane_s16(a, b, c, 3);
 }
 
 // CHECK-LABEL: test_vqrdmlshq_lane_s32
 int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
-// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+// CHECK-ARM-ASM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64-ASM: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+
+// CHECK-ARM-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-ARM-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %b, <4 x i32> [[shuffle]])
+// CHECK-ARM-IR: tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
+
+// CHECK-AARCH64-IR: [[shuffle:%.*]] = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-AARCH64-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %b, <4 x i32> [[shuffle]])
+// CHECK-AARCH64-IR: tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
   return vqrdmlshq_lane_s32(a, b, c, 1);
 }
 
Index: test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
===================================================================
--- test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
+++ test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
@@ -2,127 +2,227 @@
 
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
 // RUN:  -target-feature +v8.1a -O3 -S -o - %s \
-// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ASM
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN:  -target-feature +v8.1a -O1 -S -emit-llvm -o - %s \
+// RUN:  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-IR
 
  #include <arm_neon.h>
 
-// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s16
+// CHECK-LABEL: test_vqrdmlah_laneq_s16
 int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+// CHECK-ASM: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %b, <4 x i16> [[shuffle]])
+// CHECK-IR: tail call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
   return vqrdmlah_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s32
+// CHECK-LABEL: test_vqrdmlah_laneq_s32
 int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+// CHECK-ASM: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+// CHECK-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %b, <2 x i32> [[shuffle]])
+// CHECK-IR: tail call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
   return vqrdmlah_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s16
+// CHECK-LABEL: test_vqrdmlahq_laneq_s16
 int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+// CHECK-ASM: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %b, <8 x i16> [[shuffle]])
+// CHECK-IR: tail call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
   return vqrdmlahq_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s32
+// CHECK-LABEL: test_vqrdmlahq_laneq_s32
 int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+// CHECK-ASM: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %b, <4 x i32> [[shuffle]])
+// CHECK-IR: tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
   return vqrdmlahq_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahh_s16
+// CHECK-LABEL: test_vqrdmlahh_s16
 int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
-// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+// CHECK-ASM: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+
+// CHECK-IR: [[insb:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK-IR: [[insc:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK-IR: [[insa:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK-IR: [[add:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[mul]])
+// CHECK-IR: extractelement <4 x i16> [[add]], i64 0
   return vqrdmlahh_s16(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahs_s32
+// CHECK-LABEL: test_vqrdmlahs_s32
 int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
-// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+// CHECK-ASM: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+// CHECK-IR: [[mul:%.*]] = tail call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %b, i32 %c)
+// CHECK-IR: tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[mul]])
   return vqrdmlahs_s32(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahh_lane_s16
+// CHECK-LABEL: test_vqrdmlahh_lane_s16
 int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
-// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+// CHECK-ASM: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+
+// CHECK-IR: [[insb:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[shuffle]])
+// CHECK-IR: [[insa:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK-IR: [[add:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[mul]])
+// CHECK-IR: extractelement <4 x i16> [[add]], i64 0
   return vqrdmlahh_lane_s16(a, b, c, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahs_lane_s32
+// CHECK-LABEL: test_vqrdmlahs_lane_s32
 int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
-// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+// CHECK-ASM: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+
+// CHECK-IR: [[extc:%.*]] = extractelement <2 x i32> %c, i32 1
+// CHECK-IR: [[mul:%.*]] = tail call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %b, i32 [[extc]])
+// CHECK-IR: tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[mul]])
   return vqrdmlahs_lane_s32(a, b, c, 1);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahh_laneq_s16
+// CHECK-LABEL: test_vqrdmlahh_laneq_s16
 int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
-// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+// CHECK-ASM: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+
+// CHECK-IR: [[insb:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 7, i32 undef, i32 undef, i32 undef>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[shuffle]])
+// CHECK-IR: [[insa:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK-IR: [[add:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[mul]])
+// CHECK-IR: extractelement <4 x i16> [[add]], i64 0
   return vqrdmlahh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlahs_laneq_s32
+// CHECK-LABEL: test_vqrdmlahs_laneq_s32
 int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
-// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+// CHECK-ASM: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+
+// CHECK-IR: [[extc:%.*]] = extractelement <4 x i32> %c, i32 3
+// CHECK-IR: [[mul:%.*]] = tail call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %b, i32 [[extc]])
+// CHECK-IR: tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[mul]])
   return vqrdmlahs_laneq_s32(a, b, c, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s16
+// CHECK-LABEL: test_vqrdmlsh_laneq_s16
 int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+// CHECK-ASM: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %b, <4 x i16> [[shuffle]])
+// CHECK-IR: tail call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> [[mul]])
   return vqrdmlsh_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s32
+// CHECK-LABEL: test_vqrdmlsh_laneq_s32
 int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+// CHECK-ASM: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+// CHECK-IR: [[mul:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %b, <2 x i32> [[shuffle]])
+// CHECK-IR: tail call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> [[mul]])
   return vqrdmlsh_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s16
+// CHECK-LABEL: test_vqrdmlshq_laneq_s16
 int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+// CHECK-ASM: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-IR: [[mul:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %b, <8 x i16> [[shuffle]])
+// CHECK-IR: tail call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> [[mul]])
   return vqrdmlshq_laneq_s16(a, b, v, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s32
+// CHECK-LABEL: test_vqrdmlshq_laneq_s32
 int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
-// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+// CHECK-ASM: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %b, <4 x i32> [[shuffle]])
+// CHECK-IR: tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[mul]])
   return vqrdmlshq_laneq_s32(a, b, v, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshh_s16
+// CHECK-LABEL: test_vqrdmlshh_s16
 int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
-// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+// CHECK-ASM: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+
+// CHECK-IR: [[insb:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK-IR: [[insc:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
+// CHECK-IR: [[insa:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK-IR: [[sub:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[mul]])
+// CHECK-IR: extractelement <4 x i16> [[sub]], i64 0
   return vqrdmlshh_s16(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshs_s32
+// CHECK-LABEL: test_vqrdmlshs_s32
 int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
-// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+// CHECK-ASM: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+// CHECK-IR: [[mul:%.*]] = tail call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %b, i32 %c)
+// CHECK-IR: tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[mul]])
   return vqrdmlshs_s32(a, b, c);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshh_lane_s16
+// CHECK-LABEL: test_vqrdmlshh_lane_s16
 int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
-// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+// CHECK-ASM: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+
+// CHECK-IR: [[insb:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[shuffle]])
+// CHECK-IR: [[insa:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK-IR: [[sub:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[mul]])
+// CHECK-IR: extractelement <4 x i16> [[sub]], i64 0
   return vqrdmlshh_lane_s16(a, b, c, 3);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshs_lane_s32
+// CHECK-LABEL: test_vqrdmlshs_lane_s32
 int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
-// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+// CHECK-ASM: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+
+// CHECK-IR: [[extc:%.*]] = extractelement <2 x i32> %c, i32 1
+// CHECK-IR: [[mul:%.*]] = tail call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %b, i32 [[extc]])
+// CHECK-IR: tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[mul]])
   return vqrdmlshs_lane_s32(a, b, c, 1);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshh_laneq_s16
+// CHECK-LABEL: test_vqrdmlshh_laneq_s16
 int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
-// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+// CHECK-ASM: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+
+// CHECK-IR: [[insb:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
+// CHECK-IR: [[shuffle:%.*]] = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 7, i32 undef, i32 undef, i32 undef>
+// CHECK-IR: [[mul:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[shuffle]])
+// CHECK-IR: [[insa:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
+// CHECK-IR: [[sub:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[mul]])
+// CHECK-IR: extractelement <4 x i16> [[sub]], i64 0
   return vqrdmlshh_laneq_s16(a, b, c, 7);
 }
 
-// CHECK-AARCH64-LABEL: test_vqrdmlshs_laneq_s32
+// CHECK-LABEL: test_vqrdmlshs_laneq_s32
 int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
-// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+// CHECK-ASM: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+
+// CHECK-IR: [[extc:%.*]] = extractelement <4 x i32> %c, i32 3
+// CHECK-IR: [[mul:%.*]] = tail call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %b, i32 [[extc]])
+// CHECK-IR: tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[mul]])
   return vqrdmlshs_laneq_s32(a, b, c, 3);
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to