This patch implements the AArch64 NEON SIMD copy instruction class.
I generated this patch on top of the AArch64 NEON SIMD by-element patch (clang:
http://llvm-reviews.chandlerc.com/D1752 , llvm:
http://llvm-reviews.chandlerc.com/D1753), because the two patches share the DUP
node and the lowerVECTOR_SHUFFLE function.

This patch contains:
1. Coverage of the SIMD dup instruction.
2. Implementation of the related ACLE intrinsics: vget_lane, vset_lane,
vcopy_lane, vcreate, vdup_n, vdup_lane, vmov_n

Known problems:

1. In AAPCS64, half float can be used as a function argument or return type, so
some ACLE intrinsics would receive or return a half-float value. But this
feature isn't supported by Clang at the moment. Even when I hack the diagnostic
check to avoid emitting the error message, every half float is converted to
i16 in IR.
2. There is no v4f16 type in IR, which is used by some ACLE intrinsics.

Considering the above problems, none of the ACLE intrinsics that involve half
float are implemented in these patches. We will try to figure out how to
implement this feature.

Also, I have seen Tim's comments on the AArch64 NEON SIMD by-element patch, but
because my patches are based on that one, I will make the corresponding changes
after Jiangning posts the 2nd edition of his patch.

http://llvm-reviews.chandlerc.com/D1768

Files:
  include/clang/Basic/arm_neon.td
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/aarch64-neon-copy.c
  utils/TableGen/NeonEmitter.cpp
Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -118,6 +118,7 @@
 def OP_LONG_HI : Op;
 def OP_NARROW_HI : Op;
 def OP_MOVL_HI : Op;
+def OP_COPY : Op;
 
 class Inst <string n, string p, string t, Op o> {
   string Name = n;
@@ -660,6 +661,35 @@
 def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
 
 ////////////////////////////////////////////////////////////////////////////////
+// Extract or insert element from vector
+def GET_LANE : IInst<"vget_lane", "sdi",
+                        "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs">;
+def SET_LANE : IInst<"vset_lane", "dsdi",
+                 "csilPcPsUiUlUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
+def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
+                     "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs", OP_COPY>;
+def COPY_LANEQ : IOpInst<"vcopy_laneq", "ggidi",
+                     "QcQsQiQlQUcQUsQUiQUlQPcQPs", OP_COPY>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Set all lanes to same value
+def VDUP_LANE1: WOpInst<"vdup_lane", "dgi",
+                        "csilPcPsUcUsUiUlfdQcQsQiQlQPcQPsQUcQUsQUiQUlQfQd",
+                        OP_DUP_LN>;
+def VDUP_LANE2: WOpInst<"vdup_laneq", "dki",
+                        "csilPcPsUcUsUiUlfdQcQsQiQlQPcQPsQUcQUsQUiQUlQfQd",
+                        OP_DUP_LN>;
+def DUP_N   : WOpInst<"vdup_n", "ds",
+                       "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
+                       OP_DUP>;
+def MOV_N   : WOpInst<"vmov_n", "ds",
+                       "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
+                       OP_DUP>;
+////////////////////////////////////////////////////////////////////////////////
+// Initialize a vector from a bit pattern
+def CREATE : NoTestOpInst<"vcreate", "dl", "csifdUcUsUiUlPcPsl", OP_CAST>;
+
+////////////////////////////////////////////////////////////////////////////////
 
 def VMLA_LANEQ   : IOpInst<"vmla_laneq", "dddji",
                            "siUsUifQsQiQUsQUiQf", OP_MLA_LNQ>;
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -1950,6 +1950,32 @@
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
     Ops.push_back(EmitScalarExpr(E->getArg(i)));
   }
+  // Some intrinsics aren't overloaded; forward them to the ARM implementation.
+  switch (BuiltinID) {
+  default: break;
+  case AArch64::BI__builtin_neon_vget_lane_i8:
+  case AArch64::BI__builtin_neon_vget_lane_i16:
+  case AArch64::BI__builtin_neon_vget_lane_i32:
+  case AArch64::BI__builtin_neon_vget_lane_i64:
+  case AArch64::BI__builtin_neon_vgetq_lane_i8:
+  case AArch64::BI__builtin_neon_vgetq_lane_i16:
+  case AArch64::BI__builtin_neon_vgetq_lane_i32:
+  case AArch64::BI__builtin_neon_vgetq_lane_i64:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
+  case AArch64::BI__builtin_neon_vset_lane_i8:
+  case AArch64::BI__builtin_neon_vset_lane_i16:
+  case AArch64::BI__builtin_neon_vset_lane_i32:
+  case AArch64::BI__builtin_neon_vset_lane_i64:
+  case AArch64::BI__builtin_neon_vset_lane_f32:
+  case AArch64::BI__builtin_neon_vset_lane_f64:
+  case AArch64::BI__builtin_neon_vsetq_lane_i8:
+  case AArch64::BI__builtin_neon_vsetq_lane_i16:
+  case AArch64::BI__builtin_neon_vsetq_lane_i32:
+  case AArch64::BI__builtin_neon_vsetq_lane_i64:
+  case AArch64::BI__builtin_neon_vsetq_lane_f32:
+  case AArch64::BI__builtin_neon_vsetq_lane_f64:
+    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
+  }
 
   // Get the last argument, which specifies the vector type.
   llvm::APSInt Result;
Index: test/CodeGen/aarch64-neon-copy.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-neon-copy.c
@@ -0,0 +1,942 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+uint8x8_t test_vset_lane_u8(uint8_t v1, uint8x8_t v2) {
+   // CHECK: test_vset_lane_u8
+  return vset_lane_u8(v1, v2, 6);
+  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
+}
+
+uint16x4_t test_vset_lane_u16(uint16_t v1, uint16x4_t v2) {
+   // CHECK: test_vset_lane_u16
+  return vset_lane_u16(v1, v2, 2);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}}
+}
+
+uint32x2_t test_vset_lane_u32(uint32_t v1, uint32x2_t v2) {
+   // CHECK: test_vset_lane_u32
+  return vset_lane_u32(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+}
+uint64x1_t test_vset_lane_u64(uint64_t v1, uint64x1_t v2) {
+   // CHECK: test_vset_lane_u64
+  return vset_lane_u64(v1, v2, 0);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+int8x8_t test_vset_lane_s8(int8_t v1, int8x8_t v2) {
+   // CHECK: test_vset_lane_s8
+  return vset_lane_s8(v1, v2, 6);
+  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
+}
+
+int16x4_t test_vset_lane_s16(int16_t v1, int16x4_t v2) {
+   // CHECK: test_vset_lane_s16
+  return vset_lane_s16(v1, v2, 2);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}}
+}
+
+int32x2_t test_vset_lane_s32(int32_t v1, int32x2_t v2) {
+   // CHECK: test_vset_lane_s32
+  return vset_lane_s32(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+}
+
+  int64x1_t test_vset_lane_s64(int64_t v1, int64x1_t v2) {
+   // CHECK: test_vset_lane_s64
+  return vset_lane_s64(v1, v2, 0);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint8x16_t test_vsetq_lane_u8(uint8_t v1, uint8x16_t v2) {
+   // CHECK: test_vsetq_lane_u8
+  return vsetq_lane_u8(v1, v2, 6);
+  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
+}
+
+uint16x8_t test_vsetq_lane_u16(uint16_t v1, uint16x8_t v2) {
+   // CHECK: test_vsetq_lane_u16
+  return vsetq_lane_u16(v1, v2, 2);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}}
+}
+
+uint32x4_t test_vsetq_lane_u32(uint32_t v1, uint32x4_t v2) {
+   // CHECK: test_vsetq_lane_u32
+  return vsetq_lane_u32(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+}
+
+  uint64x2_t test_vsetq_lane_u64(uint64_t v1, uint64x2_t v2) {
+   // CHECK: test_vsetq_lane_u64
+  return vsetq_lane_u64(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
+}
+
+int8x16_t test_vsetq_lane_s8(int8_t v1, int8x16_t v2) {
+   // CHECK: test_vsetq_lane_s8
+  return vsetq_lane_s8(v1, v2, 6);
+  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
+}
+
+int16x8_t test_vsetq_lane_s16(int16_t v1, int16x8_t v2) {
+   // CHECK: test_vsetq_lane_s16
+  return vsetq_lane_s16(v1, v2, 2);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}}
+}
+
+int32x4_t test_vsetq_lane_s32(int32_t v1, int32x4_t v2) {
+   // CHECK: test_vsetq_lane_s32
+  return vsetq_lane_s32(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+}
+
+int64x2_t test_vsetq_lane_s64(int64_t v1, int64x2_t v2) {
+   // CHECK: test_vsetq_lane_s64
+  return vsetq_lane_s64(v1, v2, 0);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}}
+}
+
+poly8x8_t test_vset_lane_p8(poly8_t v1, poly8x8_t v2) {
+   // CHECK: test_vset_lane_p8
+  return vset_lane_p8(v1, v2, 6);
+  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
+}
+
+poly16x4_t test_vset_lane_p16(poly16_t v1, poly16x4_t v2) {
+   // CHECK: test_vset_lane_p16
+  return vset_lane_p16(v1, v2, 2);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}}
+}
+
+poly8x16_t test_vsetq_lane_p8(poly8_t v1, poly8x16_t v2) {
+   // CHECK: test_vsetq_lane_p8
+  return vsetq_lane_p8(v1, v2, 6);
+  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
+}
+
+poly16x8_t test_vsetq_lane_p16(poly16_t v1, poly16x8_t v2) {
+   // CHECK: test_vsetq_lane_p16
+  return vsetq_lane_p16(v1, v2, 2);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}}
+}
+
+float32x2_t test_vset_lane_f32(float32_t v1, float32x2_t v2) {
+   // CHECK: test_vset_lane_f32
+  return vset_lane_f32(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vsetq_lane_f32(float32_t v1, float32x4_t v2) {
+   // CHECK: test_vsetq_lane_f32
+  return vsetq_lane_f32(v1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+}
+
+float64x1_t test_vset_lane_f64(float64_t v1, float64x1_t v2) {
+   // CHECK: test_vset_lane_f64
+  return vset_lane_f64(v1, v2, 0);
+  // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float64x2_t test_vsetq_lane_f64(float64_t v1, float64x2_t v2) {
+   // CHECK: test_vsetq_lane_f64
+  return vsetq_lane_f64(v1, v2, 0);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+}
+
+uint8_t test_vget_lane_u8(uint8x8_t v1) {
+  // CHECK: test_vget_lane_u8
+  return vget_lane_u8(v1, 7);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
+}
+
+uint16_t test_vget_lane_u16(uint16x4_t v1) {
+  // CHECK: test_vget_lane_u16
+  return vget_lane_u16(v1, 3);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+uint32_t test_vget_lane_u32(uint32x2_t v1) {
+  // CHECK: test_vget_lane_u32
+  return vget_lane_u32(v1, 1);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+uint64_t test_vget_lane_u64(uint64x1_t v1) {
+  // CHECK: test_vget_lane_u64
+  return vget_lane_u64(v1, 0);
+  // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8_t test_vgetq_lane_u8(uint8x16_t v1) {
+  // CHECK: test_vgetq_lane_u8
+  return vgetq_lane_u8(v1, 15);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15]
+}
+
+uint16_t test_vgetq_lane_u16(uint16x8_t v1) {
+  // CHECK: test_vgetq_lane_u16
+  return vgetq_lane_u16(v1, 6);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6]
+}
+
+uint32_t test_vgetq_lane_u32(uint32x4_t v1) {
+  // CHECK: test_vgetq_lane_u32
+  return vgetq_lane_u32(v1, 2);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
+}
+
+uint64_t test_vgetq_lane_u64(uint64x2_t v1) {
+  // CHECK: test_vgetq_lane_u64
+  return vgetq_lane_u64(v1, 1);
+  // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+poly8_t test_vget_lane_p8(poly8x8_t v1) {
+  // CHECK: test_vget_lane_p8
+  return vget_lane_p8(v1, 7);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
+}
+
+poly16_t test_vget_lane_p16(poly16x4_t v1) {
+  // CHECK: test_vget_lane_p16
+  return vget_lane_p16(v1, 3);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+poly8_t test_vgetq_lane_p8(poly8x16_t v1) {
+  // CHECK: test_vgetq_lane_p8
+  return vgetq_lane_p8(v1, 14);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[14]
+}
+
+poly16_t test_vgetq_lane_p16(poly16x8_t v1) {
+  // CHECK: test_vgetq_lane_p16
+  return vgetq_lane_p16(v1, 6);
+  // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6]
+}
+
+int32_t test_vget_lane_s8(int8x8_t v1) {
+  // CHECK: test_vget_lane_s8
+  return vget_lane_s8(v1, 7)+1;
+  // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
+}
+
+int32_t test_vget_lane_s16(int16x4_t v1) {
+  // CHECK: test_vget_lane_s16
+  return vget_lane_s16(v1, 3)+1;
+  // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[3]
+}
+
+int64_t test_vget_lane_s32(int32x2_t v1) {
+  // CHECK: test_vget_lane_s32
+  return vget_lane_s32(v1, 1);
+  // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+int64_t test_vget_lane_s64(int64x1_t v1) {
+  // CHECK: test_vget_lane_s64
+  return vget_lane_s64(v1, 0);
+  // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+}
+
+int32_t test_vgetq_lane_s8(int8x16_t v1) {
+  // CHECK: test_vgetq_lane_s8
+  return vgetq_lane_s8(v1, 15)+1;
+  // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[15]
+}
+
+int32_t test_vgetq_lane_s16(int16x8_t v1) {
+  // CHECK: test_vgetq_lane_s16
+  return vgetq_lane_s16(v1, 6)+1;
+  // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[6]
+}
+
+int64_t test_vgetq_lane_s32(int32x4_t v1) {
+  // CHECK: test_vgetq_lane_s32
+  return vgetq_lane_s32(v1, 2);
+  // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
+}
+
+int64_t test_vgetq_lane_s64(int64x2_t v1) {
+  // CHECK: test_vgetq_lane_s64
+  return vgetq_lane_s64(v1, 1);
+  // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+int8x8_t test_vcopy_lane_s8(int8x8_t v1, int8x8_t v2) {
+  // CHECK: test_vcopy_lane_s8
+  return vcopy_lane_s8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+int16x4_t test_vcopy_lane_s16(int16x4_t v1, int16x4_t v2) {
+  // CHECK: test_vcopy_lane_s16
+  return vcopy_lane_s16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) {
+  // CHECK: test_vcopy_lane_s32
+  return vcopy_lane_s32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int64x1_t test_vcopy_lane_s64(int64x1_t v1, int64x1_t v2) {
+  // CHECK: test_vcopy_lane_s64
+  return vcopy_lane_s64(v1, 0, v2, 0);
+  // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x8_t test_vcreate_s8(uint64_t v1) {
+  // CHECK: test_vcreate_s8
+  return vcreate_s8(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+int16x4_t test_vcreate_s16(uint64_t v1) {
+  // CHECK: test_vcreate_s16
+  return vcreate_s16(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+int32x2_t test_vcreate_s32(uint64_t v1) {
+  // CHECK: test_vcreate_s32
+  return vcreate_s32(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+int64x1_t test_vcreate_s64(uint64_t v1) {
+  // CHECK: test_vcreate_s64
+  return vcreate_s64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint8x8_t test_vcreate_u8(uint64_t v1) {
+  // CHECK: test_vcreate_u8
+  return vcreate_u8(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint16x4_t test_vcreate_u16(uint64_t v1) {
+  // CHECK: test_vcreate_u16
+  return vcreate_u16(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint32x2_t test_vcreate_u32(uint64_t v1) {
+  // CHECK: test_vcreate_u32
+  return vcreate_u32(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint64x1_t test_vcreate_u64(uint64_t v1) {
+  // CHECK: test_vcreate_u64
+  return vcreate_u64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+poly8x8_t test_vcreate_p8(uint64_t v1) {
+  // CHECK: test_vcreate_p8
+  return vcreate_p8(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+poly16x4_t test_vcreate_p16(uint64_t v1) {
+  // CHECK: test_vcreate_p16
+  return vcreate_p16(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+float32x2_t test_vcreate_f32(uint64_t v1) {
+  // CHECK: test_vcreate_f32
+  return vcreate_f32(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+float64x1_t test_vcreate_f64(uint64_t v1) {
+  // CHECK: test_vcreate_f64
+  return vcreate_f64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint8x8_t test_vdup_n_u8(uint8_t v1) {
+  // CHECK: test_vdup_n_u8
+  return vdup_n_u8(v1);
+  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+}
+
+uint16x4_t test_vdup_n_u16(uint16_t v1) {
+  // CHECK: test_vdup_n_u16
+  return vdup_n_u16(v1);
+  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+}
+
+uint32x2_t test_vdup_n_u32(uint32_t v1) {
+  // CHECK: test_vdup_n_u32
+  return vdup_n_u32(v1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
+}
+
+uint64x1_t test_vdup_n_u64(uint64_t v1) {
+  // CHECK: test_vdup_n_u64
+  return vdup_n_u64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint8x16_t test_vdupq_n_u8(uint8_t v1) {
+  // CHECK: test_vdupq_n_u8
+  return vdupq_n_u8(v1);
+  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+}
+
+uint16x8_t test_vdupq_n_u16(uint16_t v1) {
+  // CHECK: test_vdupq_n_u16
+  return vdupq_n_u16(v1);
+  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+}
+
+uint32x4_t test_vdupq_n_u32(uint32_t v1) {
+  // CHECK: test_vdupq_n_u32
+  return vdupq_n_u32(v1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+}
+
+uint64x2_t test_vdupq_n_u64(uint64_t v1) {
+  // CHECK: test_vdupq_n_u64
+  return vdupq_n_u64(v1);
+  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+}
+
+int8x8_t test_vdup_n_s8(int8_t v1) {
+  // CHECK: test_vdup_n_s8
+  return vdup_n_s8(v1);
+  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+}
+
+int16x4_t test_vdup_n_s16(int16_t v1) {
+  // CHECK: test_vdup_n_s16
+  return vdup_n_s16(v1);
+  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+}
+
+int32x2_t test_vdup_n_s32(int32_t v1) {
+  // CHECK: test_vdup_n_s32
+  return vdup_n_s32(v1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
+}
+
+int64x1_t test_vdup_n_s64(int64_t v1) {
+  // CHECK: test_vdup_n_s64
+  return vdup_n_s64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+int8x16_t test_vdupq_n_s8(int8_t v1) {
+  // CHECK: test_vdupq_n_s8
+  return vdupq_n_s8(v1);
+  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+}
+
+int16x8_t test_vdupq_n_s16(int16_t v1) {
+  // CHECK: test_vdupq_n_s16
+  return vdupq_n_s16(v1);
+  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+}
+
+int32x4_t test_vdupq_n_s32(int32_t v1) {
+  // CHECK: test_vdupq_n_s32
+  return vdupq_n_s32(v1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+}
+
+int64x2_t test_vdupq_n_s64(int64_t v1) {
+  // CHECK: test_vdupq_n_s64
+  return vdupq_n_s64(v1);
+  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+}
+
+poly8x8_t test_vdup_n_p8(poly8_t v1) {
+  // CHECK: test_vdup_n_p8
+  return vdup_n_p8(v1);
+  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+}
+
+poly16x4_t test_vdup_n_p16(poly16_t v1) {
+  // CHECK: test_vdup_n_p16
+  return vdup_n_p16(v1);
+  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+}
+
+poly8x16_t test_vdupq_n_p8(poly8_t v1) {
+  // CHECK: test_vdupq_n_p8
+  return vdupq_n_p8(v1);
+  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+}
+
+poly16x8_t test_vdupq_n_p16(poly16_t v1) {
+  // CHECK: test_vdupq_n_p16
+  return vdupq_n_p16(v1);
+  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+}
+
+float32x2_t test_vdup_n_f32(float32_t v1) {
+  // CHECK: test_vdup_n_f32
+  return vdup_n_f32(v1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float64x1_t test_vdup_n_f64(float64_t v1) {
+  // CHECK: test_vdup_n_f64
+  return vdup_n_f64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float32x4_t test_vdupq_n_f32(float32_t v1) {
+  // CHECK: test_vdupq_n_f32
+  return vdupq_n_f32(v1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vdupq_n_f64(float64_t v1) {
+  // CHECK: test_vdupq_n_f64
+  return vdupq_n_f64(v1);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+int8x8_t test_vdup_lane_s8(int8x8_t v1) {
+  // CHECK: test_vdup_lane_s8
+  return vdup_lane_s8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+}
+
+int16x4_t test_vdup_lane_s16(int16x4_t v1) {
+  // CHECK: test_vdup_lane_s16
+  return vdup_lane_s16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+}
+
+int32x2_t test_vdup_lane_s32(int32x2_t v1) {
+  // CHECK: test_vdup_lane_s32
+  return vdup_lane_s32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+}
+
+int64x1_t test_vdup_lane_s64(int64x1_t v1) {
+  // CHECK: test_vdup_lane_s64
+  return vdup_lane_s64(v1, 0);
+  // CHECK: ret
+}
+
+int8x16_t test_vdupq_lane_s8(int8x8_t v1) {
+  // CHECK: test_vdupq_lane_s8
+  return vdupq_lane_s8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+}
+
+int16x8_t test_vdupq_lane_s16(int16x4_t v1) {
+  // CHECK: test_vdupq_lane_s16
+  return vdupq_lane_s16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+}
+
+int32x4_t test_vdupq_lane_s32(int32x2_t v1) {
+  // CHECK: test_vdupq_lane_s32
+  return vdupq_lane_s32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+}
+
+int64x2_t test_vdupq_lane_s64(int64x1_t v1) {
+  // CHECK: test_vdupq_lane_s64
+  return vdupq_lane_s64(v1, 0);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vdup_lane_u8(uint8x8_t v1) {
+  // CHECK: test_vdup_lane_u8
+  return vdup_lane_u8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+}
+
+uint16x4_t test_vdup_lane_u16(uint16x4_t v1) {
+  // CHECK: test_vdup_lane_u16
+  return vdup_lane_u16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+}
+
+uint32x2_t test_vdup_lane_u32(uint32x2_t v1) {
+  // CHECK: test_vdup_lane_u32
+  return vdup_lane_u32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+}
+
+uint64x1_t test_vdup_lane_u64(uint64x1_t v1) {
+  // CHECK: test_vdup_lane_u64
+  return vdup_lane_u64(v1, 0);
+  // CHECK: ret
+}
+
+uint8x16_t test_vdupq_lane_u8(uint8x8_t v1) {
+  // CHECK: test_vdupq_lane_u8
+  return vdupq_lane_u8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+}
+
+uint16x8_t test_vdupq_lane_u16(uint16x4_t v1) {
+  // CHECK: test_vdupq_lane_u16
+  return vdupq_lane_u16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+}
+
+uint32x4_t test_vdupq_lane_u32(uint32x2_t v1) {
+  // CHECK: test_vdupq_lane_u32
+  return vdupq_lane_u32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+}
+
+uint64x2_t test_vdupq_lane_u64(uint64x1_t v1) {
+  // CHECK: test_vdupq_lane_u64
+  return vdupq_lane_u64(v1, 0);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+int8x8_t test_vdup_laneq_s8(int8x16_t v1) {
+  // CHECK: test_vdup_laneq_s8
+  return vdup_laneq_s8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+}
+
+int16x4_t test_vdup_laneq_s16(int16x8_t v1) {
+  // CHECK: test_vdup_laneq_s16
+  return vdup_laneq_s16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+}
+
+int32x2_t test_vdup_laneq_s32(int32x4_t v1) {
+  // CHECK: test_vdup_laneq_s32
+  return vdup_laneq_s32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+}
+
+int64x1_t test_vdup_laneq_s64(int64x2_t v1) {
+  // CHECK: test_vdup_laneq_s64
+  return vdup_laneq_s64(v1, 0);
+  // CHECK: ret
+}
+
+int8x16_t test_vdupq_laneq_s8(int8x16_t v1) {
+  // CHECK: test_vdupq_laneq_s8
+  return vdupq_laneq_s8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+}
+
+int16x8_t test_vdupq_laneq_s16(int16x8_t v1) {
+  // CHECK: test_vdupq_laneq_s16
+  return vdupq_laneq_s16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+}
+
+int32x4_t test_vdupq_laneq_s32(int32x4_t v1) {
+  // CHECK: test_vdupq_laneq_s32
+  return vdupq_laneq_s32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+}
+
+int64x2_t test_vdupq_laneq_s64(int64x2_t v1) {
+  // CHECK: test_vdupq_laneq_s64
+  return vdupq_laneq_s64(v1, 0);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+uint8x8_t test_vdup_laneq_u8(uint8x16_t v1) {
+  // CHECK: test_vdup_laneq_u8
+  return vdup_laneq_u8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+}
+
+uint16x4_t test_vdup_laneq_u16(uint16x8_t v1) {
+  // CHECK: test_vdup_laneq_u16
+  return vdup_laneq_u16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+}
+
+uint32x2_t test_vdup_laneq_u32(uint32x4_t v1) {
+  // CHECK: test_vdup_laneq_u32
+  return vdup_laneq_u32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+}
+
+uint64x1_t test_vdup_laneq_u64(uint64x2_t v1) {
+  // CHECK: test_vdup_laneq_u64
+  return vdup_laneq_u64(v1, 0);
+  // CHECK: ret
+}
+
+uint8x16_t test_vdupq_laneq_u8(uint8x16_t v1) {
+  // CHECK: test_vdupq_laneq_u8
+  return vdupq_laneq_u8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+}
+
+uint16x8_t test_vdupq_laneq_u16(uint16x8_t v1) {
+  // CHECK: test_vdupq_laneq_u16
+  return vdupq_laneq_u16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+}
+
+uint32x4_t test_vdupq_laneq_u32(uint32x4_t v1) {
+  // CHECK: test_vdupq_laneq_u32
+  return vdupq_laneq_u32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+}
+
+uint64x2_t test_vdupq_laneq_u64(uint64x2_t v1) {
+  // CHECK: test_vdupq_laneq_u64
+  return vdupq_laneq_u64(v1, 0);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+poly8x8_t test_vdup_lane_p8(poly8x8_t v1) {
+  // CHECK: test_vdup_lane_p8
+  return vdup_lane_p8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+}
+
+poly16x4_t test_vdup_lane_p16(poly16x4_t v1) {
+  // CHECK: test_vdup_lane_p16
+  return vdup_lane_p16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+}
+
+poly8x16_t test_vdupq_lane_p8(poly8x8_t v1) {
+  // CHECK: test_vdupq_lane_p8
+  return vdupq_lane_p8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+}
+
+poly16x8_t test_vdupq_lane_p16(poly16x4_t v1) {
+  // CHECK: test_vdupq_lane_p16
+  return vdupq_lane_p16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+}
+
+poly8x8_t test_vdup_laneq_p8(poly8x16_t v1) {
+  // CHECK: test_vdup_laneq_p8
+  return vdup_laneq_p8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+}
+
+poly16x4_t test_vdup_laneq_p16(poly16x8_t v1) {
+  // CHECK: test_vdup_laneq_p16
+  return vdup_laneq_p16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+}
+
+poly8x16_t test_vdupq_laneq_p8(poly8x16_t v1) {
+  // CHECK: test_vdupq_laneq_p8
+  return vdupq_laneq_p8(v1, 5);
+  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+}
+
+poly16x8_t test_vdupq_laneq_p16(poly16x8_t v1) {
+  // CHECK: test_vdupq_laneq_p16
+  return vdupq_laneq_p16(v1, 2);
+  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+}
+
+float32x2_t test_vdup_lane_f32(float32x2_t v1) {
+  // CHECK: test_vdup_lane_f32
+  return vdup_lane_f32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+}
+
+float64x1_t test_vdup_lane_f64(float64x1_t v1) {
+  // CHECK: test_vdup_lane_f64
+  return vdup_lane_f64(v1, 0);
+  // CHECK: ret
+}
+
+float32x2_t test_vdup_laneq_f32(float32x4_t v1) {
+  // CHECK: test_vdup_laneq_f32
+  return vdup_laneq_f32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+}
+
+float64x1_t test_vdup_laneq_f64(float64x2_t v1) {
+  // CHECK: test_vdup_laneq_f64
+  return vdup_laneq_f64(v1, 0);
+  // CHECK: ret
+}
+
+float32x4_t test_vdupq_lane_f32(float32x2_t v1) {
+  // CHECK: test_vdupq_lane_f32
+  return vdupq_lane_f32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+}
+
+float64x2_t test_vdupq_lane_f64(float64x1_t v1) {
+  // CHECK: test_vdupq_lane_f64
+  return vdupq_lane_f64(v1, 0);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float32x4_t test_vdupq_laneq_f32(float32x4_t v1) {
+  // CHECK: test_vdupq_laneq_f32
+  return vdupq_laneq_f32(v1, 1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+}
+
+float64x2_t test_vdupq_laneq_f64(float64x2_t v1) {
+  // CHECK: test_vdupq_laneq_f64
+  return vdupq_laneq_f64(v1, 0);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+int8x8_t test_vmov_n_s8(int8_t v1) {
+  // CHECK: test_vmov_n_s8
+  return vmov_n_s8(v1);
+  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+}
+
+int16x4_t test_vmov_n_s16(int16_t v1) {
+  // CHECK: test_vmov_n_s16
+  return vmov_n_s16(v1);
+  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+}
+
+int32x2_t test_vmov_n_s32(int32_t v1) {
+  // CHECK: test_vmov_n_s32
+  return vmov_n_s32(v1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
+}
+
+int64x1_t test_vmov_n_s64(int64_t v1) {
+  // CHECK: test_vmov_n_s64
+  return vmov_n_s64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+int8x16_t test_vmovq_n_s8(int8_t v1) {
+  // CHECK: test_vmovq_n_s8
+  return vmovq_n_s8(v1);
+  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+}
+
+int16x8_t test_vmovq_n_s16(int16_t v1) {
+  // CHECK: test_vmovq_n_s16
+  return vmovq_n_s16(v1);
+  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+}
+
+int32x4_t test_vmovq_n_s32(int32_t v1) {
+  // CHECK: test_vmovq_n_s32
+  return vmovq_n_s32(v1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+}
+
+int64x2_t test_vmovq_n_s64(int64_t v1) {
+  // CHECK: test_vmovq_n_s64
+  return vmovq_n_s64(v1);
+  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+}
+
+uint8x8_t test_vmov_n_u8(uint8_t v1) {
+  // CHECK: test_vmov_n_u8
+  return vmov_n_u8(v1);
+  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+}
+
+uint16x4_t test_vmov_n_u16(uint16_t v1) {
+  // CHECK: test_vmov_n_u16
+  return vmov_n_u16(v1);
+  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+}
+
+uint32x2_t test_vmov_n_u32(uint32_t v1) {
+  // CHECK: test_vmov_n_u32
+  return vmov_n_u32(v1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
+}
+
+uint64x1_t test_vmov_n_u64(uint64_t v1) {
+  // CHECK: test_vmov_n_u64
+  return vmov_n_u64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+uint8x16_t test_vmovq_n_u8(uint8_t v1) {
+  // CHECK: test_vmovq_n_u8
+  return vmovq_n_u8(v1);
+  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+}
+
+uint16x8_t test_vmovq_n_u16(uint16_t v1) {
+  // CHECK: test_vmovq_n_u16
+  return vmovq_n_u16(v1);
+  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+}
+
+uint32x4_t test_vmovq_n_u32(uint32_t v1) {
+  // CHECK: test_vmovq_n_u32
+  return vmovq_n_u32(v1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+}
+
+uint64x2_t test_vmovq_n_u64(uint64_t v1) {
+  // CHECK: test_vmovq_n_u64
+  return vmovq_n_u64(v1);
+  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+}
+
+poly8x8_t test_vmov_n_p8(poly8_t v1) {
+  // CHECK: test_vmov_n_p8
+  return vmov_n_p8(v1);
+  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+}
+
+poly16x4_t test_vmov_n_p16(poly16_t v1) {
+  // CHECK: test_vmov_n_p16
+  return vmov_n_p16(v1);
+  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+}
+
+poly8x16_t test_vmovq_n_p8(poly8_t v1) {
+  // CHECK: test_vmovq_n_p8
+  return vmovq_n_p8(v1);
+  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+}
+
+poly16x8_t test_vmovq_n_p16(poly16_t v1) {
+  // CHECK: test_vmovq_n_p16
+  return vmovq_n_p16(v1);
+  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+}
+
+float32x2_t test_vmov_n_f32(float32_t v1) {
+  // CHECK: test_vmov_n_f32
+  return vmov_n_f32(v1);
+  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float64x1_t test_vmov_n_f64(float64_t v1) {
+  // CHECK: test_vmov_n_f64
+  return vmov_n_f64(v1);
+  // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float32x4_t test_vmovq_n_f32(float32_t v1) {
+  // CHECK: test_vmovq_n_f32
+  return vmovq_n_f32(v1);
+  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmovq_n_f64(float64_t v1) {
+  // CHECK: test_vmovq_n_f64
+  return vmovq_n_f64(v1);
+  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -139,7 +139,8 @@
   OpDiv,
   OpLongHi,
   OpNarrowHi,
-  OpMovlHi
+  OpMovlHi,
+  OpCopy
 };
 
 enum ClassKind {
@@ -304,6 +305,7 @@
     OpMap["OP_LONG_HI"] = OpLongHi;
     OpMap["OP_NARROW_HI"] = OpNarrowHi;
     OpMap["OP_MOVL_HI"] = OpMovlHi;
+    OpMap["OP_COPY"] = OpCopy;
 
     Record *SI = R.getClass("SInst");
     Record *II = R.getClass("IInst");
@@ -1359,7 +1361,8 @@
 }
 
 // Generate the string "(argtype a, argtype b, ...)"
-static std::string GenArgs(const std::string &proto, StringRef typestr) {
+static std::string GenArgs(const std::string &proto, StringRef typestr,
+                           const std::string &name) {
   bool define = UseMacro(proto);
   char arg = 'a';
 
@@ -1377,6 +1380,9 @@
       s += TypeString(proto[i], typestr) + " __";
     }
     s.push_back(arg);
+    // Append a digit to the argument name to avoid multiple definitions.
+    if (name == "vcopy_lane")
+      s.push_back('1');
     if ((i + 1) < e)
       s += ", ";
   }
@@ -1387,7 +1393,8 @@
 
 // Macro arguments are not type-checked like inline function arguments, so
 // assign them to local temporaries to get the right type checking.
-static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
+static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
+                                  const std::string &name ) {
   char arg = 'a';
   std::string s;
   bool generatedLocal = false;
@@ -1398,11 +1405,18 @@
     if (MacroArgUsedDirectly(proto, i))
       continue;
     generatedLocal = true;
+    bool extranumber = false;
+    if(name == "vcopy_lane")
+      extranumber = true;
 
     s += TypeString(proto[i], typestr) + " __";
     s.push_back(arg);
+    if(extranumber)
+      s.push_back('1');
     s += " = (";
     s.push_back(arg);
+    if(extranumber)
+      s.push_back('1');
     s += "); ";
   }
 
@@ -1873,6 +1887,12 @@
          MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
     break;
   }
+  case OpCopy: {
+    s += TypeString('s', typestr) + " __c2 = " +
+         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
+         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
+    break;
+  }
   default:
     PrintFatalError("unknown OpKind!");
   }
@@ -2097,12 +2117,12 @@
   s += mangledName;
 
   // Function arguments
-  s += GenArgs(proto, inTypeStr);
+  s += GenArgs(proto, inTypeStr, name);
 
   // Definition.
   if (define) {
     s += " __extension__ ({ \\\n  ";
-    s += GenMacroLocals(proto, inTypeStr);
+    s += GenMacroLocals(proto, inTypeStr, name);
   } else if (kind == OpUnavailable) {
     s += " __attribute__((unavailable));\n";
     return s;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to