Hi t.p.northover,
Please review, thanks.
http://llvm-reviews.chandlerc.com/D2149
Files:
include/clang/Basic/TargetBuiltins.h
include/clang/Basic/arm_neon.td
lib/CodeGen/CGBuiltin.cpp
lib/Sema/SemaChecking.cpp
lib/Sema/SemaType.cpp
test/CodeGen/aarch64-poly64.c
utils/TableGen/NeonEmitter.cpp
Index: include/clang/Basic/TargetBuiltins.h
===================================================================
--- include/clang/Basic/TargetBuiltins.h
+++ include/clang/Basic/TargetBuiltins.h
@@ -90,6 +90,7 @@
Int64,
Poly8,
Poly16,
+ Poly64,
Float16,
Float32,
Float64
Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -512,23 +512,23 @@
////////////////////////////////////////////////////////////////////////////////
// Load/Store
-// With additional QUl, Ql, Qd type.
+// With additional QUl, Ql, Qd, Pl, QPl type.
def LD1 : WInst<"vld1", "dc",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD2 : WInst<"vld2", "2c",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD3 : WInst<"vld3", "3c",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def LD4 : WInst<"vld4", "4c",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST1 : WInst<"vst1", "vpd",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST2 : WInst<"vst2", "vp2",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST3 : WInst<"vst3", "vp3",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
def ST4 : WInst<"vst4", "vp4",
- "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Addition
@@ -563,8 +563,9 @@
////////////////////////////////////////////////////////////////////////////////
// Logical operations
-// With additional Qd type.
-def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd">;
+// With additional Qd, Ql, QPl type.
+def BSL : SInst<"vbsl", "dudd",
+ "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQdPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Absolute Difference
@@ -579,13 +580,16 @@
////////////////////////////////////////////////////////////////////////////////
// Comparison
-// With additional Qd type.
+// With additional Qd, Ql, QPl type.
+def VVCEQ : IOpInst<"vceq", "udd", "PlQPl",
+                    OP_EQ>;
def FCAGE : IInst<"vcage", "udd", "fQfQd">;
def FCAGT : IInst<"vcagt", "udd", "fQfQd">;
def FCALE : IInst<"vcale", "udd", "fQfQd">;
def FCALT : IInst<"vcalt", "udd", "fQfQd">;
// With additional Ql, QUl, Qd types.
-def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+def CMTST : WInst<"vtst", "udd",
+ "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">;
def CFMEQ : SOpInst<"vceq", "udd",
"csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>;
@@ -627,6 +631,13 @@
def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi",
OP_LONG_HI>;
+////////////////////////////////////////////////////////////////////////////////
+// Shifts with insert, with additional Ql, QPl type.
+def SRI_N : WInst<"vsri_n", "dddi",
+ "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">;
+def SLI_N : WInst<"vsli_n", "dddi",
+ "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">;
+
// Right shift narrow high
def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi",
"HsHiHlHUsHUiHUl", OP_NARROW_HI>;
@@ -678,36 +689,40 @@
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
def GET_LANE : IInst<"vget_lane", "sdi",
- "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
+ "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
def SET_LANE : IInst<"vset_lane", "dsdi",
- "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
+ "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">;
def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
- "csiPcPsUcUsUiPcPsf", OP_COPY_LN>;
+ "csiPcPsUcUsUiPcPsfPl", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
- "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQd", OP_COPYQ_LN>;
+ "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
"csiPcPsUcUsUif", OP_COPY_LNQ>;
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi",
- "QcQsQiQlQUcQUsQUiQUlQPcQPsQfd", OP_COPY_LN>;
-
+ "QcQsQiQlQUcQUsQUiQUlQPcQPsQfdQPl", OP_COPY_LN>;
////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value
def VDUP_LANE1: WOpInst<"vdup_lane", "dgi",
- "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd",
+ "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
OP_DUP_LN>;
def VDUP_LANE2: WOpInst<"vdup_laneq", "dki",
- "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQd",
+ "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
OP_DUP_LN>;
def DUP_N : WOpInst<"vdup_n", "ds",
- "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
+ "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQdPlQPl",
OP_DUP>;
def MOV_N : WOpInst<"vmov_n", "ds",
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd",
OP_DUP>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Combining vectors, with additional Pl
+def COMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPsPl", OP_CONC>;
+
////////////////////////////////////////////////////////////////////////////////
-//Initialize a vector from bit pattern
-def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPsl", OP_CAST>;
+//Initialize a vector from bit pattern, with additional Pl
+def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPslPl", OP_CAST>;
////////////////////////////////////////////////////////////////////////////////
@@ -778,7 +793,7 @@
////////////////////////////////////////////////////////////////////////////////
// Newly added Vector Extract for f64
def VEXT_A64 : WInst<"vext", "dddi",
- "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQd">;
+ "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQdPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Crypto
@@ -802,17 +817,17 @@
////////////////////////////////////////////////////////////////////////////////
// Permutation
def VTRN1 : SOpInst<"vtrn1", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN1>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>;
def VZIP1 : SOpInst<"vzip1", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP1>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>;
def VUZP1 : SOpInst<"vuzp1", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP1>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>;
def VTRN2 : SOpInst<"vtrn2", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_TRN2>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>;
def VZIP2 : SOpInst<"vzip2", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_ZIP2>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>;
def VUZP2 : SOpInst<"vuzp2", "ddd",
- "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
+ "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>;
////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -1621,6 +1621,7 @@
case NeonTypeFlags::Int32:
return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
case NeonTypeFlags::Float32:
return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
@@ -2327,13 +2328,13 @@
// Shift Right And Insert (Immediate)
case AArch64::BI__builtin_neon_vsrid_n_s64:
case AArch64::BI__builtin_neon_vsrid_n_u64:
- Int = Intrinsic::aarch64_neon_vsrid_n;
- s = "vsri"; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vsri;
+ s = "vsri"; OverloadInt = true; break;
// Shift Left And Insert (Immediate)
case AArch64::BI__builtin_neon_vslid_n_s64:
case AArch64::BI__builtin_neon_vslid_n_u64:
- Int = Intrinsic::aarch64_neon_vslid_n;
- s = "vsli"; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vsli;
+ s = "vsli"; OverloadInt = true; break;
// Signed Saturating Shift Right Narrow (Immediate)
case AArch64::BI__builtin_neon_vqshrnh_n_s16:
case AArch64::BI__builtin_neon_vqshrns_n_s32:
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -339,6 +339,7 @@
case NeonTypeFlags::Int32:
return shift ? 31 : (2 << IsQuad) - 1;
case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
return shift ? 63 : (1 << IsQuad) - 1;
case NeonTypeFlags::Float16:
assert(!shift && "cannot shift float types!");
@@ -356,7 +357,8 @@
/// getNeonEltType - Return the QualType corresponding to the elements of
/// the vector type specified by the NeonTypeFlags. This is used to check
/// the pointer arguments for Neon load/store intrinsics.
-static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) {
+static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
+ bool IsAArch64) {
switch (Flags.getEltType()) {
case NeonTypeFlags::Int8:
return Flags.isUnsigned() ? Context.UnsignedCharTy : Context.SignedCharTy;
@@ -367,11 +369,13 @@
case NeonTypeFlags::Int64:
return Flags.isUnsigned() ? Context.UnsignedLongLongTy : Context.LongLongTy;
case NeonTypeFlags::Poly8:
- return Context.SignedCharTy;
+ return IsAArch64 ? Context.UnsignedCharTy : Context.SignedCharTy;
case NeonTypeFlags::Poly16:
- return Context.ShortTy;
+ return IsAArch64 ? Context.UnsignedShortTy : Context.ShortTy;
+ case NeonTypeFlags::Poly64:
+ return Context.UnsignedLongLongTy;
case NeonTypeFlags::Float16:
- return Context.UnsignedShortTy;
+ return Context.HalfTy;
case NeonTypeFlags::Float32:
return Context.FloatTy;
case NeonTypeFlags::Float64:
@@ -415,7 +419,7 @@
Arg = ICE->getSubExpr();
ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg);
QualType RHSTy = RHS.get()->getType();
- QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context);
+ QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context, true);
if (HasConstPtr)
EltTy = EltTy.withConst();
QualType LHSTy = Context.getPointerType(EltTy);
@@ -602,7 +606,7 @@
Arg = ICE->getSubExpr();
ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg);
QualType RHSTy = RHS.get()->getType();
- QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context);
+ QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context, false);
if (HasConstPtr)
EltTy = EltTy.withConst();
QualType LHSTy = Context.getPointerType(EltTy);
Index: lib/Sema/SemaType.cpp
===================================================================
--- lib/Sema/SemaType.cpp
+++ lib/Sema/SemaType.cpp
@@ -4766,9 +4766,10 @@
if (VecKind == VectorType::NeonPolyVector) {
if (IsAArch64) {
- // AArch64 polynomial vectors are unsigned
+ // AArch64 polynomial vectors are unsigned and support poly64.
return BTy->getKind() == BuiltinType::UChar ||
- BTy->getKind() == BuiltinType::UShort;
+ BTy->getKind() == BuiltinType::UShort ||
+ BTy->getKind() == BuiltinType::ULongLong;
} else {
// AArch32 polynomial vector are signed.
return BTy->getKind() == BuiltinType::SChar ||
Index: test/CodeGen/aarch64-poly64.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-poly64.c
@@ -0,0 +1,282 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics with poly64
+
+#include <arm_neon.h>
+
+uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
+ // CHECK: test_vceq_p64
+ return vceq_p64(a, b);
+ // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vceqq_p64
+ return vceqq_p64(a, b);
+ // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
+ // CHECK: test_vtst_p64
+ return vtst_p64(a, b);
+ // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vtstq_p64
+ return vtstq_p64(a, b);
+ // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
+ // CHECK: test_vbsl_p64
+ return vbsl_p64(a, b, c);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
+ // CHECK: test_vbslq_p64
+ return vbslq_p64(a, b, c);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly64_t test_vget_lane_p64(poly64x1_t v) {
+ // CHECK: test_vget_lane_p64
+ return vget_lane_p64(v, 0);
+ // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+}
+
+poly64_t test_vgetq_lane_p64(poly64x2_t v) {
+ // CHECK: test_vgetq_lane_p64
+ return vgetq_lane_p64(v, 1);
+ // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
+ // CHECK: test_vset_lane_p64
+ return vset_lane_p64(a, v, 0);
+ // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
+ // CHECK: test_vsetq_lane_p64
+ return vsetq_lane_p64(a, v, 1);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
+}
+
+poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
+ // CHECK: test_vcopy_lane_p64
+ return vcopy_lane_p64(a, 0, b, 0);
+ // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
+ // CHECK: test_vcopyq_lane_p64
+ return vcopyq_lane_p64(a, 1, b, 0);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vcopyq_laneq_p64
+ return vcopyq_laneq_p64(a, 1, b, 1);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
+}
+
+poly64x1_t test_vcreate_p64(uint64_t a) {
+ // CHECK: test_vcreate_p64
+ return vcreate_p64(a);
+ // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+
+poly64x1_t test_vdup_n_p64(poly64_t a) {
+ // CHECK: test_vdup_n_p64
+ return vdup_n_p64(a);
+ // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+}
+poly64x2_t test_vdupq_n_p64(poly64_t a) {
+  // CHECK: test_vdupq_n_p64
+ return vdupq_n_p64(a);
+ // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+}
+
+poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
+ // CHECK: test_vdup_lane_p64
+ return vdup_lane_p64(vec, 0);
+ // CHECK: ret
+}
+
+poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
+ // CHECK: test_vdupq_lane_p64
+ return vdupq_lane_p64(vec, 0);
+ // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
+ // CHECK: test_vdupq_laneq_p64
+ return vdupq_laneq_p64(vec, 1);
+ // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+}
+
+poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
+ // CHECK: test_vcombine_p64
+ return vcombine_p64(low, high);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly64x1_t test_vld1_p64(poly64_t const * ptr) {
+ // CHECK: test_vld1_p64
+ return vld1_p64(ptr);
+ // CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
+ // CHECK: test_vld1q_p64
+ return vld1q_p64(ptr);
+ // CHECK: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
+ // CHECK: test_vst1_p64
+ return vst1_p64(ptr, val);
+ // CHECK: st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
+ // CHECK: test_vst1q_p64
+ return vst1q_p64(ptr, val);
+ // CHECK: st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
+ // CHECK: test_vld2_p64
+ return vld2_p64(ptr);
+ // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
+ // CHECK: test_vld2q_p64
+ return vld2q_p64(ptr);
+ // CHECK: ld2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
+ // CHECK: test_vld3_p64
+ return vld3_p64(ptr);
+ // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
+ // CHECK: test_vld3q_p64
+ return vld3q_p64(ptr);
+ // CHECK: ld3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
+ // CHECK: test_vld4_p64
+ return vld4_p64(ptr);
+ // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
+ // CHECK: test_vld4q_p64
+ return vld4q_p64(ptr);
+ // CHECK: ld4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
+ // CHECK: test_vst2_p64
+ return vst2_p64(ptr, val);
+ // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
+ // CHECK: test_vst2q_p64
+ return vst2q_p64(ptr, val);
+ // CHECK: st2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
+ // CHECK: test_vst3_p64
+ return vst3_p64(ptr, val);
+ // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
+ // CHECK: test_vst3q_p64
+ return vst3q_p64(ptr, val);
+ // CHECK: st3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
+ // CHECK: test_vst4_p64
+ return vst4_p64(ptr, val);
+ // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+}
+
+void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
+ // CHECK: test_vst4q_p64
+ return vst4q_p64(ptr, val);
+ // CHECK: st4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+}
+
+poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
+ // CHECK: test_vext_p64
+  return vext_p64(a, b, 0);
+  // CHECK: ret
+}
+
+poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vextq_p64
+ return vextq_p64(a, b, 1);
+ // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+}
+
+poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vzip1q_p64
+ return vzip1q_p64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vzip2q_p64
+  return vzip2q_p64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vuzp1q_p64
+ return vuzp1q_p64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vuzp2q_p64
+  return vuzp2q_p64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vtrn1q_p64
+ return vtrn1q_p64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vtrn2q_p64
+  return vtrn2q_p64(a, b);
+ // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+}
+
+poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
+ // CHECK: test_vsri_n_p64
+ return vsri_n_p64(a, b, 33);
+ // CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #33
+}
+
+poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
+ // CHECK: test_vsriq_n_p64
+ return vsriq_n_p64(a, b, 64);
+ // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #64
+}
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -164,6 +164,7 @@
Int64,
Poly8,
Poly16,
+ Poly64,
Float16,
Float32,
Float64
@@ -603,7 +604,7 @@
s += quad ? "x4" : "x2";
break;
case 'l':
- s += "int64";
+ s += (poly && !usgn)? "poly64" : "int64";
if (scal)
break;
s += quad ? "x2" : "x1";
@@ -787,7 +788,7 @@
break;
case 'l':
switch (ck) {
- case ClassS: typeCode = usgn ? "u64" : "s64"; break;
+ case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
case ClassI: typeCode = "i64"; break;
case ClassW: typeCode = "64"; break;
default: break;
@@ -1965,7 +1966,7 @@
ET = NeonTypeFlags::Int32;
break;
case 'l':
- ET = NeonTypeFlags::Int64;
+ ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
break;
case 'h':
ET = NeonTypeFlags::Float16;
@@ -2243,6 +2244,7 @@
OS << "#ifdef __aarch64__\n";
OS << "typedef uint8_t poly8_t;\n";
OS << "typedef uint16_t poly16_t;\n";
+ OS << "typedef uint64_t poly64_t;\n";
OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
@@ -2250,19 +2252,21 @@
// Emit Neon vector typedefs.
std::string TypedefTypes(
- "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
+ "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
SmallVector<StringRef, 24> TDTypeVec;
ParseTypes(0, TypedefTypes, TDTypeVec);
// Emit vector typedefs.
bool isA64 = false;
+ bool preinsert;
+ bool postinsert;
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
- bool preinsert = false;
- bool postinsert = false;
+ preinsert = false;
+ postinsert = false;
- if (type == 'd') {
+ if (type == 'd' || (type == 'l' && poly)) {
preinsert = isA64? false: true;
isA64 = true;
} else {
@@ -2288,6 +2292,9 @@
OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
}
+ postinsert = isA64? true: false;
+ if (postinsert)
+ OS << "#endif\n";
OS << "\n";
// Emit struct typedefs.
@@ -2296,10 +2303,10 @@
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
- bool preinsert = false;
- bool postinsert = false;
+ preinsert = false;
+ postinsert = false;
- if (type == 'd') {
+ if (type == 'd' || (type == 'l' && poly)) {
preinsert = isA64? false: true;
isA64 = true;
} else {
@@ -2321,6 +2328,10 @@
OS << "\n";
}
}
+ postinsert = isA64? true: false;
+ if (postinsert)
+ OS << "#endif\n";
+ OS << "\n";
OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits