Hi t.p.northover,
http://llvm-reviews.chandlerc.com/D2070
Files:
include/clang/Basic/arm_neon.td
lib/CodeGen/CGBuiltin.cpp
test/CodeGen/aarch64-neon-tbl.c
utils/TableGen/NeonEmitter.cpp
Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -172,7 +172,8 @@
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
-// #: array of default vectors
+// 2,3,4: array of default vectors
+// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type
@@ -760,6 +761,21 @@
def FMINNMV : SInst<"vminnmv", "sd", "Qf">;
////////////////////////////////////////////////////////////////////////////////
+// Table lookup
+let InstName = "vtbl" in {
+def VQTBL1_A64 : WInst<"vqtbl1", "djt", "UccPcQUcQcQPc">;
+def VQTBL2_A64 : WInst<"vqtbl2", "dBt", "UccPcQUcQcQPc">;
+def VQTBL3_A64 : WInst<"vqtbl3", "dCt", "UccPcQUcQcQPc">;
+def VQTBL4_A64 : WInst<"vqtbl4", "dDt", "UccPcQUcQcQPc">;
+}
+let InstName = "vtbx" in {
+def VQTBX1_A64 : WInst<"vqtbx1", "ddjt", "UccPcQUcQcQPc">;
+def VQTBX2_A64 : WInst<"vqtbx2", "ddBt", "UccPcQUcQcQPc">;
+def VQTBX3_A64 : WInst<"vqtbx3", "ddCt", "UccPcQUcQcQPc">;
+def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">;
+}
+
+////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic
// Scalar Addition
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2197,13 +2197,165 @@
return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
-Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
+ unsigned BuiltinID,
+ const CallExpr *E) {
+ unsigned int Int = 0;
+ const char *s = NULL;
+
+ unsigned TblPos;
+ switch (BuiltinID) {
+ default:
+ return 0;
+ case AArch64::BI__builtin_neon_vtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1q_v:
+ case AArch64::BI__builtin_neon_vtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2q_v:
+ case AArch64::BI__builtin_neon_vtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3q_v:
+ case AArch64::BI__builtin_neon_vtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4q_v:
+ TblPos = 0;
+ break;
+ case AArch64::BI__builtin_neon_vtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1q_v:
+ case AArch64::BI__builtin_neon_vtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2q_v:
+ case AArch64::BI__builtin_neon_vtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3q_v:
+ case AArch64::BI__builtin_neon_vtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4q_v:
+ TblPos = 1;
+ break;
+ }
+
+ assert(E->getNumArgs() >= 3);
+
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
+ return 0;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(Result.getZExtValue());
+ llvm::VectorType *VTy = GetNeonType(&CGF, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+ SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+ }
+
+ Arg = E->getArg(TblPos);
+ llvm::Type *TblTy = CGF.ConvertType(Arg->getType());
+ llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy);
+ llvm::Type *Tys[2] = { Ty, VTblTy };
+ unsigned nElts = VTy->getNumElements();
+
+  // Select the AArch64 table-lookup intrinsic for each builtin.  vtbx1 and
+  // vtbx3 have no single-instruction form and are expanded inline below as a
+  // table lookup (tbl) followed by a compare and select (bsl).
+ switch (BuiltinID) {
+ case AArch64::BI__builtin_neon_vtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1_v:
+ case AArch64::BI__builtin_neon_vqtbl1q_v:
+ Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
+ case AArch64::BI__builtin_neon_vtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2_v:
+ case AArch64::BI__builtin_neon_vqtbl2q_v:
+ Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
+ case AArch64::BI__builtin_neon_vtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3_v:
+ case AArch64::BI__builtin_neon_vqtbl3q_v:
+ Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
+ case AArch64::BI__builtin_neon_vtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4_v:
+ case AArch64::BI__builtin_neon_vqtbl4q_v:
+ Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
+ case AArch64::BI__builtin_neon_vtbx1_v: {
+ llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
+ Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
+ Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
+ CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+ SmallVector<Value *, 4> TblOps;
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ Function *TblF = CGF.CGM.getIntrinsic(Intrinsic::aarch64_neon_vtbl1, Tys);
+ Value *TblRes = CGF.EmitNeonCall(TblF, TblOps, "vtbl1");
+
+ SmallVector<Value *, 4> BslOps;
+ BslOps.push_back(CmpRes);
+ BslOps.push_back(Ops[0]);
+ BslOps.push_back(TblRes);
+ Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+ return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+ }
+ case AArch64::BI__builtin_neon_vqtbx1_v:
+ case AArch64::BI__builtin_neon_vqtbx1q_v:
+ Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
+ case AArch64::BI__builtin_neon_vtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2_v:
+ case AArch64::BI__builtin_neon_vqtbx2q_v:
+ Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
+ case AArch64::BI__builtin_neon_vtbx3_v: {
+ llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
+ Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
+ Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
+ TwentyFourV);
+ CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+ SmallVector<Value *, 4> TblOps;
+ TblOps.push_back(Ops[1]);
+ TblOps.push_back(Ops[2]);
+ TblOps.push_back(Ops[3]);
+ TblOps.push_back(Ops[4]);
+ Function *TblF = CGF.CGM.getIntrinsic(Intrinsic::aarch64_neon_vtbl3, Tys);
+ Value *TblRes = CGF.EmitNeonCall(TblF, TblOps, "vtbl3");
+
+ SmallVector<Value *, 4> BslOps;
+ BslOps.push_back(CmpRes);
+ BslOps.push_back(Ops[0]);
+ BslOps.push_back(TblRes);
+ Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+ return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+ }
+ case AArch64::BI__builtin_neon_vqtbx3_v:
+ case AArch64::BI__builtin_neon_vqtbx3q_v:
+ Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
+ case AArch64::BI__builtin_neon_vtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4_v:
+ case AArch64::BI__builtin_neon_vqtbx4q_v:
+ Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
+ }
+
+ if (!Int)
+ return 0;
+
+ Function *F = CGF.CGM.getIntrinsic(Int, Tys);
+ return CGF.EmitNeonCall(F, Ops, s);
+}
+
+Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
// Process AArch64 scalar builtins
if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
return Result;
+ // Process AArch64 table lookup builtins
+ if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
+ return Result;
+
if (BuiltinID == AArch64::BI__clear_cache) {
assert(E->getNumArgs() == 2 &&
"Variadic __clear_cache slipped through on AArch64");
Index: test/CodeGen/aarch64-neon-tbl.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-neon-tbl.c
@@ -0,0 +1,445 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vtbl1_s8
+ return vtbl1_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
+ // CHECK: test_vqtbl1_s8
+ return vqtbl1_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
+ // CHECK: test_vtbl2_s8
+ return vtbl2_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
+ // CHECK: test_vqtbl2_s8
+ return vqtbl2_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
+ // CHECK: test_vtbl3_s8
+ return vtbl3_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
+ // CHECK: test_vqtbl3_s8
+ return vqtbl3_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
+ // CHECK: test_vtbl4_s8
+ return vtbl4_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
+ // CHECK: test_vqtbl4_s8
+ return vqtbl4_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vqtbl1q_s8
+ return vqtbl1q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
+ // CHECK: test_vqtbl2q_s8
+ return vqtbl2q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
+ // CHECK: test_vqtbl3q_s8
+ return vqtbl3q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
+ // CHECK: test_vqtbl4q_s8
+ return vqtbl4q_s8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
+ // CHECK: test_vtbx1_s8
+ return vtbx1_s8(a, b, c);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
+ // CHECK: test_vtbx2_s8
+ return vtbx2_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
+ // CHECK: test_vtbx3_s8
+ return vtbx3_s8(a, b, c);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
+ // CHECK: test_vtbx4_s8
+ return vtbx4_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
+ // CHECK: test_vqtbx1_s8
+ return vqtbx1_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
+ // CHECK: test_vqtbx2_s8
+ return vqtbx2_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
+ // CHECK: test_vqtbx3_s8
+ return vqtbx3_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
+ // CHECK: test_vqtbx4_s8
+ return vqtbx4_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+ // CHECK: test_vqtbx1q_s8
+ return vqtbx1q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
+ // CHECK: test_vqtbx2q_s8
+ return vqtbx2q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
+ // CHECK: test_vqtbx3q_s8
+ return vqtbx3q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
+ // CHECK: test_vqtbx4q_s8
+ return vqtbx4q_s8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtbl1_u8
+ return vtbl1_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl1_u8
+ return vqtbl1_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
+ // CHECK: test_vtbl2_u8
+ return vtbl2_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl2_u8
+ return vqtbl2_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
+ // CHECK: test_vtbl3_u8
+ return vtbl3_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl3_u8
+ return vqtbl3_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
+ // CHECK: test_vtbl4_u8
+ return vtbl4_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl4_u8
+ return vqtbl4_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl1q_u8
+ return vqtbl1q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl2q_u8
+ return vqtbl2q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl3q_u8
+ return vqtbl3q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl4q_u8
+ return vqtbl4q_u8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
+ // CHECK: test_vtbx1_u8
+ return vtbx1_u8(a, b, c);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
+ // CHECK: test_vtbx2_u8
+ return vtbx2_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
+ // CHECK: test_vtbx3_u8
+ return vtbx3_u8(a, b, c);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
+ // CHECK: test_vtbx4_u8
+ return vtbx4_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx1_u8
+ return vqtbx1_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx2_u8
+ return vqtbx2_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx3_u8
+ return vqtbx3_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx4_u8
+ return vqtbx4_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx1q_u8
+ return vqtbx1q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx2q_u8
+ return vqtbx2q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx3q_u8
+ return vqtbx3q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx4q_u8
+ return vqtbx4q_u8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
+ // CHECK: test_vtbl1_p8
+ return vtbl1_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl1_p8
+ return vqtbl1_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
+ // CHECK: test_vtbl2_p8
+ return vtbl2_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl2_p8
+ return vqtbl2_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
+ // CHECK: test_vtbl3_p8
+ return vtbl3_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl3_p8
+ return vqtbl3_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
+ // CHECK: test_vtbl4_p8
+ return vtbl4_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
+ // CHECK: test_vqtbl4_p8
+ return vqtbl4_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl1q_p8
+ return vqtbl1q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl2q_p8
+ return vqtbl2q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl3q_p8
+ return vqtbl3q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
+ // CHECK: test_vqtbl4q_p8
+ return vqtbl4q_p8(a, b);
+ // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
+ // CHECK: test_vtbx1_p8
+ return vtbx1_p8(a, b, c);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
+ // CHECK: test_vtbx2_p8
+ return vtbx2_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
+ // CHECK: test_vtbx3_p8
+ return vtbx3_p8(a, b, c);
+ // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
+ // CHECK: test_vtbx4_p8
+ return vtbx4_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx1_p8
+ return vqtbx1_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx2_p8
+ return vqtbx2_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx3_p8
+ return vqtbx3_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
+ // CHECK: test_vqtbx4_p8
+ return vqtbx4_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx1q_p8
+ return vqtbx1q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx2q_p8
+ return vqtbx2q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx3q_p8
+ return vqtbx3q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
+ // CHECK: test_vqtbx4q_p8
+ return vqtbx4q_p8(a, b, c);
+ // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -474,6 +474,9 @@
case 'g':
quad = false;
break;
+ case 'B':
+ case 'C':
+ case 'D':
case 'j':
quad = true;
break;
@@ -537,6 +540,10 @@
return type;
}
+static bool IsMultiVecProto(const char p) {
+ return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
+}
+
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
@@ -611,11 +618,11 @@
PrintFatalError("unhandled type!");
}
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
s += "x2";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
s += "x3";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
s += "x4";
// Append _t, finishing the type string typedef type.
@@ -692,7 +699,7 @@
// returning structs of 2, 3, or 4 vectors which are returned in a sret-like
// fashion, storing them to a pointer arg.
if (ret) {
- if (mod >= '2' && mod <= '4')
+ if (IsMultiVecProto(mod))
return "vv*"; // void result with void* first argument
if (mod == 'f' || (ck != ClassB && type == 'f'))
return quad ? "V4f" : "V2f";
@@ -709,11 +716,11 @@
}
// Non-return array types are passed as individual vectors.
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
return quad ? "V16ScV16Sc" : "V8ScV8Sc";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
if (mod == 'f' || (ck != ClassB && type == 'f'))
@@ -1926,7 +1933,7 @@
// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
// sret-like argument.
- bool sret = (proto[0] >= '2' && proto[0] <= '4');
+ bool sret = IsMultiVecProto(proto[0]);
bool define = UseMacro(proto);
@@ -1986,12 +1993,19 @@
// Handle multiple-vector values specially, emitting each subvector as an
// argument to the __builtin.
+ unsigned NumOfVec = 0;
if (proto[i] >= '2' && proto[i] <= '4') {
+ NumOfVec = proto[i] - '0';
+ } else if (proto[i] >= 'B' && proto[i] <= 'D') {
+ NumOfVec = proto[i] - 'A' + 1;
+ }
+
+ if (NumOfVec > 0) {
// Check if an explicit cast is needed.
if (argType != 'c' || argPoly || argUsgn)
args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
- for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
+ for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
s += args + ".val[" + utostr(vi) + "]";
if ((vi + 1) < ve)
s += ", ";
@@ -2483,7 +2497,7 @@
// Builtins that return a struct of multiple vectors have an extra
// leading arg for the struct return.
- if (Proto[0] >= '2' && Proto[0] <= '4')
+ if (IsMultiVecProto(Proto[0]))
++immidx;
// Add one to the index for each argument until we reach the immediate
@@ -2494,12 +2508,15 @@
immidx += 1;
break;
case '2':
+ case 'B':
immidx += 2;
break;
case '3':
+ case 'C':
immidx += 3;
break;
case '4':
+ case 'D':
immidx += 4;
break;
case 'i':
@@ -2607,7 +2624,7 @@
}
}
// For sret builtins, adjust the pointer argument index.
- if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
+ if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
PtrArgNum += 1;
// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits