Hi t.p.northover,

http://llvm-reviews.chandlerc.com/D2070

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D2070?vs=5278&id=5456#toc

Files:
  include/clang/Basic/arm_neon.td
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/aarch64-neon-tbl.c
  utils/TableGen/NeonEmitter.cpp
Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -183,7 +183,8 @@
 // y: scalar of float
 // o: scalar of double
 // k: default elt width, double num elts
-// #: array of default vectors
+// 2,3,4: array of default vectors
+// B,C,D: array of default elts, forcing the 'Q' size modifier.
 // p: pointer type
 // c: const pointer type
 
@@ -815,6 +816,21 @@
                     "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPs", OP_UZP2>;
 
 ////////////////////////////////////////////////////////////////////////////////
+// Table lookup
+let InstName = "vtbl" in {
+def VQTBL1_A64 : WInst<"vqtbl1", "djt",  "UccPcQUcQcQPc">;
+def VQTBL2_A64 : WInst<"vqtbl2", "dBt",  "UccPcQUcQcQPc">;
+def VQTBL3_A64 : WInst<"vqtbl3", "dCt",  "UccPcQUcQcQPc">;
+def VQTBL4_A64 : WInst<"vqtbl4", "dDt",  "UccPcQUcQcQPc">;
+}
+let InstName = "vtbx" in {
+def VQTBX1_A64 : WInst<"vqtbx1", "ddjt", "UccPcQUcQcQPc">;
+def VQTBX2_A64 : WInst<"vqtbx2", "ddBt", "UccPcQUcQcQPc">;
+def VQTBX3_A64 : WInst<"vqtbx3", "ddCt", "UccPcQUcQcQPc">;
+def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">;
+}
+
+////////////////////////////////////////////////////////////////////////////////
 // Scalar Arithmetic
 
 // Scalar Addition
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2462,13 +2462,165 @@
   return CGF.Builder.CreateBitCast(Result, ResultType, s);
 }
 
-Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
-                                                     const CallExpr *E) {
+static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
+                                        unsigned BuiltinID,
+                                        const CallExpr *E) {
+  unsigned int Int = 0;
+  const char *s = NULL;
+
+  unsigned TblPos;
+  switch (BuiltinID) {
+  default:
+    return 0;
+  case AArch64::BI__builtin_neon_vtbl1_v:
+  case AArch64::BI__builtin_neon_vqtbl1_v:
+  case AArch64::BI__builtin_neon_vqtbl1q_v:
+  case AArch64::BI__builtin_neon_vtbl2_v:
+  case AArch64::BI__builtin_neon_vqtbl2_v:
+  case AArch64::BI__builtin_neon_vqtbl2q_v:
+  case AArch64::BI__builtin_neon_vtbl3_v:
+  case AArch64::BI__builtin_neon_vqtbl3_v:
+  case AArch64::BI__builtin_neon_vqtbl3q_v:
+  case AArch64::BI__builtin_neon_vtbl4_v:
+  case AArch64::BI__builtin_neon_vqtbl4_v:
+  case AArch64::BI__builtin_neon_vqtbl4q_v:
+    TblPos = 0;
+    break;
+  case AArch64::BI__builtin_neon_vtbx1_v:
+  case AArch64::BI__builtin_neon_vqtbx1_v:
+  case AArch64::BI__builtin_neon_vqtbx1q_v:
+  case AArch64::BI__builtin_neon_vtbx2_v:
+  case AArch64::BI__builtin_neon_vqtbx2_v:
+  case AArch64::BI__builtin_neon_vqtbx2q_v:
+  case AArch64::BI__builtin_neon_vtbx3_v:
+  case AArch64::BI__builtin_neon_vqtbx3_v:
+  case AArch64::BI__builtin_neon_vqtbx3q_v:
+  case AArch64::BI__builtin_neon_vtbx4_v:
+  case AArch64::BI__builtin_neon_vqtbx4_v:
+  case AArch64::BI__builtin_neon_vqtbx4q_v:
+    TblPos = 1;
+    break;
+  }
+
+  assert(E->getNumArgs() >= 3);
+
+  // Get the last argument, which specifies the vector type.
+  llvm::APSInt Result;
+  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
+    return 0;
+
+  // Determine the type of this overloaded NEON intrinsic.
+  NeonTypeFlags Type(Result.getZExtValue());
+  llvm::VectorType *VTy = GetNeonType(&CGF, Type);
+  llvm::Type *Ty = VTy;
+  if (!Ty)
+    return 0;
 
+  SmallVector<Value *, 4> Ops;
+  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  }
+
+  Arg = E->getArg(TblPos);
+  llvm::Type *TblTy = CGF.ConvertType(Arg->getType());
+  llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy);
+  llvm::Type *Tys[2] = { Ty, VTblTy };
+  unsigned nElts = VTy->getNumElements();
+
+  // Handle each table lookup builtin individually: vtbx1/vtbx3 are expanded
+  // as a tbl followed by a bsl; the rest map directly to AArch64 intrinsics.
+  switch (BuiltinID) {
+  case AArch64::BI__builtin_neon_vtbl1_v:
+  case AArch64::BI__builtin_neon_vqtbl1_v:
+  case AArch64::BI__builtin_neon_vqtbl1q_v:
+    Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
+  case AArch64::BI__builtin_neon_vtbl2_v:
+  case AArch64::BI__builtin_neon_vqtbl2_v:
+  case AArch64::BI__builtin_neon_vqtbl2q_v:
+    Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
+  case AArch64::BI__builtin_neon_vtbl3_v:
+  case AArch64::BI__builtin_neon_vqtbl3_v:
+  case AArch64::BI__builtin_neon_vqtbl3q_v:
+    Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
+  case AArch64::BI__builtin_neon_vtbl4_v:
+  case AArch64::BI__builtin_neon_vqtbl4_v:
+  case AArch64::BI__builtin_neon_vqtbl4q_v:
+    Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
+  case AArch64::BI__builtin_neon_vtbx1_v: {
+    llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
+    Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
+    Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
+    CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+    SmallVector<Value *, 4> TblOps;
+    TblOps.push_back(Ops[1]);
+    TblOps.push_back(Ops[2]);
+    Function *TblF = CGF.CGM.getIntrinsic(Intrinsic::aarch64_neon_vtbl1, Tys);
+    Value *TblRes = CGF.EmitNeonCall(TblF, TblOps, "vtbl1");
+
+    SmallVector<Value *, 4> BslOps;
+    BslOps.push_back(CmpRes);
+    BslOps.push_back(Ops[0]);
+    BslOps.push_back(TblRes);
+    Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+    return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+  }
+  case AArch64::BI__builtin_neon_vqtbx1_v:
+  case AArch64::BI__builtin_neon_vqtbx1q_v:
+    Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
+  case AArch64::BI__builtin_neon_vtbx2_v:
+  case AArch64::BI__builtin_neon_vqtbx2_v:
+  case AArch64::BI__builtin_neon_vqtbx2q_v:
+    Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
+  case AArch64::BI__builtin_neon_vtbx3_v: {
+    llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
+    Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
+    Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
+                                           TwentyFourV);
+    CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
+
+    SmallVector<Value *, 4> TblOps;
+    TblOps.push_back(Ops[1]);
+    TblOps.push_back(Ops[2]);
+    TblOps.push_back(Ops[3]);
+    TblOps.push_back(Ops[4]);
+    Function *TblF = CGF.CGM.getIntrinsic(Intrinsic::aarch64_neon_vtbl3, Tys);
+    Value *TblRes = CGF.EmitNeonCall(TblF, TblOps, "vtbl3");
+
+    SmallVector<Value *, 4> BslOps;
+    BslOps.push_back(CmpRes);
+    BslOps.push_back(Ops[0]);
+    BslOps.push_back(TblRes);
+    Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
+    return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
+  }
+  case AArch64::BI__builtin_neon_vqtbx3_v:
+  case AArch64::BI__builtin_neon_vqtbx3q_v:
+    Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
+  case AArch64::BI__builtin_neon_vtbx4_v:
+  case AArch64::BI__builtin_neon_vqtbx4_v:
+  case AArch64::BI__builtin_neon_vqtbx4q_v:
+    Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
+  }
+
+  if (!Int)
+    return 0;
+
+  Function *F = CGF.CGM.getIntrinsic(Int, Tys);
+  return CGF.EmitNeonCall(F, Ops, s);
+}
+
+Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
+                                               const CallExpr *E) {
   // Process AArch64 scalar builtins
   if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
     return Result;
 
+  // Process AArch64 table lookup builtins
+  if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
+    return Result;
+
   if (BuiltinID == AArch64::BI__clear_cache) {
     assert(E->getNumArgs() == 2 &&
            "Variadic __clear_cache slipped through on AArch64");
Index: test/CodeGen/aarch64-neon-tbl.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-neon-tbl.c
@@ -0,0 +1,445 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
+  // CHECK: test_vtbl1_s8
+  return vtbl1_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
+  // CHECK: test_vqtbl1_s8
+  return vqtbl1_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
+  // CHECK: test_vtbl2_s8
+  return vtbl2_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
+  // CHECK: test_vqtbl2_s8
+  return vqtbl2_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
+  // CHECK: test_vtbl3_s8
+  return vtbl3_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
+  // CHECK: test_vqtbl3_s8
+  return vqtbl3_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
+  // CHECK: test_vtbl4_s8
+  return vtbl4_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
+  // CHECK: test_vqtbl4_s8
+  return vqtbl4_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
+  // CHECK: test_vqtbl1q_s8
+  return vqtbl1q_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
+  // CHECK: test_vqtbl2q_s8
+  return vqtbl2q_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
+  // CHECK: test_vqtbl3q_s8
+  return vqtbl3q_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
+  // CHECK: test_vqtbl4q_s8
+  return vqtbl4q_s8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
+  // CHECK: test_vtbx1_s8
+  return vtbx1_s8(a, b, c);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+  // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
+  // CHECK: test_vtbx2_s8
+  return vtbx2_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
+  // CHECK: test_vtbx3_s8
+  return vtbx3_s8(a, b, c);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+  // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
+  // CHECK: test_vtbx4_s8
+  return vtbx4_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
+  // CHECK: test_vqtbx1_s8
+  return vqtbx1_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
+  // CHECK: test_vqtbx2_s8
+  return vqtbx2_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
+  // CHECK: test_vqtbx3_s8
+  return vqtbx3_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
+  // CHECK: test_vqtbx4_s8
+  return vqtbx4_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+  // CHECK: test_vqtbx1q_s8
+  return vqtbx1q_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
+  // CHECK: test_vqtbx2q_s8
+  return vqtbx2q_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
+  // CHECK: test_vqtbx3q_s8
+  return vqtbx3q_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
+  // CHECK: test_vqtbx4q_s8
+  return vqtbx4q_s8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
+  // CHECK: test_vtbl1_u8
+  return vtbl1_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl1_u8
+  return vqtbl1_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
+  // CHECK: test_vtbl2_u8
+  return vtbl2_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl2_u8
+  return vqtbl2_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
+  // CHECK: test_vtbl3_u8
+  return vtbl3_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl3_u8
+  return vqtbl3_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
+  // CHECK: test_vtbl4_u8
+  return vtbl4_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl4_u8
+  return vqtbl4_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl1q_u8
+  return vqtbl1q_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl2q_u8
+  return vqtbl2q_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl3q_u8
+  return vqtbl3q_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl4q_u8
+  return vqtbl4q_u8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
+  // CHECK: test_vtbx1_u8
+  return vtbx1_u8(a, b, c);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+  // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
+  // CHECK: test_vtbx2_u8
+  return vtbx2_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
+  // CHECK: test_vtbx3_u8
+  return vtbx3_u8(a, b, c);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+  // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
+  // CHECK: test_vtbx4_u8
+  return vtbx4_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx1_u8
+  return vqtbx1_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx2_u8
+  return vqtbx2_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx3_u8
+  return vqtbx3_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx4_u8
+  return vqtbx4_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx1q_u8
+  return vqtbx1q_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx2q_u8
+  return vqtbx2q_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx3q_u8
+  return vqtbx3q_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx4q_u8
+  return vqtbx4q_u8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
+  // CHECK: test_vtbl1_p8
+  return vtbl1_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl1_p8
+  return vqtbl1_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
+  // CHECK: test_vtbl2_p8
+  return vtbl2_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl2_p8
+  return vqtbl2_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
+  // CHECK: test_vtbl3_p8
+  return vtbl3_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl3_p8
+  return vqtbl3_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
+  // CHECK: test_vtbl4_p8
+  return vtbl4_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
+  // CHECK: test_vqtbl4_p8
+  return vqtbl4_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl1q_p8
+  return vqtbl1q_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl2q_p8
+  return vqtbl2q_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl3q_p8
+  return vqtbl3q_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
+  // CHECK: test_vqtbl4q_p8
+  return vqtbl4q_p8(a, b);
+  // CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
+  // CHECK: test_vtbx1_p8
+  return vtbx1_p8(a, b, c);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+  // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
+  // CHECK: test_vtbx2_p8
+  return vtbx2_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
+  // CHECK: test_vtbx3_p8
+  return vtbx3_p8(a, b, c);
+  // CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+  // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
+  // CHECK: test_vtbx4_p8
+  return vtbx4_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx1_p8
+  return vqtbx1_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx2_p8
+  return vqtbx2_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx3_p8
+  return vqtbx3_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
+  // CHECK: test_vqtbx4_p8
+  return vqtbx4_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx1q_p8
+  return vqtbx1q_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx2q_p8
+  return vqtbx2q_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx3q_p8
+  return vqtbx3q_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
+  // CHECK: test_vqtbx4q_p8
+  return vqtbx4q_p8(a, b, c);
+  // CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+}
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -494,6 +494,9 @@
     case 'g':
       quad = false;
       break;
+    case 'B':
+    case 'C':
+    case 'D':
     case 'j':
       quad = true;
       break;
@@ -557,6 +560,10 @@
   return type;
 }
 
+static bool IsMultiVecProto(const char p) {
+  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
+}
+
 /// TypeString - for a modifier and type, generate the name of the typedef for
 /// that type.  QUc -> uint8x8_t.
 static std::string TypeString(const char mod, StringRef typestr) {
@@ -631,11 +638,11 @@
       PrintFatalError("unhandled type!");
   }
 
-  if (mod == '2')
+  if (mod == '2' || mod == 'B')
     s += "x2";
-  if (mod == '3')
+  if (mod == '3' || mod == 'C')
     s += "x3";
-  if (mod == '4')
+  if (mod == '4' || mod == 'D')
     s += "x4";
 
   // Append _t, finishing the type string typedef type.
@@ -712,7 +719,7 @@
   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
   // fashion, storing them to a pointer arg.
   if (ret) {
-    if (mod >= '2' && mod <= '4')
+    if (IsMultiVecProto(mod))
       return "vv*"; // void result with void* first argument
     if (mod == 'f' || (ck != ClassB && type == 'f'))
       return quad ? "V4f" : "V2f";
@@ -729,11 +736,11 @@
   }
 
   // Non-return array types are passed as individual vectors.
-  if (mod == '2')
+  if (mod == '2' || mod == 'B')
     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
-  if (mod == '3')
+  if (mod == '3' || mod == 'C')
     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
-  if (mod == '4')
+  if (mod == '4' || mod == 'D')
     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
 
   if (mod == 'f' || (ck != ClassB && type == 'f'))
@@ -1996,7 +2003,7 @@
 
   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   // sret-like argument.
-  bool sret = (proto[0] >= '2' && proto[0] <= '4');
+  bool sret = IsMultiVecProto(proto[0]);
 
   bool define = UseMacro(proto);
 
@@ -2056,12 +2063,19 @@
 
     // Handle multiple-vector values specially, emitting each subvector as an
     // argument to the __builtin.
+    unsigned NumOfVec = 0;
     if (proto[i] >= '2' && proto[i] <= '4') {
+      NumOfVec = proto[i] - '0';
+    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
+      NumOfVec = proto[i] - 'A' + 1;
+    }
+
+    if (NumOfVec > 0) {
       // Check if an explicit cast is needed.
       if (argType != 'c' || argPoly || argUsgn)
         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
 
-      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
+      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
         s += args + ".val[" + utostr(vi) + "]";
         if ((vi + 1) < ve)
           s += ", ";
@@ -2586,7 +2600,7 @@
 
       // Builtins that return a struct of multiple vectors have an extra
       // leading arg for the struct return.
-      if (Proto[0] >= '2' && Proto[0] <= '4')
+      if (IsMultiVecProto(Proto[0]))
         ++immidx;
 
       // Add one to the index for each argument until we reach the immediate
@@ -2597,12 +2611,15 @@
           immidx += 1;
           break;
         case '2':
+        case 'B':
           immidx += 2;
           break;
         case '3':
+        case 'C':
           immidx += 3;
           break;
         case '4':
+        case 'D':
           immidx += 4;
           break;
         case 'i':
@@ -2710,7 +2727,7 @@
       }
     }
     // For sret builtins, adjust the pointer argument index.
-    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
+    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
       PtrArgNum += 1;
 
     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to