olista01 added you to the CC list for the revision "AAPCS: Cannot split
argument between GPRs and the stack after a CPRC has been allocated to the
stack".
Hi rengolin,
According to the AAPCS, arguments can only be split between GPRs and the stack
if no prior arguments have been allocated to the stack. This situation can
occur if there are a large number of floating-point arguments, not all of which can
be allocated to VFP registers. Currently, llvm/clang does not understand this.
Unfortunately, since the llvm backend does not know about structs, this has to
be fixed in clang. The attached patch extends a similar mechanism currently
used for homogeneous aggregates, to insert dummy arguments to prevent the
general-purpose registers being used incorrectly.
http://llvm-reviews.chandlerc.com/D2726
Files:
lib/CodeGen/TargetInfo.cpp
test/CodeGen/arm-aapcs-vfp.c
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -3199,10 +3199,12 @@
ABIKind getABIKind() const { return Kind; }
private:
- ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
+ ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
+ unsigned &AllocatedGPRs) const;
ABIArgInfo classifyArgumentType(QualType RetTy, int *VFPRegs,
unsigned &AllocatedVFP,
- bool &IsHA, bool isVariadic) const;
+ bool &IsHA, bool isVariadic,
+ unsigned &AllocatedGPR, bool &IsCPRC) const;
bool isIllegalVectorType(QualType Ty) const;
virtual void computeInfo(CGFunctionInfo &FI) const;
@@ -3298,23 +3300,43 @@
// unallocated are marked as unavailable.
unsigned AllocatedVFP = 0;
int VFPRegs[16] = { 0 };
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
+ unsigned AllocatedGPR = 0;
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
+ AllocatedGPR);
for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
it != ie; ++it) {
- unsigned PreAllocation = AllocatedVFP;
+ unsigned PreAllocationVFP = AllocatedVFP;
+ unsigned PreAllocationGPR = AllocatedGPR;
bool IsHA = false;
+ bool IsCPRC = false;
// 6.1.2.3 There is one VFP co-processor register class using registers
// s0-s15 (d0-d7) for passing arguments.
const unsigned NumVFPs = 16;
- it->info = classifyArgumentType(it->type, VFPRegs, AllocatedVFP, IsHA, FI.isVariadic());
+ const unsigned NumGPRs = 4;
+ it->info = classifyArgumentType(it->type, VFPRegs, AllocatedVFP, IsHA,
+ FI.isVariadic(), AllocatedGPR, IsCPRC);
+ assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs");
// If we do not have enough VFP registers for the HA, any VFP registers
// that are unallocated are marked as unavailable. To achieve this, we add
- // padding of (NumVFPs - PreAllocation) floats.
+ // padding of (NumVFPs - PreAllocationVFP) floats.
// Note that IsHA will only be set when using the AAPCS-VFP calling convention,
// and the callee is not variadic.
- if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
+ if (IsHA && AllocatedVFP > NumVFPs && PreAllocationVFP < NumVFPs) {
llvm::Type *PaddingTy = llvm::ArrayType::get(
- llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
+ llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFP);
+ it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
+ }
+
+ // If we have allocated some arguments onto the stack (due to running
+ // out of VFP registers), we cannot split an argument between GPRs and
+ // the stack. If this situation occurs, we add padding to prevent the
+ // GPRs from being used. In this situation, the current argument could
+ // only be allocated by rule C.8, so rule C.6 would mark these GPRs as
+ // unusable anyway.
+ const bool StackUsed = PreAllocationGPR > NumGPRs || PreAllocationVFP > NumVFPs;
+ if (!IsCPRC && PreAllocationGPR < NumGPRs && AllocatedGPR > NumGPRs && StackUsed) {
+ llvm::Type *PaddingTy = llvm::ArrayType::get(
+ llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPR);
it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
}
}
@@ -3453,8 +3475,12 @@
unsigned Alignment,
unsigned NumRequired) {
// Early Exit.
- if (AllocatedVFP >= 16)
+ if (AllocatedVFP >= 16) {
+ // We use AllocatedVFP > 16 to signal that some CPRCs were allocated on
+ // the stack.
+ AllocatedVFP = 17;
return;
+ }
// C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
// VFP registers of the appropriate type unallocated then the argument is
// allocated to the lowest-numbered sequence of such registers.
@@ -3479,9 +3505,24 @@
AllocatedVFP = 17; // We do not have enough VFP registers.
}
+/// Update AllocatedGPRs to record the number of general purpose registers
+/// which have been allocated. It is valid for AllocatedGPRs to go above 4;
+/// this represents arguments being stored on the stack.
+static void markAllocatedGPRs(unsigned &AllocatedGPRs, unsigned Alignment,
+ unsigned NumRequired) {
+ assert((Alignment == 1 || Alignment == 2) && "Alignment must be 4 or 8 bytes");
+
+ if (Alignment == 2 && AllocatedGPRs & 0x1) // odd count: skip one register so the 8-byte-aligned value starts on an even one
+ AllocatedGPRs += 1;
+
+ AllocatedGPRs += NumRequired;
+}
+
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs,
unsigned &AllocatedVFP,
- bool &IsHA, bool isVariadic) const {
+ bool &IsHA, bool isVariadic,
+ unsigned &AllocatedGPRs,
+ bool &IsCPRC) const {
// We update number of allocated VFPs according to
// 6.1.2.1 The following argument types are VFP CPRCs:
// A single-precision floating-point type (including promoted
@@ -3497,49 +3538,76 @@
if (Size <= 32) {
llvm::Type *ResType =
llvm::Type::getInt32Ty(getVMContext());
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
llvm::Type *ResType = llvm::VectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 2);
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2);
+ if (getABIKind() == ARMABIInfo::AAPCS || isVariadic){
+ markAllocatedGPRs(AllocatedGPRs, 2, 2);
+ } else {
+ markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2);
+ IsCPRC = true;
+ }
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
llvm::Type *ResType = llvm::VectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 4);
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 4, 4);
+ if (getABIKind() == ARMABIInfo::AAPCS || isVariadic) {
+ markAllocatedGPRs(AllocatedGPRs, 2, 4);
+ } else {
+ markAllocatedVFPs(VFPRegs, AllocatedVFP, 4, 4);
+ IsCPRC = true;
+ }
return ABIArgInfo::getDirect(ResType);
}
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
}
// Update VFPRegs for legal vector types.
- if (const VectorType *VT = Ty->getAs<VectorType>()) {
- uint64_t Size = getContext().getTypeSize(VT);
- // Size of a legal vector should be power of 2 and above 64.
- markAllocatedVFPs(VFPRegs, AllocatedVFP, Size >= 128 ? 4 : 2, Size / 32);
+ if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ uint64_t Size = getContext().getTypeSize(VT);
+ // Size of a legal vector should be power of 2 and above 64.
+ markAllocatedVFPs(VFPRegs, AllocatedVFP, Size >= 128 ? 4 : 2, Size / 32);
+ IsCPRC = true;
+ }
}
// Update VFPRegs for floating point types.
- if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
- if (BT->getKind() == BuiltinType::Half ||
- BT->getKind() == BuiltinType::Float)
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 1, 1);
- if (BT->getKind() == BuiltinType::Double ||
- BT->getKind() == BuiltinType::LongDouble)
- markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2);
+ if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ if (BT->getKind() == BuiltinType::Half ||
+ BT->getKind() == BuiltinType::Float) {
+ markAllocatedVFPs(VFPRegs, AllocatedVFP, 1, 1);
+ IsCPRC = true;
+ }
+ if (BT->getKind() == BuiltinType::Double ||
+ BT->getKind() == BuiltinType::LongDouble) {
+ markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, 2);
+ IsCPRC = true;
+ }
+ }
}
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
- if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
Ty = EnumTy->getDecl()->getIntegerType();
+ }
+ unsigned Size = getContext().getTypeSize(Ty);
+ if (!IsCPRC)
+ markAllocatedGPRs(AllocatedGPRs, Size > 32 ? 2 : 1, (Size + 31) / 32);
return (Ty->isPromotableIntegerType() ?
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
}
- if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+ if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
+ }
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
@@ -3566,6 +3634,7 @@
markAllocatedVFPs(VFPRegs, AllocatedVFP, 2, Members * 2);
}
IsHA = true;
+ IsCPRC = true;
return ABIArgInfo::getExpand();
}
}
@@ -3580,6 +3649,8 @@
getABIKind() == ARMABIInfo::AAPCS)
ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
+ // Update Allocated GPRs
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0, /*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
@@ -3592,9 +3663,11 @@
if (getContext().getTypeAlign(Ty) <= 32) {
ElemTy = llvm::Type::getInt32Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
+ markAllocatedGPRs(AllocatedGPRs, 1, SizeRegs);
} else {
ElemTy = llvm::Type::getInt64Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
+ markAllocatedGPRs(AllocatedGPRs, 2, SizeRegs * 2);
}
llvm::Type *STy =
@@ -3687,13 +3760,16 @@
return true;
}
-ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic) const {
+ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
+ unsigned &AllocatedGPRs) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
// Large vector types should be returned via memory.
- if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
+ if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) {
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0);
+ }
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
@@ -3706,8 +3782,10 @@
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
- if (isRecordReturnIndirect(RetTy, getCXXABI()))
+ if (isRecordReturnIndirect(RetTy, getCXXABI())) {
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ }
// Are we following APCS?
if (getABIKind() == APCS) {
@@ -3734,6 +3812,7 @@
}
// Otherwise return in memory.
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0);
}
@@ -3764,6 +3843,7 @@
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
}
+ markAllocatedGPRs(AllocatedGPRs, 1, 1);
return ABIArgInfo::getIndirect(0);
}
Index: test/CodeGen/arm-aapcs-vfp.c
===================================================================
--- test/CodeGen/arm-aapcs-vfp.c
+++ test/CodeGen/arm-aapcs-vfp.c
@@ -103,3 +103,13 @@
// CHECK-LABEL: define arm_aapcs_vfpcc void @f33(%struct.s33* byval %s)
struct s33 { char buf[32*32]; };
void f33(struct s33 s) { }
+
+typedef struct { long long x; int y; } struct_long_long_int;
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i64 %k, i32 %l)
+void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, long long k, int l) {}
+
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], i64 %k.0, i32 %k.1)
+void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {}
+
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], i64 %k.0, i32 %k.1)
+struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits