On Oct 31, 2012, at 9:29 AM, Bob Wilson <[email protected]> wrote:
> > On Oct 30, 2012, at 4:21 PM, manman ren <[email protected]> wrote: > >> Author: mren >> Date: Tue Oct 30 18:21:41 2012 >> New Revision: 167058 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev >> Log: >> ARM AAPCS-VFP: fix handling of homogeneous aggregate. >> >> If HA can only partially fit into VFP registers, we add padding to make sure >> HA will be on stack and later VFP CPRCs will be on stack as well. > > Thanks for working on this, Manman. I noticed while reviewing this that the > AAPCS-VFP ABI is even trickier than I remembered. The VFP registers aren't > simply allocated in order. If you skip over some registers due to alignment > constraints, you may need to "backfill" those registers for later arguments. > See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this. Thanks, I will look into that. -Manman > > That means that the front-end is going to have to keep track of the registers > available for backfilling so you can have an accurate count of how many > remain available for homogeneous aggregates. 
> >> >> Modified: >> cfe/trunk/lib/CodeGen/TargetInfo.cpp >> cfe/trunk/test/CodeGen/arm-homogenous.c >> >> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff >> ============================================================================== >> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) >> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012 >> @@ -2863,7 +2863,8 @@ >> ABIKind getABIKind() const { return Kind; } >> >> ABIArgInfo classifyReturnType(QualType RetTy) const; >> - ABIArgInfo classifyArgumentType(QualType RetTy) const; >> + ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP, >> + bool &IsHA) const; >> bool isIllegalVectorType(QualType Ty) const; >> >> virtual void computeInfo(CGFunctionInfo &FI) const; >> @@ -2907,10 +2908,32 @@ >> } >> >> void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { >> + // To correctly handle Homogeneous Aggregate, we need to keep track of the >> + // number of VFP registers allocated so far. >> + // C.1.vfp If the argument is a VFP CPRC and there are sufficient >> consecutive >> + // VFP registers of the appropriate type unallocated then the argument is >> + // allocated to the lowest-numbered sequence of such registers. >> + // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are >> + // unallocated are marked as unavailable. >> + unsigned AllocatedVFP = 0; >> FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); >> for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); >> - it != ie; ++it) >> - it->info = classifyArgumentType(it->type); >> + it != ie; ++it) { >> + unsigned PreAllocation = AllocatedVFP; >> + bool IsHA = false; >> + // 6.1.2.3 There is one VFP co-processor register class using registers >> + // s0-s15 (d0-d7) for passing arguments. 
>> + const unsigned NumVFPs = 16; >> + it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA); >> + // If we do not have enough VFP registers for the HA, any VFP registers >> + // that are unallocated are marked as unavailable. To achieve this, we >> add >> + // padding of (NumVFPs - PreAllocation) floats. >> + if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) { >> + llvm::Type *PaddingTy = llvm::ArrayType::get( >> + llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation); >> + it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy); >> + } >> + } >> >> // Always honor user-specified calling convention. >> if (FI.getCallingConvention() != llvm::CallingConv::C) >> @@ -3012,7 +3035,17 @@ >> return (Members > 0 && Members <= 4); >> } >> >> -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const { >> +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned >> &AllocatedVFP, >> + bool &IsHA) const { >> + // We update number of allocated VFPs according to >> + // 6.1.2.1 The following argument types are VFP CPRCs: >> + // A single-precision floating-point type (including promoted >> + // half-precision types); A double-precision floating-point type; >> + // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate >> + // with a Base Type of a single- or double-precision floating-point >> type, >> + // 64-bit containerized vectors or 128-bit containerized vectors with >> one >> + // to four Elements. >> + >> // Handle illegal vector types here. >> if (isIllegalVectorType(Ty)) { >> uint64_t Size = getContext().getTypeSize(Ty); >> @@ -3024,15 +3057,38 @@ >> if (Size == 64) { >> llvm::Type *ResType = llvm::VectorType::get( >> llvm::Type::getInt32Ty(getVMContext()), 2); >> + // Align AllocatedVFP to an even number to use a D register. 
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2); >> + AllocatedVFP += 2; // 1 D register = 2 S registers >> return ABIArgInfo::getDirect(ResType); >> } >> if (Size == 128) { >> llvm::Type *ResType = llvm::VectorType::get( >> llvm::Type::getInt32Ty(getVMContext()), 4); >> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4); >> + AllocatedVFP += 4; // 1 Q register = 4 S registers >> return ABIArgInfo::getDirect(ResType); >> } >> return ABIArgInfo::getIndirect(0, /*ByVal=*/false); >> } >> + // Update AllocatedVFP for legal vector types. >> + if (const VectorType *VT = Ty->getAs<VectorType>()) { >> + uint64_t Size = getContext().getTypeSize(VT); >> + // Size of a legal vector should be power of 2 and above 64. >> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : >> 2); >> + AllocatedVFP += (Size / 32); >> + } >> + // Update AllocatedVFP for floating point types. >> + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { >> + if (BT->getKind() == BuiltinType::Half || >> + BT->getKind() == BuiltinType::Float) >> + AllocatedVFP += 1; >> + if (BT->getKind() == BuiltinType::Double || >> + BT->getKind() == BuiltinType::LongDouble) { >> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2); >> + AllocatedVFP += 2; >> + } >> + } >> >> if (!isAggregateTypeForABI(Ty)) { >> // Treat an enum type as its underlying type. >> @@ -3053,10 +3109,28 @@ >> return ABIArgInfo::getIndirect(0, /*ByVal=*/false); >> >> if (getABIKind() == ARMABIInfo::AAPCS_VFP) { >> - // Homogeneous Aggregates need to be expanded. >> + // Homogeneous Aggregates need to be expanded when we can fit the >> aggregate >> + // into VFP registers. >> const Type *Base = 0; >> - if (isHomogeneousAggregate(Ty, Base, getContext())) { >> + uint64_t Members = 0; >> + if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { >> assert(Base && "Base class should be set for homogeneous aggregate"); >> + // Base can be a floating-point or a vector. 
>> + if (Base->isVectorType()) { >> + // ElementSize is in number of floats. >> + unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4; >> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, >> + ElementSize); >> + AllocatedVFP += Members * ElementSize; >> + } else if (Base->isSpecificBuiltinType(BuiltinType::Float)) >> + AllocatedVFP += Members; >> + else { >> + assert(Base->isSpecificBuiltinType(BuiltinType::Double) || >> + Base->isSpecificBuiltinType(BuiltinType::LongDouble)); >> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2); >> + AllocatedVFP += Members * 2; // Base type is double. >> + } >> + IsHA = true; >> return ABIArgInfo::getExpand(); >> } >> } >> >> Modified: cfe/trunk/test/CodeGen/arm-homogenous.c >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff >> ============================================================================== >> --- cfe/trunk/test/CodeGen/arm-homogenous.c (original) >> +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012 >> @@ -156,6 +156,40 @@ >> } >> // CHECK: declare arm_aapcs_vfpcc >> %union.union_with_struct_with_fundamental_elems >> @returns_union_with_struct_with_fundamental_elems() >> >> +// Make sure HAs that can be partially fit into VFP registers will be >> allocated >> +// on stack and that later VFP candidates will go on stack as well. 
>> +typedef struct { >> + double x; >> + double a2; >> + double a3; >> + double a4; >> +} struct_of_four_doubles; >> +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles >> b, struct_of_four_doubles c, double d); >> +struct_of_four_doubles g_s4d; >> + >> +void test_struct_of_four_doubles(void) { >> +// CHECK: test_struct_of_four_doubles >> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double >> {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x >> float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, >> double {{.*}}) >> + takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0); >> +} >> + >> +typedef __attribute__(( ext_vector_type(8) )) char __char8; >> +typedef __attribute__(( ext_vector_type(4) )) short __short4; >> +typedef struct { >> + __char8 a1; >> + __short4 a2; >> + __char8 a3; >> + __short4 a4; >> +} struct_of_vecs; >> +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs >> c, double d); >> +struct_of_vecs g_vec; >> + >> +void test_struct_of_vecs(void) { >> +// CHECK: test_struct_of_vecs >> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 >> x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x >> float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> >> {{.*}}, double {{.*}}) >> + takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0); >> +} >> + >> // FIXME: Tests necessary: >> // - Vectors >> // - C++ stuff >> >> >> _______________________________________________ >> cfe-commits mailing list >> [email protected] >> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > _______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
