Re: [PATCH] ARM: Homogeneous aggregates must be allocated to contiguous registers (clang part)

Oliver Stannard Wed, 19 Mar 2014 02:56:32 -0700

  The code which I removed from clang enforced two AACPS-VFP rules:
  * CPRCs cannot be split between registers and the stack
  * If anything has been allocated to the stack, large arguments cannot be 
split between GPRs and the stack


  My change to LLVM only covers the first case, so I have reverted the code in 
clang which fixes the second case. This means that the amount of code removed 
from clang is now small, but this set of patches still reduces the PCS 
knowledge embedded in the IR somewhat.

http://llvm-reviews.chandlerc.com/D3083

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D3083?vs=7839&id=7938#toc

Files:
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/TargetInfo.cpp
  test/CodeGen/arm-aapcs-vfp.c
  test/CodeGen/arm-homogenous.c

Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -158,6 +158,23 @@
   return CC_C;
 }
 
+static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) {
+  switch (FI.getEffectiveCallingConvention()) {
+  case llvm::CallingConv::C:
+    switch (Target.getTriple().getEnvironment()) {
+    case llvm::Triple::EABIHF:
+    case llvm::Triple::GNUEABIHF:
+      return true;
+    default:
+      return false;
+    }
+  case llvm::CallingConv::ARM_AAPCS_VFP:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Arrange the argument and result information for a call to an
 /// unknown C++ non-static member function of the given abstract type.
 /// (Zero value of RD means we don't have any meaningful "this" argument type,
@@ -990,8 +1007,11 @@
       // If the coerce-to type is a first class aggregate, flatten it.  Either
       // way is semantically identical, but fast-isel and the optimizer
       // generally likes scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::Type *argType = argAI.getCoerceToType();
-      if (llvm::StructType *st = dyn_cast<llvm::StructType>(argType)) {
+      llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
+      if (st && !isAAPCSVFP(FI, getTarget())) {
         for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
           argTypes.push_back(st->getElementType(i));
       } else {
@@ -1193,23 +1213,24 @@
       else if (ParamType->isUnsignedIntegerOrEnumerationType())
         Attrs.addAttribute(llvm::Attribute::ZExt);
       // FALL THROUGH
-    case ABIArgInfo::Direct:
+    case ABIArgInfo::Direct: {
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
 
       // FIXME: handle sseregparm someday...
 
-      if (llvm::StructType *STy =
-          dyn_cast<llvm::StructType>(AI.getCoerceToType())) {
+      llvm::StructType *STy =
+          dyn_cast<llvm::StructType>(AI.getCoerceToType());
+      if (!isAAPCSVFP(FI, getTarget()) && STy) {
         unsigned Extra = STy->getNumElements()-1;  // 1 will be added below.
         if (Attrs.hasAttributes())
           for (unsigned I = 0; I < Extra; ++I)
             PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index + I,
                                                   Attrs));
         Index += Extra;
       }
       break;
-
+    }
     case ABIArgInfo::Indirect:
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
@@ -1468,8 +1489,10 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
-      if (STy && STy->getNumElements() > 1) {
+      if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) {
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
         llvm::Type *DstTy =
           cast<llvm::PointerType>(Ptr->getType())->getElementType();
@@ -2705,8 +2728,11 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
-      if (llvm::StructType *STy =
-            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
+      llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
+      if (STy && !isAAPCSVFP(CallInfo, getTarget())) {
         llvm::Type *SrcTy =
           cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -3199,7 +3199,7 @@
 
 private:
   ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool &IsHA, bool isVariadic,
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
                                   bool &IsCPRC) const;
   bool isIllegalVectorType(QualType Ty) const;
 
@@ -3305,22 +3305,10 @@
        it != ie; ++it) {
     unsigned PreAllocationVFPs = AllocatedVFPs;
     unsigned PreAllocationGPRs = AllocatedGPRs;
-    bool IsHA = false;
     bool IsCPRC = false;
     // 6.1.2.3 There is one VFP co-processor register class using registers
     // s0-s15 (d0-d7) for passing arguments.
-    it->info = classifyArgumentType(it->type, IsHA, FI.isVariadic(), IsCPRC);
-    assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs");
-    // If we do not have enough VFP registers for the HA, any VFP registers
-    // that are unallocated are marked as unavailable. To achieve this, we add
-    // padding of (NumVFPs - PreAllocationVFP) floats.
-    // Note that IsHA will only be set when using the AAPCS-VFP calling convention,
-    // and the callee is not variadic.
-    if (IsHA && AllocatedVFPs > NumVFPs && PreAllocationVFPs < NumVFPs) {
-      llvm::Type *PaddingTy = llvm::ArrayType::get(
-          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFPs);
-      it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
-    }
+    it->info = classifyArgumentType(it->type, FI.isVariadic(), IsCPRC);
 
     // If we have allocated some arguments onto the stack (due to running
     // out of VFP registers), we cannot split an argument between GPRs and
@@ -3517,8 +3505,7 @@
     VFPRegs[i] = 0;
 }
 
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool &IsHA,
-                                            bool isVariadic,
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
                                             bool &IsCPRC) const {
   // We update number of allocated VFPs according to
   // 6.1.2.1 The following argument types are VFP CPRCs:
@@ -3630,9 +3617,8 @@
                Base->isSpecificBuiltinType(BuiltinType::LongDouble));
         markAllocatedVFPs(2, Members * 2);
       }
-      IsHA = true;
       IsCPRC = true;
-      return ABIArgInfo::getExpand();
+      return ABIArgInfo::getDirect();
     }
   }
 
Index: test/CodeGen/arm-aapcs-vfp.c
===================================================================
--- test/CodeGen/arm-aapcs-vfp.c
+++ test/CodeGen/arm-aapcs-vfp.c
@@ -19,35 +19,35 @@
   float f3;
   float f4;
 };
-// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
 extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
 struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
   return struct_callee(arg);
 }
 
-// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, [4 x i32] %{{.*}}, ...)
+// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, ...)
 struct homogeneous_struct test_struct_variadic(struct homogeneous_struct arg, ...) {
   return struct_callee(arg);
 }
 
 struct nested_array {
   double d[4];
 };
-// CHECK: define arm_aapcs_vfpcc void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
 extern void array_callee(struct nested_array);
 void test_array(struct nested_array arg) {
   array_callee(arg);
 }
 
 extern void complex_callee(__complex__ double);
-// CHECK: define arm_aapcs_vfpcc void @test_complex(double %{{.*}}, double %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}})
 void test_complex(__complex__ double cd) {
   complex_callee(cd);
 }
 
 // Long double is the same as double on AAPCS, it should be homogeneous.
 extern void complex_long_callee(__complex__ long double);
-// CHECK: define arm_aapcs_vfpcc void @test_complex_long(double %{{.*}}, double %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_complex_long({ double, double } %{{.*}})
 void test_complex_long(__complex__ long double cd) {
   complex_callee(cd);
 }
@@ -61,7 +61,7 @@
   float f3;
   float f4;
 };
-// CHECK: define arm_aapcs_vfpcc void @test_big([5 x i32] %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_big({ [5 x i32] } %{{.*}})
 extern void big_callee(struct big_struct);
 void test_big(struct big_struct arg) {
   big_callee(arg);
@@ -74,14 +74,14 @@
   float f1;
   int i2;
 };
-// CHECK: define arm_aapcs_vfpcc void @test_hetero([2 x i32] %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_hetero({ [2 x i32] } %{{.*}})
 extern void hetero_callee(struct heterogeneous_struct);
 void test_hetero(struct heterogeneous_struct arg) {
   hetero_callee(arg);
 }
 
 // Neon multi-vector types are homogeneous aggregates.
-// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
 int8x16_t f0(int8x16x4_t v4) {
   return vaddq_s8(v4.val[0], v4.val[3]);
 }
@@ -94,7 +94,7 @@
   int32x2_t v3;
   int16x4_t v4;
 };
-// CHECK: define arm_aapcs_vfpcc void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
 extern void neon_callee(struct neon_struct);
 void test_neon(struct neon_struct arg) {
   neon_callee(arg);
@@ -108,8 +108,8 @@
 // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i64 %k, i32 %l)
 void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, long long k, int l) {}
 
-// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], i64 %k.0, i32 %k.1)
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, { [2 x i64] } %{{.*}})
 void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {}
 
-// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], i64 %k.0, i32 %k.1)
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, { [2 x i64] } %{{.*}})
 struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {}
Index: test/CodeGen/arm-homogenous.c
===================================================================
--- test/CodeGen/arm-homogenous.c
+++ test/CodeGen/arm-homogenous.c
@@ -17,7 +17,7 @@
 void test_union_with_first_floats(void) {
   takes_union_with_first_floats(g_u_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats([4 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats({ [4 x i32] })
 
 void test_return_union_with_first_floats(void) {
   g_u_f = returns_union_with_first_floats();
@@ -37,7 +37,7 @@
 void test_union_with_non_first_floats(void) {
   takes_union_with_non_first_floats(g_u_nf_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats([4 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats({ [4 x i32] })
 
 void test_return_union_with_non_first_floats(void) {
   g_u_nf_f = returns_union_with_non_first_floats();
@@ -57,7 +57,7 @@
 void test_struct_with_union_with_first_floats(void) {
   takes_struct_with_union_with_first_floats(g_s_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats([5 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats({ [5 x i32] })
 
 void test_return_struct_with_union_with_first_floats(void) {
   g_s_f = returns_struct_with_union_with_first_floats();
@@ -77,7 +77,7 @@
 void test_struct_with_union_with_non_first_floats(void) {
   takes_struct_with_union_with_non_first_floats(g_s_nf_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats([5 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats({ [5 x i32] })
 
 void test_return_struct_with_union_with_non_first_floats(void) {
   g_s_nf_f = returns_struct_with_union_with_non_first_floats();
@@ -103,9 +103,9 @@
 
 void test_struct_with_fundamental_elems(void) {
   takes_struct_with_fundamental_elems(g_s);
-// CHECK:  call arm_aapcs_vfpcc  void @takes_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float{{.*}}, float {{.*}})
+// CHECK:  call arm_aapcs_vfpcc  void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems {{.*}})
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(float, float, float, float)
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems)
 
 void test_return_struct_with_fundamental_elems(void) {
   g_s = returns_struct_with_fundamental_elems();
@@ -124,9 +124,9 @@
 
 void test_struct_with_array(void) {
   takes_struct_with_array(g_s_a);
-// CHECK:   call arm_aapcs_vfpcc  void @takes_struct_with_array(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}})
+// CHECK:   call arm_aapcs_vfpcc  void @takes_struct_with_array(%struct.struct_with_array {{.*}})
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(float, float, float, float)
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(%struct.struct_with_array)
 
 void test_return_struct_with_array(void) {
   g_s_a = returns_struct_with_array();
@@ -146,9 +146,9 @@
 
 void test_union_with_struct_with_fundamental_elems(void) {
   takes_union_with_struct_with_fundamental_elems(g_u_s_fe);
-// CHECK: call arm_aapcs_vfpcc  void @takes_union_with_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}})
+// CHECK: call arm_aapcs_vfpcc  void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems {{.*}})
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(float, float, float, float)
+// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems)
 
 void test_return_union_with_struct_with_fundamental_elems(void) {
   g_u_s_fe = returns_union_with_struct_with_fundamental_elems();
@@ -169,22 +169,22 @@
 
 void test_struct_of_four_doubles(void) {
 // CHECK: test_struct_of_four_doubles
-// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
   takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
 }
 
 extern void takes_struct_of_four_doubles_variadic(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d, ...);
 
 void test_struct_of_four_doubles_variadic(void) {
 // CHECK: test_struct_of_four_doubles_variadic
-// CHECK: call arm_aapcs_vfpcc void (double, [4 x i64], [4 x i64], double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, [4 x i64] {{.*}}, [4 x i64] {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void (double, { [4 x i64] }, { [4 x i64] }, double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, { [4 x i64] } {{.*}}, { [4 x i64] } {{.*}}, double {{.*}})
   takes_struct_of_four_doubles_variadic(3.0, g_s4d, g_s4d, 4.0);
 }
 
 extern void takes_struct_with_backfill(float f1, double a, float f2, struct_of_four_doubles b, struct_of_four_doubles c, double d);
 void test_struct_with_backfill(void) {
 // CHECK: test_struct_with_backfill
-// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [4 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
   takes_struct_with_backfill(3.0, 3.1, 3.2, g_s4d, g_s4d, 4.0);
 }
 
@@ -201,7 +201,7 @@
 
 void test_struct_of_vecs(void) {
 // CHECK: test_struct_of_vecs
-// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}})
   takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
 }

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: [PATCH] ARM: Homogeneous aggregates must be allocated to contiguous registers (clang part)

Reply via email to