Please consider the new patch.
This time, if the coerce-to type is larger than the real type, we
reconstruct a value of the coerce-to type in a stack allocation, then
bitcast & copy it into the destination.
This is roughly the same logic as in CreateCoercedStore, but with
multiple sources.

On Sat, Feb 4, 2012 at 6:12 PM, Evgeniy Stepanov
<[email protected]> wrote:
> On Sat, Feb 4, 2012 at 1:07 AM, Eli Friedman <[email protected]> wrote:
>> On Fri, Feb 3, 2012 at 4:05 AM, Evgeniy Stepanov
>> <[email protected]> wrote:
>>> Hi,
>>>
>>> this is a fix for PR11905.
>>>
>>> The current behaviour when generating a function prolog for arguments
>>> of coerced types is to allocate a stack temp of the real argument type
>>> and store the argument(s) there. This is wrong, because storage size
>>> of the coerce-to type can be larger than that of the real type.
>>>
>>> The new behaviour is to allocate a temp of the coerced type, copy it,
>>> and then reference through a bitcasted pointer of the real type.
>>>
>>> Please review.
>>
>> The real type is sometimes larger than the coerced type... switching
>> from one to the other only changes which cases are broken.
>
> Could you give an example of that?
> In that case, what would you think of allocating the largest of the two types?
>
>>
>> -Eli
Index: test/CodeGen/arm-arguments.c
===================================================================
--- test/CodeGen/arm-arguments.c	(revision 149785)
+++ test/CodeGen/arm-arguments.c	(working copy)
@@ -153,3 +153,15 @@
 // AAPCS: define arm_aapcscc void @f30({{.*}} noalias sret
 struct s30 { _Complex int f0; };
 struct s30 f30() {}
+
+// PR11905
+struct s31 { char x; };
+void f31(struct s31 s) { }
+// AAPCS: @f31([1 x i32] %s.coerce)
+// AAPCS: %s = alloca %struct.s31, align 4
+// AAPCS: %tmp = alloca [1 x i32]
+// AAPCS: store [1 x i32] %s.coerce, [1 x i32]* %tmp
+// APCS-GNU: @f31([1 x i32] %s.coerce)
+// APCS-GNU: %s = alloca %struct.s31, align 4
+// APCS-GNU: %tmp = alloca [1 x i32]
+// APCS-GNU: store [1 x i32] %s.coerce, [1 x i32]* %tmp
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp	(revision 149785)
+++ lib/CodeGen/CGCall.cpp	(working copy)
@@ -1013,7 +1013,7 @@
         break;
       }
 
-      llvm::AllocaInst *Alloca = CreateMemTemp(Ty, "coerce");
+      llvm::AllocaInst *Alloca = CreateMemTemp(Ty, Arg->getName());
 
       // The alignment we need to use is the max of the requested alignment for
       // the argument plus the alignment required by our access code below.
@@ -1037,16 +1037,37 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
-      if (llvm::StructType *STy =
-            dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) {
-        Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+      llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
+      if (STy && STy->getNumElements() > 1) {
+	uint64_t SrcSize = CGM.getTargetData().getTypeAllocSize(STy);
+	llvm::Type *DstTy =
+	  cast<llvm::PointerType>(Ptr->getType())->getElementType();
+	uint64_t DstSize = CGM.getTargetData().getTypeAllocSize(DstTy);
 
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          assert(AI != Fn->arg_end() && "Argument mismatch!");
-          AI->setName(Arg->getName() + ".coerce" + Twine(i));
-          llvm::Value *EltPtr = Builder.CreateConstGEP2_32(Ptr, 0, i);
-          Builder.CreateStore(AI++, EltPtr);
-        }
+	if (SrcSize <= DstSize) {
+	  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+
+	  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+	    assert(AI != Fn->arg_end() && "Argument mismatch!");
+	    AI->setName(Arg->getName() + ".coerce" + Twine(i));
+	    llvm::Value *EltPtr = Builder.CreateConstGEP2_32(Ptr, 0, i);
+	    Builder.CreateStore(AI++, EltPtr);
+	  }
+	} else {
+	  llvm::Value *TempAlloca = CreateTempAlloca(ArgI.getCoerceToType(), "coerce");
+
+	  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+	    assert(AI != Fn->arg_end() && "Argument mismatch!");
+	    AI->setName(Arg->getName() + ".coerce" + Twine(i));
+	    llvm::Value *EltPtr = Builder.CreateConstGEP2_32(TempAlloca, 0, i);
+	    Builder.CreateStore(AI++, EltPtr);
+	  }
+
+	  llvm::Value *Casted =
+	    Builder.CreateBitCast(TempAlloca, llvm::PointerType::getUnqual(ConvertTypeForMem(Ty)));
+	  llvm::LoadInst *Load = Builder.CreateLoad(Casted);
+	  Builder.CreateStore(Load, Ptr);
+	}
       } else {
         // Simple case, just do a coerced store of the argument into the alloca.
         assert(AI != Fn->arg_end() && "Argument mismatch!");
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to