This revision was automatically updated to reflect the committed changes.
Closed by commit rL241898: Respect alignment when loading up a coerced function argument (authored by uweigand).
Changed prior to commit:
http://reviews.llvm.org/D11033?vs=29341&id=29440#toc
Repository:
rL LLVM
http://reviews.llvm.org/D11033
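
For context, the scenario this change addresses is the one covered by the new
align-systemz.c test included below; a minimal illustration (adapted from that
test, so the names match the patch) is:

  /* A struct whose only member is under-aligned relative to its type. */
  struct arg { long y __attribute__((packed, aligned(4))); };

  extern struct arg x;
  void f(struct arg);

  void test(void)
  {
    f(x);  /* passed by value; coerced to a single i64 argument load */
  }

Previously the coerced i64 load of x was emitted with align 1 (or, on other
code paths, with the natural alignment of the coerced IR type). With this
change the load and store instructions carry the actual alignment of the
source or destination object, so the test now expects
"load i64, ... align 4".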
Files:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/test/CodeGen/align-systemz.c
cfe/trunk/test/CodeGen/arm64-abi-vector.c
cfe/trunk/test/CodeGen/arm64-arguments.c
cfe/trunk/test/CodeGen/arm64-be-bitfield.c
cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c
cfe/trunk/test/CodeGen/ppc64le-aggregates.c
cfe/trunk/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
cfe/trunk/test/CodeGenCXX/varargs.cpp
Index: cfe/trunk/lib/CodeGen/CGCall.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp
+++ cfe/trunk/lib/CodeGen/CGCall.cpp
@@ -912,20 +912,21 @@
/// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
-/// a pointer to an object of type \arg Ty.
+/// a pointer to an object of type \arg Ty, known to be aligned to
+/// \arg SrcAlign bytes.
///
/// This safely handles the case when the src type is smaller than the
/// destination type; in this situation the values of bits which not
/// present in the src are undefined.
static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
- llvm::Type *Ty,
+ llvm::Type *Ty, CharUnits SrcAlign,
CodeGenFunction &CGF) {
llvm::Type *SrcTy =
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
// If SrcTy and Ty are the same, just do a load.
if (SrcTy == Ty)
- return CGF.Builder.CreateLoad(SrcPtr);
+ return CGF.Builder.CreateAlignedLoad(SrcPtr, SrcAlign.getQuantity());
uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty);
@@ -940,7 +941,8 @@
// extension or truncation to the desired type.
if ((isa<llvm::IntegerType>(Ty) || isa<llvm::PointerType>(Ty)) &&
(isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy))) {
- llvm::LoadInst *Load = CGF.Builder.CreateLoad(SrcPtr);
+ llvm::LoadInst *Load =
+ CGF.Builder.CreateAlignedLoad(SrcPtr, SrcAlign.getQuantity());
return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF);
}
@@ -954,64 +956,67 @@
// to that information.
llvm::Value *Casted =
CGF.Builder.CreateBitCast(SrcPtr, llvm::PointerType::getUnqual(Ty));
- llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted);
- // FIXME: Use better alignment / avoid requiring aligned load.
- Load->setAlignment(1);
- return Load;
+ return CGF.Builder.CreateAlignedLoad(Casted, SrcAlign.getQuantity());
}
// Otherwise do coercion through memory. This is stupid, but
// simple.
- llvm::Value *Tmp = CGF.CreateTempAlloca(Ty);
+ llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(Ty);
+ Tmp->setAlignment(SrcAlign.getQuantity());
llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy();
llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy);
llvm::Value *SrcCasted = CGF.Builder.CreateBitCast(SrcPtr, I8PtrTy);
- // FIXME: Use better alignment.
CGF.Builder.CreateMemCpy(Casted, SrcCasted,
llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
- 1, false);
- return CGF.Builder.CreateLoad(Tmp);
+ SrcAlign.getQuantity(), false);
+ return CGF.Builder.CreateAlignedLoad(Tmp, SrcAlign.getQuantity());
}
// Function to store a first-class aggregate into memory. We prefer to
// store the elements rather than the aggregate to be more friendly to
// fast-isel.
// FIXME: Do we need to recurse here?
static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val,
llvm::Value *DestPtr, bool DestIsVolatile,
- bool LowAlignment) {
+ CharUnits DestAlign) {
// Prefer scalar stores to first-class aggregate stores.
if (llvm::StructType *STy =
dyn_cast<llvm::StructType>(Val->getType())) {
+ const llvm::StructLayout *Layout =
+ CGF.CGM.getDataLayout().getStructLayout(STy);
+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(STy, DestPtr, 0, i);
llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i);
- llvm::StoreInst *SI = CGF.Builder.CreateStore(Elt, EltPtr,
- DestIsVolatile);
- if (LowAlignment)
- SI->setAlignment(1);
+ uint64_t EltOffset = Layout->getElementOffset(i);
+ CharUnits EltAlign =
+ DestAlign.alignmentAtOffset(CharUnits::fromQuantity(EltOffset));
+ CGF.Builder.CreateAlignedStore(Elt, EltPtr, EltAlign.getQuantity(),
+ DestIsVolatile);
}
} else {
- llvm::StoreInst *SI = CGF.Builder.CreateStore(Val, DestPtr, DestIsVolatile);
- if (LowAlignment)
- SI->setAlignment(1);
+ CGF.Builder.CreateAlignedStore(Val, DestPtr, DestAlign.getQuantity(),
+ DestIsVolatile);
}
}
/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types.
+/// where the source and destination may have different types. The
+/// destination is known to be aligned to \arg DstAlign bytes.
///
/// This safely handles the case when the src type is larger than the
/// destination type; the upper bits of the src will be lost.
static void CreateCoercedStore(llvm::Value *Src,
llvm::Value *DstPtr,
bool DstIsVolatile,
+ CharUnits DstAlign,
CodeGenFunction &CGF) {
llvm::Type *SrcTy = Src->getType();
llvm::Type *DstTy =
cast<llvm::PointerType>(DstPtr->getType())->getElementType();
if (SrcTy == DstTy) {
- CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
+ CGF.Builder.CreateAlignedStore(Src, DstPtr, DstAlign.getQuantity(),
+ DstIsVolatile);
return;
}
@@ -1027,7 +1032,8 @@
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
(isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
- CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
+ CGF.Builder.CreateAlignedStore(Src, DstPtr, DstAlign.getQuantity(),
+ DstIsVolatile);
return;
}
@@ -1037,8 +1043,7 @@
if (SrcSize <= DstSize) {
llvm::Value *Casted =
CGF.Builder.CreateBitCast(DstPtr, llvm::PointerType::getUnqual(SrcTy));
- // FIXME: Use better alignment / avoid requiring aligned store.
- BuildAggStore(CGF, Src, Casted, DstIsVolatile, true);
+ BuildAggStore(CGF, Src, Casted, DstIsVolatile, DstAlign);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
@@ -1049,15 +1054,15 @@
//
// FIXME: Assert that we aren't truncating non-padding bits when have access
// to that information.
- llvm::Value *Tmp = CGF.CreateTempAlloca(SrcTy);
- CGF.Builder.CreateStore(Src, Tmp);
+ llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(SrcTy);
+ Tmp->setAlignment(DstAlign.getQuantity());
+ CGF.Builder.CreateAlignedStore(Src, Tmp, DstAlign.getQuantity());
llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy();
llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy);
llvm::Value *DstCasted = CGF.Builder.CreateBitCast(DstPtr, I8PtrTy);
- // FIXME: Use better alignment.
CGF.Builder.CreateMemCpy(DstCasted, Casted,
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
- 1, false);
+ DstAlign.getQuantity(), false);
}
}
@@ -1996,13 +2001,15 @@
Alloca->setAlignment(AlignmentToUse);
llvm::Value *V = Alloca;
llvm::Value *Ptr = V; // Pointer to store into.
+ CharUnits PtrAlign = CharUnits::fromQuantity(AlignmentToUse);
// If the value is offset in memory, apply the offset now.
if (unsigned Offs = ArgI.getDirectOffset()) {
Ptr = Builder.CreateBitCast(Ptr, Builder.getInt8PtrTy());
Ptr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), Ptr, Offs);
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(ArgI.getCoerceToType()));
+ PtrAlign = PtrAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs));
}
// Fast-isel and the optimizer generally like scalar values better than
@@ -2047,7 +2054,7 @@
assert(NumIRArgs == 1);
auto AI = FnArgs[FirstIRArg];
AI->setName(Arg->getName() + ".coerce");
- CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this);
+ CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, PtrAlign, *this);
}
@@ -2415,15 +2422,17 @@
}
} else {
llvm::Value *V = ReturnValue;
+ CharUnits Align = getContext().getTypeAlignInChars(RetTy);
// If the value is offset in memory, apply the offset now.
if (unsigned Offs = RetAI.getDirectOffset()) {
V = Builder.CreateBitCast(V, Builder.getInt8PtrTy());
V = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), V, Offs);
V = Builder.CreateBitCast(V,
llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
+ Align = Align.alignmentAtOffset(CharUnits::fromQuantity(Offs));
}
- RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this);
+ RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), Align, *this);
}
// In ARC, end functions that return a retainable type with a call
@@ -3286,20 +3295,25 @@
// FIXME: Avoid the conversion through memory if possible.
llvm::Value *SrcPtr;
+ CharUnits SrcAlign;
if (RV.isScalar() || RV.isComplex()) {
SrcPtr = CreateMemTemp(I->Ty, "coerce");
+ SrcAlign = TypeAlign;
LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign);
EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
- } else
+ } else {
SrcPtr = RV.getAggregateAddr();
+ // This alignment is guaranteed by EmitCallArg.
+ SrcAlign = TypeAlign;
+ }
// If the value is offset in memory, apply the offset now.
if (unsigned Offs = ArgInfo.getDirectOffset()) {
SrcPtr = Builder.CreateBitCast(SrcPtr, Builder.getInt8PtrTy());
SrcPtr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), SrcPtr, Offs);
SrcPtr = Builder.CreateBitCast(SrcPtr,
llvm::PointerType::getUnqual(ArgInfo.getCoerceToType()));
-
+ SrcAlign = SrcAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs));
}
// Fast-isel and the optimizer generally like scalar values better than
@@ -3338,7 +3352,8 @@
// In the simple case, just pass the coerced loaded value.
assert(NumIRArgs == 1);
IRCallArgs[FirstIRArg] =
- CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
+ CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
+ SrcAlign, *this);
}
break;
@@ -3535,12 +3550,13 @@
case TEK_Aggregate: {
llvm::Value *DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
+ CharUnits DestAlign = getContext().getTypeAlignInChars(RetTy);
if (!DestPtr) {
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
DestIsVolatile = false;
}
- BuildAggStore(*this, CI, DestPtr, DestIsVolatile, false);
+ BuildAggStore(*this, CI, DestPtr, DestIsVolatile, DestAlign);
return RValue::getAggregate(DestPtr);
}
case TEK_Scalar: {
@@ -3557,22 +3573,26 @@
llvm::Value *DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
+ CharUnits DestAlign = getContext().getTypeAlignInChars(RetTy);
if (!DestPtr) {
DestPtr = CreateMemTemp(RetTy, "coerce");
DestIsVolatile = false;
}
// If the value is offset in memory, apply the offset now.
llvm::Value *StorePtr = DestPtr;
+ CharUnits StoreAlign = DestAlign;
if (unsigned Offs = RetAI.getDirectOffset()) {
StorePtr = Builder.CreateBitCast(StorePtr, Builder.getInt8PtrTy());
StorePtr =
Builder.CreateConstGEP1_32(Builder.getInt8Ty(), StorePtr, Offs);
StorePtr = Builder.CreateBitCast(StorePtr,
llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
+ StoreAlign =
+ StoreAlign.alignmentAtOffset(CharUnits::fromQuantity(Offs));
}
- CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+ CreateCoercedStore(CI, StorePtr, DestIsVolatile, StoreAlign, *this);
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
}
Index: cfe/trunk/test/CodeGen/arm64-abi-vector.c
===================================================================
--- cfe/trunk/test/CodeGen/arm64-abi-vector.c
+++ cfe/trunk/test/CodeGen/arm64-abi-vector.c
@@ -309,7 +309,7 @@
// CHECK: args_vec_5c
// CHECK: [[C5:%.*]] = alloca <5 x i8>, align 8
// CHECK: [[TMP:%.*]] = bitcast <5 x i8>* [[C5]] to <2 x i32>*
-// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1
+// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8
double sum = fixed;
sum = sum + c5.x + c5.y;
return sum;
@@ -325,7 +325,7 @@
// CHECK: args_vec_9c
// CHECK: [[C9:%.*]] = alloca <9 x i8>, align 16
// CHECK: [[TMP:%.*]] = bitcast <9 x i8>* [[C9]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
double sum = fixed;
sum = sum + c9.x + c9.y;
return sum;
@@ -355,7 +355,7 @@
// CHECK: args_vec_3s
// CHECK: [[C3:%.*]] = alloca <3 x i16>, align 8
// CHECK: [[TMP:%.*]] = bitcast <3 x i16>* [[C3]] to <2 x i32>*
-// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 1
+// CHECK: store <2 x i32> {{%.*}}, <2 x i32>* [[TMP]], align 8
double sum = fixed;
sum = sum + c3.x + c3.y;
return sum;
@@ -371,7 +371,7 @@
// CHECK: args_vec_5s
// CHECK: [[C5:%.*]] = alloca <5 x i16>, align 16
// CHECK: [[TMP:%.*]] = bitcast <5 x i16>* [[C5]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
double sum = fixed;
sum = sum + c5.x + c5.y;
return sum;
@@ -387,7 +387,7 @@
// CHECK: args_vec_3i
// CHECK: [[C3:%.*]] = alloca <3 x i32>, align 16
// CHECK: [[TMP:%.*]] = bitcast <3 x i32>* [[C3]] to <4 x i32>*
-// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 1
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* [[TMP]], align 16
double sum = fixed;
sum = sum + c3.x + c3.y;
return sum;
Index: cfe/trunk/test/CodeGen/arm64-arguments.c
===================================================================
--- cfe/trunk/test/CodeGen/arm64-arguments.c
+++ cfe/trunk/test/CodeGen/arm64-arguments.c
@@ -219,8 +219,8 @@
// CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s36, align 16
// CHECK: %s2 = alloca %struct.s36, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
// CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
@@ -275,8 +275,8 @@
// CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 8
// CHECK: %s2 = alloca %struct.s38, align 8
-// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
-// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
+// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 8
+// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 8
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
@@ -287,8 +287,8 @@
s38_no_align g38_2;
int caller38() {
// CHECK: define i32 @caller38()
-// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
// CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
return f38(3, g38, g38_2);
}
@@ -299,18 +299,18 @@
// CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 8
// CHECK: %s2 = alloca %struct.s38, align 8
-// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
-// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
+// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 8
+// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 8
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller38_stack() {
// CHECK: define i32 @caller38_stack()
-// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
// CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
}
@@ -328,8 +328,8 @@
// CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
@@ -340,8 +340,8 @@
s39_with_align g39_2;
int caller39() {
// CHECK: define i32 @caller39()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
// CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
return f39(3, g39, g39_2);
}
@@ -352,18 +352,18 @@
// CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller39_stack() {
// CHECK: define i32 @caller39_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
// CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
}
@@ -383,8 +383,8 @@
// CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 8
// CHECK: %s2 = alloca %struct.s40, align 8
-// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 8
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 8
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
@@ -395,8 +395,8 @@
s40_no_align g40_2;
int caller40() {
// CHECK: define i32 @caller40()
-// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
// CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
return f40(3, g40, g40_2);
}
@@ -407,18 +407,18 @@
// CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 8
// CHECK: %s2 = alloca %struct.s40, align 8
-// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
+// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 8
+// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 8
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller40_stack() {
// CHECK: define i32 @caller40_stack()
-// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
// CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
}
@@ -438,8 +438,8 @@
// CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
@@ -450,8 +450,8 @@
s41_with_align g41_2;
int caller41() {
// CHECK: define i32 @caller41()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
// CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
return f41(3, g41, g41_2);
}
@@ -462,18 +462,18 @@
// CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
-// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
-// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
+// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
+// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller41_stack() {
// CHECK: define i32 @caller41_stack()
-// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
// CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
}
Index: cfe/trunk/test/CodeGen/ppc64le-aggregates.c
===================================================================
--- cfe/trunk/test/CodeGen/ppc64le-aggregates.c
+++ cfe/trunk/test/CodeGen/ppc64le-aggregates.c
@@ -54,57 +54,57 @@
struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
// CHECK-LABEL: @call_f1
-// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 4
// CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
struct f1 global_f1;
void call_f1(void) { global_f1 = func_f1(global_f1); }
// CHECK-LABEL: @call_f2
-// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 4
// CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]])
struct f2 global_f2;
void call_f2(void) { global_f2 = func_f2(global_f2); }
// CHECK-LABEL: @call_f3
-// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 4
// CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]])
struct f3 global_f3;
void call_f3(void) { global_f3 = func_f3(global_f3); }
// CHECK-LABEL: @call_f4
-// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 4
// CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]])
struct f4 global_f4;
void call_f4(void) { global_f4 = func_f4(global_f4); }
// CHECK-LABEL: @call_f5
-// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 4
// CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]])
struct f5 global_f5;
void call_f5(void) { global_f5 = func_f5(global_f5); }
// CHECK-LABEL: @call_f6
-// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 4
// CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]])
struct f6 global_f6;
void call_f6(void) { global_f6 = func_f6(global_f6); }
// CHECK-LABEL: @call_f7
-// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 4
// CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]])
struct f7 global_f7;
void call_f7(void) { global_f7 = func_f7(global_f7); }
// CHECK-LABEL: @call_f8
-// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 4
// CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]])
struct f8 global_f8;
void call_f8(void) { global_f8 = func_f8(global_f8); }
// CHECK-LABEL: @call_f9
// CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
// CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
// CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
struct f9 global_f9;
Index: cfe/trunk/test/CodeGen/align-systemz.c
===================================================================
--- cfe/trunk/test/CodeGen/align-systemz.c
+++ cfe/trunk/test/CodeGen/align-systemz.c
@@ -25,3 +25,19 @@
s = es;
}
+
+// Alignment should be respected for coerced argument loads
+
+struct arg { long y __attribute__((packed, aligned(4))); };
+
+extern struct arg x;
+void f(struct arg);
+
+void test (void)
+{
+ f(x);
+}
+
+// CHECK-LABEL: @test
+// CHECK: load i64, i64* getelementptr inbounds (%struct.arg, %struct.arg* @x, i32 0, i32 0), align 4
+
Index: cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c
===================================================================
--- cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c
+++ cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c
@@ -14,15 +14,15 @@
// CHECK: %d = alloca %struct.s4, align 4
// CHECK: %e = alloca %struct.s5, align 8
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %a, i32 0, i32 0
-// CHECK: store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %b, i32 0, i32 0
-// CHECK: store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %d, i32 0, i32 0
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK: store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 4
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %e, i32 0, i32 0
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK: store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 8
// CHECK: ret void
void foo(void)
@@ -36,14 +36,14 @@
// CHECK-LABEL: define void @foo
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %p1, i32 0, i32 0
-// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %p2, i32 0, i32 0
-// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %p4, i32 0, i32 0
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 4
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %p5, i32 0, i32 0
// CHECK: %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK: %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 8
// CHECK: call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}})
// CHECK: ret void
Index: cfe/trunk/test/CodeGen/arm64-be-bitfield.c
===================================================================
--- cfe/trunk/test/CodeGen/arm64-be-bitfield.c
+++ cfe/trunk/test/CodeGen/arm64-be-bitfield.c
@@ -7,7 +7,7 @@
// Get the high 32-bits and then shift appropriately for big-endian.
signed callee_b0f(struct bt3 bp11) {
// IR: callee_b0f(i64 [[ARG:%.*]])
-// IR: store i64 [[ARG]], i64* [[PTR:%.*]]
+// IR: store i64 [[ARG]], i64* [[PTR:%.*]], align 8
// IR: [[BITCAST:%.*]] = bitcast i64* [[PTR]] to i8*
// IR: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* [[BITCAST]], i64 4
// ARM: asr x0, x0, #54
Index: cfe/trunk/test/CodeGenCXX/varargs.cpp
===================================================================
--- cfe/trunk/test/CodeGenCXX/varargs.cpp
+++ cfe/trunk/test/CodeGenCXX/varargs.cpp
@@ -37,7 +37,7 @@
// CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[X]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 8, i32 4, i1 false)
// CHECK-NEXT: [[T0:%.*]] = bitcast [[A]]* [[TMP]] to i64*
- // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 1
+ // CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 4
// CHECK-NEXT: call void (...) @_ZN5test13fooEz(i64 [[T1]])
// CHECK-NEXT: ret void
}
Index: cfe/trunk/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
===================================================================
--- cfe/trunk/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
+++ cfe/trunk/test/CodeGenCXX/2012-03-16-StoreAlign.cpp
@@ -28,7 +28,7 @@
};
// CHECK: @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth = linkonce_odr global %struct.Length zeroinitializer, align 4
-// CHECK: store float %{{.*}}, float* getelementptr inbounds (%struct.Length, %struct.Length* @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth, i32 0, i32 0), align 1
+// CHECK: store float %{{.*}}, float* getelementptr inbounds (%struct.Length, %struct.Length* @_ZZN3Foo19getPageSizeFromNameERK6LengthE10legalWidth, i32 0, i32 0), align 4
bool bar(Length &b) {
Foo f;