| This patch enforces SuitableAlign’s alignment when loading objects with more relaxed alignment. Currently, SuitableAlign is only used for a warning when a type is over-aligned. This patch optionally enforces it in IRGen as well. I defined a new field in TargetInfo, CompleteObjectAlign, which is optionally set to SuitableAlign. Currently, it is only set for Apple’s targets, as I don’t know its implications for the other supported targets. Long term, we want to use one value for both (Sema and code gen). This is rdar://16254558 |
Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h (revision 214123)
+++ include/clang/Basic/TargetInfo.h (working copy)
@@ -66,6 +66,7 @@
unsigned char LongWidth, LongAlign;
unsigned char LongLongWidth, LongLongAlign;
unsigned char SuitableAlign;
+ unsigned char CompleteObjectAlign;
unsigned char MinGlobalAlign;
unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
unsigned short MaxVectorAlign;
@@ -313,6 +314,14 @@
/// \brief Return the alignment that is suitable for storing any
/// object with a fundamental alignment requirement.
unsigned getSuitableAlign() const { return SuitableAlign; }
+
+ /// \brief Return the alignment that is suitable for storing any
+ /// complete object with a fundamental alignment requirement. Ideally,
+ /// this should be merged with SuitableAlign, which serves the same
+ /// purpose (SuitableAlign is used for a Sema warning, this value in
+ /// IRGen). For now they are distinct because the implications of the
+ /// alignment change on non-Darwin targets are unknown.
+ unsigned getCompleteObjectAlign() const { return CompleteObjectAlign; }
/// getMinGlobalAlign - Return the minimum alignment of a global variable,
/// unless its alignment is explicitly reduced via attributes.
Index: lib/Basic/TargetInfo.cpp
===================================================================
--- lib/Basic/TargetInfo.cpp (revision 214123)
+++ lib/Basic/TargetInfo.cpp (working copy)
@@ -36,6 +36,7 @@
LongWidth = LongAlign = 32;
LongLongWidth = LongLongAlign = 64;
SuitableAlign = 64;
+ CompleteObjectAlign = 0;
MinGlobalAlign = 0;
HalfWidth = 16;
HalfAlign = 16;
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp (revision 214123)
+++ lib/Basic/Targets.cpp (working copy)
@@ -3109,6 +3109,7 @@
LongDoubleWidth = 128;
LongDoubleAlign = 128;
SuitableAlign = 128;
+ CompleteObjectAlign = SuitableAlign;
MaxVectorAlign = 256;
SizeType = UnsignedLong;
IntPtrType = SignedLong;
@@ -3428,6 +3429,7 @@
: DarwinTargetInfo<X86_64TargetInfo>(Triple) {
Int64Type = SignedLongLong;
MaxVectorAlign = 256;
+ CompleteObjectAlign = SuitableAlign;
// The 64-bit iOS simulator uses the builtin bool type for Objective-C.
llvm::Triple T = llvm::Triple(Triple);
if (T.getOS() == llvm::Triple::IOS)
@@ -3544,6 +3546,9 @@
DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 64;
const llvm::Triple &T = getTriple();
+ if (T.isOSDarwin())
+ CompleteObjectAlign = SuitableAlign;
+
// size_t is unsigned long on Darwin and NetBSD.
if (T.isOSDarwin() || T.getOS() == llvm::Triple::NetBSD)
SizeType = UnsignedLong;
@@ -3615,6 +3620,8 @@
IsAAPCS = false;
DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 32;
+ if (T.isOSDarwin())
+ CompleteObjectAlign = SuitableAlign;
// size_t is unsigned int on FreeBSD.
if (T.getOS() == llvm::Triple::FreeBSD)
@@ -4361,8 +4368,12 @@
RegParmMax = 8;
MaxAtomicInlineWidth = 128;
MaxAtomicPromoteWidth = 128;
-
+
LongDoubleWidth = LongDoubleAlign = 128;
+ SuitableAlign = 128;
+ if (getTriple().getOS() == llvm::Triple::IOS)
+ CompleteObjectAlign = SuitableAlign;
+
LongDoubleFormat = &llvm::APFloat::IEEEquad;
// {} in inline assembly are neon specifiers, not assembly variant
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h (revision 214123)
+++ lib/CodeGen/CodeGenFunction.h (working copy)
@@ -1397,10 +1397,32 @@
CGM.getTBAAInfo(T));
}
+ static bool RestrictedCompleteObjectAlign(QualType T) {
+ if (const TypedefType *TD = dyn_cast<TypedefType>(T.getTypePtr())) {
+ if (TypedefNameDecl *Typedef = TD->getDecl())
+ return !Typedef->hasAttr<AlignedAttr>();
+ return true;
+ }
+ // Assume elaborated types (struct, etc.) enforce their own alignment rules.
+ return !isa<ElaboratedType>(T.getTypePtr());
+ }
+
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
CharUnits Alignment;
- if (!T->isIncompleteType())
+ if (!T->isIncompleteType()) {
Alignment = getContext().getTypeAlignInChars(T);
+ // For targets with more restrictive alignment for complete objects,
+ // use the smaller of two alignments (unless type has specified its
+ // own alignment via aligned attribute).
+ unsigned CompleteObjectAlign =
+ getContext().getTargetInfo().getCompleteObjectAlign();
+ if (CompleteObjectAlign && RestrictedCompleteObjectAlign(T)) {
+ CompleteObjectAlign /= getContext().getCharWidth();
+ Alignment = CharUnits::fromQuantity(
+ std::min(unsigned(Alignment.getQuantity()),
+ unsigned(CompleteObjectAlign)));
+ }
+ }
return LValue::MakeAddr(V, T, Alignment, getContext(),
CGM.getTBAAInfo(T));
}
Index: test/CodeGen/arm-arguments.c
===================================================================
--- test/CodeGen/arm-arguments.c (revision 214123)
+++ test/CodeGen/arm-arguments.c (working copy)
@@ -215,11 +215,11 @@
// APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
// APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// APCS-GNU: load <4 x float>* %[[d]], align 16
+// APCS-GNU: load <4 x float>* %[[d]], align 4
// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 16, %struct.s35* byval align 16)
// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// AAPCS: load <4 x float>* %[[d]], align 16
+// AAPCS: load <4 x float>* %[[d]], align 8
Index: test/CodeGenCXX/align-avx-complete-objects.cpp
===================================================================
--- test/CodeGenCXX/align-avx-complete-objects.cpp (revision 0)
+++ test/CodeGenCXX/align-avx-complete-objects.cpp (working copy)
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -x c++ %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s
+// rdar://16254558
+
+typedef float AVX2Float __attribute__((__vector_size__(32)));
+
+
+volatile float TestAlign(void)
+{
+ volatile AVX2Float *p = new AVX2Float;
+ *p = *p;
+ AVX2Float r = *p;
+ return r[0];
+}
+
+// CHECK: [[R:%.*]] = alloca <8 x float>, align 32
+// CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32)
+// CHECK-NEXT: [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>*
+// CHECK-NEXT: store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8
+// CHECK-NEXT: [[ONE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT: [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 16
+// CHECK-NEXT: [[THREE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT: store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 16
+// CHECK-NEXT: [[FOUR:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT: [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 16
+// CHECK-NEXT: store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32
+// CHECK-NEXT: [[SIX:%.*]] = load <8 x float>* [[R]], align 32
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0
+// CHECK-NEXT: ret float [[VECEXT]]
- Fariborz |
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
