This patch enforces SuitableAlign’s alignment when loading objects with more relaxed alignment. Currently, SuitableAlign is used in a
warning when a type is over-aligned. This patch optionally enforces it in IRGen. I defined a new field in TargetInfo which is optionally set to
SuitableAlign. Currently, this is only defined for Apple’s targets, as I don’t know its implications for the other supported targets. Long term,
we want to use one value for both (Sema and CodeGen). This is rdar://16254558

Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h    (revision 214123)
+++ include/clang/Basic/TargetInfo.h    (working copy)
@@ -66,6 +66,7 @@
   unsigned char LongWidth, LongAlign;
   unsigned char LongLongWidth, LongLongAlign;
   unsigned char SuitableAlign;
+  unsigned char CompleteObjectAlign;
   unsigned char MinGlobalAlign;
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
   unsigned short MaxVectorAlign;
@@ -313,6 +314,14 @@
   /// \brief Return the alignment that is suitable for storing any
   /// object with a fundamental alignment requirement.
   unsigned getSuitableAlign() const { return SuitableAlign; }
+  
+  /// \brief Return the alignment that is suitable for storing any
+  /// complete object with a fundamental alignment requirement. Ideally,
+  /// this should be replaced with SuitableAlign (which shares an identical
+  /// purpose; the latter is used in a Sema warning and the former in
+  /// IRGen). But currently they are distinct because of the unknown
+  /// implications of an alignment change on non-Darwin targets.
+  unsigned getCompleteObjectAlign() const { return CompleteObjectAlign; }
 
   /// getMinGlobalAlign - Return the minimum alignment of a global variable,
   /// unless its alignment is explicitly reduced via attributes.
Index: lib/Basic/TargetInfo.cpp
===================================================================
--- lib/Basic/TargetInfo.cpp    (revision 214123)
+++ lib/Basic/TargetInfo.cpp    (working copy)
@@ -36,6 +36,7 @@
   LongWidth = LongAlign = 32;
   LongLongWidth = LongLongAlign = 64;
   SuitableAlign = 64;
+  CompleteObjectAlign = 0;
   MinGlobalAlign = 0;
   HalfWidth = 16;
   HalfAlign = 16;
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp       (revision 214123)
+++ lib/Basic/Targets.cpp       (working copy)
@@ -3109,6 +3109,7 @@
     LongDoubleWidth = 128;
     LongDoubleAlign = 128;
     SuitableAlign = 128;
+    CompleteObjectAlign = SuitableAlign;
     MaxVectorAlign = 256;
     SizeType = UnsignedLong;
     IntPtrType = SignedLong;
@@ -3428,6 +3429,7 @@
       : DarwinTargetInfo<X86_64TargetInfo>(Triple) {
     Int64Type = SignedLongLong;
     MaxVectorAlign = 256;
+    CompleteObjectAlign = SuitableAlign;
     // The 64-bit iOS simulator uses the builtin bool type for Objective-C.
     llvm::Triple T = llvm::Triple(Triple);
     if (T.getOS() == llvm::Triple::IOS)
@@ -3544,6 +3546,9 @@
     DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 64;
     const llvm::Triple &T = getTriple();
 
+    if (T.isOSDarwin())
+      CompleteObjectAlign = SuitableAlign;
+    
     // size_t is unsigned long on Darwin and NetBSD.
     if (T.isOSDarwin() || T.getOS() == llvm::Triple::NetBSD)
       SizeType = UnsignedLong;
@@ -3615,6 +3620,8 @@
     IsAAPCS = false;
 
     DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 32;
+    if (T.isOSDarwin())
+      CompleteObjectAlign = SuitableAlign;
 
     // size_t is unsigned int on FreeBSD.
     if (T.getOS() == llvm::Triple::FreeBSD)
@@ -4361,8 +4368,12 @@
     RegParmMax = 8;
     MaxAtomicInlineWidth = 128;
     MaxAtomicPromoteWidth = 128;
-
+        
     LongDoubleWidth = LongDoubleAlign = 128;
+    SuitableAlign = 128;
+    if (getTriple().getOS() == llvm::Triple::IOS)
+      CompleteObjectAlign = SuitableAlign;
+      
     LongDoubleFormat = &llvm::APFloat::IEEEquad;
 
     // {} in inline assembly are neon specifiers, not assembly variant
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h       (revision 214123)
+++ lib/CodeGen/CodeGenFunction.h       (working copy)
@@ -1397,10 +1397,32 @@
                             CGM.getTBAAInfo(T));
   }
 
+  static bool RestrictedCompleteObjectAlign(QualType T) {
+    if (const TypedefType *TD = dyn_cast<TypedefType>(T.getTypePtr())) {
+      if (TypedefNameDecl *Typedef = TD->getDecl())
+        return !Typedef->hasAttr<AlignedAttr>();
+      return true;
+    }
+    // Assume elaborated types (struct, etc.) enforce their own alignment rules.
+    return !isa<ElaboratedType>(T.getTypePtr());
+  }
+  
   LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
     CharUnits Alignment;
-    if (!T->isIncompleteType())
+    if (!T->isIncompleteType()) {
       Alignment = getContext().getTypeAlignInChars(T);
+      // For targets with more restrictive alignment for complete objects,
+      // use the smaller of two alignments (unless type has specified its
+      // own alignment via aligned attribute).
+      unsigned CompleteObjectAlign =
+        getContext().getTargetInfo().getCompleteObjectAlign();
+      if (CompleteObjectAlign && RestrictedCompleteObjectAlign(T)) {
+        CompleteObjectAlign /= getContext().getCharWidth();
+        Alignment = CharUnits::fromQuantity(
+                      std::min(unsigned(Alignment.getQuantity()),
+                               unsigned(CompleteObjectAlign)));
+      }
+    }
     return LValue::MakeAddr(V, T, Alignment, getContext(),
                             CGM.getTBAAInfo(T));
   }
Index: test/CodeGen/arm-arguments.c
===================================================================
--- test/CodeGen/arm-arguments.c        (revision 214123)
+++ test/CodeGen/arm-arguments.c        (working copy)
@@ -215,11 +215,11 @@
 // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
 // APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
 // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// APCS-GNU: load <4 x float>* %[[d]], align 16
+// APCS-GNU: load <4 x float>* %[[d]], align 4
 // AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 16, %struct.s35* byval align 16)
 // AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
 // AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
 // AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
 // AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
 // AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// AAPCS: load <4 x float>* %[[d]], align 16
+// AAPCS: load <4 x float>* %[[d]], align 8
Index: test/CodeGenCXX/align-avx-complete-objects.cpp
===================================================================
--- test/CodeGenCXX/align-avx-complete-objects.cpp      (revision 0)
+++ test/CodeGenCXX/align-avx-complete-objects.cpp      (working copy)
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -x c++ %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s
+// rdar://16254558
+
+typedef float AVX2Float __attribute__((__vector_size__(32)));
+
+
+volatile float TestAlign(void)
+{
+       volatile AVX2Float *p = new AVX2Float;
+        *p = *p;
+        AVX2Float r = *p;
+        return r[0];
+}
+
+// CHECK: [[R:%.*]] = alloca <8 x float>, align 32
+// CHECK-NEXT:  [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32)
+// CHECK-NEXT:  [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>*
+// CHECK-NEXT:  store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8
+// CHECK-NEXT:  [[ONE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 16
+// CHECK-NEXT:  [[THREE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 16
+// CHECK-NEXT:  [[FOUR:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 16
+// CHECK-NEXT:  store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32
+// CHECK-NEXT:  [[SIX:%.*]] = load <8 x float>* [[R]], align 32
+// CHECK-NEXT:  [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0
+// CHECK-NEXT:  ret float [[VECEXT]]

- Fariborz


_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to