rsmith created this revision.
rsmith added a reviewer: rjmccall.
Herald added subscribers: cfe-commits, sanjoy.

When initializing a base class, we might accidentally overwrite a virtual base 
in its tail padding in some situations (in particular, when emitting a trivial 
copy constructor as a memcpy, we copy the full object size rather than only the 
dsize):

  struct A { char c; A(const A&) : c(1) {} };
  struct B { int n; char c[3]; ~B(); };
  struct C : B, virtual A {};
  C f(C c) { return c; }

(Here, Clang emits an 8-byte memcpy for the `B` base in `f`, overwriting the 
already-initialized `A` vbase.)

It's easy enough to prevent that from happening entirely, but we would still 
like to copy the padded-to-alignment size whenever we can (for example, see the 
tests in `test/CodeGenCXX/assign-construct-memcpy.cpp`). This patch adds a flag 
to `AggValueSlot` to indicate whether the tail padding of the aggregate might 
overlap some other object, and propagate that flag far enough that we can see 
it when emitting a trivial copy ctor call. When initializing a base class, we 
treat it as potentially-overlapping only if the base class lies partly outside 
the nvsize of the derived class -- otherwise, because we know non-virtual 
subobjects are initialized in address order, an overlarge store is valid.

This patch also lays some groundwork for P0840 <http://wg21.link/p0840>, which 
permits packing into the tail padding for fields marked with a new attribute, 
wherein the same situation can arise (a vbase can be constructed in the tail 
padding of the last field if it has the attribute):

  struct A { char c; A(const A&) : c(1) {} };
  struct B { int n; char c[3]; ~B(); };
  struct C : virtual A { [[no_unique_address]] B b; };
  C f(C c) { return c; }

(Copying a `C` object must not perform an 8-byte copy for the `B` subobject, 
even though it's a most-derived object, because its tail padding contains an 
`A`.)

It also prepares us for the possibility that WG21 might decide that guaranteed 
copy elision is supposed to apply to base class initialization (for classes 
without virtual bases), which would mean that we cannot assume that the tail 
padding of the return slot of a function has no overlap with other objects:

  struct A { char c; };
  struct B { int n; char c[3]; ~B(); };
  B f() { return /*...*/ }
  struct C : B, virtual A {
    C() : A{1}, B(f()) {}
  };

If `B(f())` is not permitted to make a copy, then `f()` cannot trample on its 
return slot's tail padding (which in this example might contain an 
already-initialized `A` object).


Repository:
  rC Clang

https://reviews.llvm.org/D45306

Files:
  CodeGen/CGAtomic.cpp
  CodeGen/CGBlocks.cpp
  CodeGen/CGCall.cpp
  CodeGen/CGClass.cpp
  CodeGen/CGDecl.cpp
  CodeGen/CGDeclCXX.cpp
  CodeGen/CGExpr.cpp
  CodeGen/CGExprAgg.cpp
  CodeGen/CGExprCXX.cpp
  CodeGen/CGObjC.cpp
  CodeGen/CGOpenMPRuntime.cpp
  CodeGen/CGStmt.cpp
  CodeGen/CGValue.h
  CodeGen/CodeGenFunction.h
  CodeGen/ItaniumCXXABI.cpp
  CodeGenCXX/tail-padding.cpp

Index: CodeGenCXX/tail-padding.cpp
===================================================================
--- /dev/null
+++ CodeGenCXX/tail-padding.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
+
+// PR36992
+namespace Implicit {
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A {};
+  static_assert(sizeof(C) == sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN8Implicit1CC1EOS0_
+  // CHECK: call void @_ZN8Implicit1AC2ERKS0_(
+  // Note: this must memcpy 7 bytes, not 8, to avoid trampling over the virtual base class.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 7, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN8Implicit1CE
+}
+
+namespace InitWithinNVSize {
+  // This is the same as the previous test, except that the A base lies
+  // entirely within the nvsize of C. This makes it valid to copy at the
+  // full width.
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A { char x; };
+  static_assert(sizeof(C) > sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN16InitWithinNVSize1CC1EOS0_
+  // CHECK: call void @_ZN16InitWithinNVSize1AC2ERKS0_(
+  // This copies over the 'C::x' member, but that's OK because we've not initialized it yet.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN16InitWithinNVSize1CE
+  // CHECK: store i8
+}
Index: CodeGen/ItaniumCXXABI.cpp
===================================================================
--- CodeGen/ItaniumCXXABI.cpp
+++ CodeGen/ItaniumCXXABI.cpp
@@ -3896,7 +3896,7 @@
                         caughtExnAlignment);
     LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
     LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
-    CGF.EmitAggregateCopy(Dest, Src, CatchType);
+    CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap);
     return;
   }
 
@@ -3923,7 +3923,8 @@
                   AggValueSlot::forAddr(ParamAddr, Qualifiers(),
                                         AggValueSlot::IsNotDestructed,
                                         AggValueSlot::DoesNotNeedGCBarriers,
-                                        AggValueSlot::IsNotAliased));
+                                        AggValueSlot::IsNotAliased,
+                                        AggValueSlot::DoesNotOverlap));
 
   // Leave the terminate scope.
   CGF.EHStack.popTerminate();
Index: CodeGen/CodeGenFunction.h
===================================================================
--- CodeGen/CodeGenFunction.h
+++ CodeGen/CodeGenFunction.h
@@ -2060,7 +2060,8 @@
                                  T.getQualifiers(),
                                  AggValueSlot::IsNotDestructed,
                                  AggValueSlot::DoesNotNeedGCBarriers,
-                                 AggValueSlot::IsNotAliased);
+                                 AggValueSlot::IsNotAliased,
+                                 AggValueSlot::DoesNotOverlap);
   }
 
   /// Emit a cast to void* in the appropriate address space.
@@ -2117,28 +2118,52 @@
     }
     return false;
   }
-  /// EmitAggregateCopy - Emit an aggregate assignment.
-  ///
-  /// The difference to EmitAggregateCopy is that tail padding is not copied.
-  /// This is required for correctness when assigning non-POD structures in C++.
+
+  /// Determine whether a return value slot may overlap some other object.
+  AggValueSlot::Overlap_t overlapForReturnValue() {
+    // FIXME: Assuming no overlap here breaks guaranteed copy elision for base
+    // class subobjects. These cases may need to be revisited depending on the
+    // resolution of the relevant core issue.
+    return AggValueSlot::DoesNotOverlap;
+  }
+
+  /// Determine whether a field initialization may overlap some other object.
+  AggValueSlot::Overlap_t overlapForFieldInit(FieldDecl *FD) {
+    // FIXME: These cases can result in overlap as a result of P0840R0's
+    // [[no_unique_address]] attribute. We can still infer NoOverlap in the
+    // presence of that attribute if the field is within the nvsize of its
+    // containing class, because non-virtual subobjects are initialized in
+    // address order.
+    return AggValueSlot::DoesNotOverlap;
+  }
+
+  /// Determine whether a base class initialization may overlap some other
+  /// object.
+  AggValueSlot::Overlap_t overlapForBaseInit(const CXXRecordDecl *RD,
+                                             const CXXRecordDecl *BaseRD,
+                                             bool IsVirtual);
+
+  /// Emit an aggregate assignment.
   void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) {
     bool IsVolatile = hasVolatileMember(EltTy);
-    EmitAggregateCopy(Dest, Src, EltTy, IsVolatile, /* isAssignment= */ true);
+    EmitAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile);
   }
 
-  void EmitAggregateCopyCtor(LValue Dest, LValue Src) {
-    EmitAggregateCopy(Dest, Src, Src.getType(),
-                      /* IsVolatile= */ false, /* IsAssignment= */ false);
+  void EmitAggregateCopyCtor(LValue Dest, LValue Src,
+                             AggValueSlot::Overlap_t MayOverlap) {
+    EmitAggregateCopy(Dest, Src, Src.getType(), MayOverlap);
   }
 
   /// EmitAggregateCopy - Emit an aggregate copy.
   ///
-  /// \param isVolatile - True iff either the source or the destination is
-  /// volatile.
-  /// \param isAssignment - If false, allow padding to be copied.  This often
-  /// yields more efficient.
+  /// \param isVolatile \c true iff either the source or the destination is
+  ///        volatile.
+  /// \param MayOverlap Whether the tail padding of the destination might be
+  ///        occupied by some other object. More efficient code can often be
+  ///        generated if not.
   void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy,
-                         bool isVolatile = false, bool isAssignment = false);
+                         AggValueSlot::Overlap_t MayOverlap,
+                         bool isVolatile = false);
 
   /// GetAddrOfLocalVar - Return the address of a local variable.
   Address GetAddrOfLocalVar(const VarDecl *VD) {
@@ -2302,11 +2327,13 @@
 
   void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                               bool ForVirtualBase, bool Delegating,
-                              Address This, const CXXConstructExpr *E);
+                              Address This, const CXXConstructExpr *E,
+                              AggValueSlot::Overlap_t Overlap);
 
   void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                               bool ForVirtualBase, bool Delegating,
-                              Address This, CallArgList &Args);
+                              Address This, CallArgList &Args,
+                              AggValueSlot::Overlap_t Overlap);
 
   /// Emit assumption load for all bases. Requires to be be called only on
   /// most-derived class and not under construction of the object.
Index: CodeGen/CGValue.h
===================================================================
--- CodeGen/CGValue.h
+++ CodeGen/CGValue.h
@@ -472,17 +472,25 @@
   /// evaluating an expression which constructs such an object.
   bool AliasedFlag : 1;
 
+  /// This is set to true if the tail padding of this slot might overlap 
+  /// another object that may have already been initialized (and whose
+  /// value must be preserved by this initialization). If so, we may only
+  /// store up to the dsize of the type. Otherwise we can widen stores to
+  /// the size of the type.
+  bool OverlapFlag : 1;
+
 public:
   enum IsAliased_t { IsNotAliased, IsAliased };
   enum IsDestructed_t { IsNotDestructed, IsDestructed };
   enum IsZeroed_t { IsNotZeroed, IsZeroed };
+  enum Overlap_t { DoesNotOverlap, MayOverlap };
   enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
 
   /// ignored - Returns an aggregate value slot indicating that the
   /// aggregate value is being ignored.
   static AggValueSlot ignored() {
     return forAddr(Address::invalid(), Qualifiers(), IsNotDestructed,
-                   DoesNotNeedGCBarriers, IsNotAliased);
+                   DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap);
   }
 
   /// forAddr - Make a slot for an aggregate value.
@@ -500,6 +508,7 @@
                               IsDestructed_t isDestructed,
                               NeedsGCBarriers_t needsGC,
                               IsAliased_t isAliased,
+                              Overlap_t mayOverlap,
                               IsZeroed_t isZeroed = IsNotZeroed) {
     AggValueSlot AV;
     if (addr.isValid()) {
@@ -514,16 +523,18 @@
     AV.ObjCGCFlag = needsGC;
     AV.ZeroedFlag = isZeroed;
     AV.AliasedFlag = isAliased;
+    AV.OverlapFlag = mayOverlap;
     return AV;
   }
 
   static AggValueSlot forLValue(const LValue &LV,
                                 IsDestructed_t isDestructed,
                                 NeedsGCBarriers_t needsGC,
                                 IsAliased_t isAliased,
+                                Overlap_t mayOverlap,
                                 IsZeroed_t isZeroed = IsNotZeroed) {
-    return forAddr(LV.getAddress(),
-                   LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed);
+    return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC,
+                   isAliased, mayOverlap, isZeroed);
   }
 
   IsDestructed_t isExternallyDestructed() const {
@@ -571,6 +582,10 @@
     return IsAliased_t(AliasedFlag);
   }
 
+  Overlap_t mayOverlap() const {
+    return Overlap_t(OverlapFlag);
+  }
+
   RValue asRValue() const {
     if (isIgnored()) {
       return RValue::getIgnored();
@@ -583,6 +598,14 @@
   IsZeroed_t isZeroed() const {
     return IsZeroed_t(ZeroedFlag);
   }
+
+  /// Get the preferred size to use when storing a value to this slot. This
+  /// is the type size unless that might overlap another object, in which
+  /// case it's the dsize.
+  CharUnits getPreferredSize(ASTContext &Ctx, QualType Type) const {
+    return mayOverlap() ? Ctx.getTypeInfoDataSizeInChars(Type).first
+                        : Ctx.getTypeSizeInChars(Type);
+  }
 };
 
 }  // end namespace CodeGen
Index: CodeGen/CGStmt.cpp
===================================================================
--- CodeGen/CGStmt.cpp
+++ CodeGen/CGStmt.cpp
@@ -1007,7 +1007,7 @@
   } else if (RV.isAggregate()) {
     LValue Dest = MakeAddrLValue(ReturnValue, Ty);
     LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty);
-    EmitAggregateCopy(Dest, Src, Ty);
+    EmitAggregateCopy(Dest, Src, Ty, overlapForReturnValue());
   } else {
     EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty),
                        /*init*/ true);
@@ -1085,11 +1085,12 @@
                                 /*isInit*/ true);
       break;
     case TEK_Aggregate:
-      EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue,
-                                            Qualifiers(),
-                                            AggValueSlot::IsDestructed,
-                                            AggValueSlot::DoesNotNeedGCBarriers,
-                                            AggValueSlot::IsNotAliased));
+      EmitAggExpr(RV, AggValueSlot::forAddr(
+                          ReturnValue, Qualifiers(),
+                          AggValueSlot::IsDestructed,
+                          AggValueSlot::DoesNotNeedGCBarriers,
+                          AggValueSlot::IsNotAliased,
+                          overlapForReturnValue()));
       break;
     }
   }
Index: CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- CodeGen/CGOpenMPRuntime.cpp
+++ CodeGen/CGOpenMPRuntime.cpp
@@ -4793,7 +4793,7 @@
                 CGF.getNaturalTypeAlignment(SharedsTy));
     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
-    CGF.EmitAggregateCopy(Dest, Src, SharedsTy);
+    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
   }
   // Emit initial values for private copies (if any).
   TaskResultTy Result;
Index: CodeGen/CGObjC.cpp
===================================================================
--- CodeGen/CGObjC.cpp
+++ CodeGen/CGObjC.cpp
@@ -1013,8 +1013,9 @@
       // that's not necessarily the same as "on the stack", so
       // we still potentially need objc_memmove_collectable.
       EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType),
-                        /* Src= */ LV, ivarType);
-      return; }
+                        /* Src= */ LV, ivarType, overlapForReturnValue());
+      return;
+    }
     case TEK_Scalar: {
       llvm::Value *value;
       if (propType->isReferenceType()) {
@@ -1439,7 +1440,8 @@
       EmitAggExpr(IvarInit->getInit(),
                   AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
-                                          AggValueSlot::IsNotAliased));
+                                          AggValueSlot::IsNotAliased,
+                                          AggValueSlot::DoesNotOverlap));
     }
     // constructor returns 'self'.
     CodeGenTypes &Types = CGM.getTypes();
@@ -3381,7 +3383,8 @@
                                     Qualifiers(),
                                     AggValueSlot::IsDestructed,
                                     AggValueSlot::DoesNotNeedGCBarriers,
-                                    AggValueSlot::IsNotAliased));
+                                    AggValueSlot::IsNotAliased,
+                                    AggValueSlot::DoesNotOverlap));
   
   FinishFunction();
   HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
Index: CodeGen/CGExprCXX.cpp
===================================================================
--- CodeGen/CGExprCXX.cpp
+++ CodeGen/CGExprCXX.cpp
@@ -279,7 +279,10 @@
         const Expr *Arg = *CE->arg_begin();
         LValue RHS = EmitLValue(Arg);
         LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType());
-        EmitAggregateCopy(Dest, RHS, Arg->getType());
+        // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's
+        // constructing a new complete object of type Ctor.
+        EmitAggregateCopy(Dest, RHS, Arg->getType(),
+                          AggValueSlot::DoesNotOverlap);
         return RValue::get(This.getPointer());
       }
       llvm_unreachable("unknown trivial member function");
@@ -631,7 +634,7 @@
     
     // Call the constructor.
     EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating,
-                           Dest.getAddress(), E);
+                           Dest.getAddress(), E, Dest.mayOverlap());
   }
 }
 
@@ -933,7 +936,8 @@
 }
 
 static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init,
-                                    QualType AllocType, Address NewPtr) {
+                                    QualType AllocType, Address NewPtr,
+                                    AggValueSlot::Overlap_t MayOverlap) {
   // FIXME: Refactor with EmitExprAsInit.
   switch (CGF.getEvaluationKind(AllocType)) {
   case TEK_Scalar:
@@ -949,7 +953,8 @@
       = AggValueSlot::forAddr(NewPtr, AllocType.getQualifiers(),
                               AggValueSlot::IsDestructed,
                               AggValueSlot::DoesNotNeedGCBarriers,
-                              AggValueSlot::IsNotAliased);
+                              AggValueSlot::IsNotAliased,
+                              MayOverlap);
     CGF.EmitAggExpr(Init, Slot);
     return;
   }
@@ -1018,7 +1023,8 @@
           AggValueSlot::forAddr(CurPtr, ElementType.getQualifiers(),
                                 AggValueSlot::IsDestructed,
                                 AggValueSlot::DoesNotNeedGCBarriers,
-                                AggValueSlot::IsNotAliased);
+                                AggValueSlot::IsNotAliased,
+                                AggValueSlot::DoesNotOverlap);
       EmitAggExpr(ILE->getInit(0), Slot);
 
       // Move past these elements.
@@ -1083,7 +1089,8 @@
       // an array, and we have an array filler, we can fold together the two
       // initialization loops.
       StoreAnyExprIntoOneUnit(*this, ILE->getInit(i),
-                              ILE->getInit(i)->getType(), CurPtr);
+                              ILE->getInit(i)->getType(), CurPtr,
+                              AggValueSlot::DoesNotOverlap);
       CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getPointer(),
                                                  Builder.getSize(1),
                                                  "array.exp.next"),
@@ -1236,7 +1243,8 @@
   }
 
   // Emit the initializer into this element.
-  StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr);
+  StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr,
+                          AggValueSlot::DoesNotOverlap);
 
   // Leave the Cleanup if we entered one.
   if (CleanupDominator) {
@@ -1267,7 +1275,8 @@
     CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements,
                                 AllocSizeWithoutCookie);
   else if (const Expr *Init = E->getInitializer())
-    StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr);
+    StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr,
+                            AggValueSlot::DoesNotOverlap);
 }
 
 /// Emit a call to an operator new or operator delete function, as implicitly
Index: CodeGen/CGExprAgg.cpp
===================================================================
--- CodeGen/CGExprAgg.cpp
+++ CodeGen/CGExprAgg.cpp
@@ -337,7 +337,8 @@
 
   AggValueSlot srcAgg =
     AggValueSlot::forLValue(src, AggValueSlot::IsDestructed,
-                            needsGC(type), AggValueSlot::IsAliased);
+                            needsGC(type), AggValueSlot::IsAliased,
+                            AggValueSlot::MayOverlap);
   EmitCopy(type, Dest, srcAgg);
 }
 
@@ -348,7 +349,7 @@
 void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
                               const AggValueSlot &src) {
   if (dest.requiresGCollection()) {
-    CharUnits sz = CGF.getContext().getTypeSizeInChars(type);
+    CharUnits sz = dest.getPreferredSize(CGF.getContext(), type);
     llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity());
     CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF,
                                                       dest.getAddress(),
@@ -362,7 +363,7 @@
   // the two sides.
   LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type);
   LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type);
-  CGF.EmitAggregateCopy(DestLV, SrcLV, type,
+  CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(),
                         dest.isVolatile() || src.isVolatile());
 }
 
@@ -759,6 +760,7 @@
                                           valueDest.isExternallyDestructed(),
                                           valueDest.requiresGCollection(),
                                           valueDest.isPotentiallyAliased(),
+                                          AggValueSlot::DoesNotOverlap,
                                           AggValueSlot::IsZeroed);
       }
       
@@ -986,7 +988,8 @@
     EmitCopy(E->getLHS()->getType(),
              AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
                                      needsGC(E->getLHS()->getType()),
-                                     AggValueSlot::IsAliased),
+                                     AggValueSlot::IsAliased,
+                                     AggValueSlot::MayOverlap),
              Dest);
     return;
   }
@@ -1007,7 +1010,8 @@
   AggValueSlot LHSSlot =
     AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, 
                             needsGC(E->getLHS()->getType()),
-                            AggValueSlot::IsAliased);
+                            AggValueSlot::IsAliased,
+                            AggValueSlot::MayOverlap);
   // A non-volatile aggregate destination might have volatile member.
   if (!LHSSlot.isVolatile() &&
       CGF.hasVolatileMember(E->getLHS()->getType()))
@@ -1185,6 +1189,7 @@
                                                AggValueSlot::IsDestructed,
                                       AggValueSlot::DoesNotNeedGCBarriers,
                                                AggValueSlot::IsNotAliased,
+                                               AggValueSlot::MayOverlap,
                                                Dest.isZeroed()));
     return;
   case TEK_Scalar:
@@ -1283,11 +1288,12 @@
       Address V = CGF.GetAddressOfDirectBaseInCompleteClass(
           Dest.getAddress(), CXXRD, BaseRD,
           /*isBaseVirtual*/ false);
-      AggValueSlot AggSlot =
-        AggValueSlot::forAddr(V, Qualifiers(),
-                              AggValueSlot::IsDestructed,
-                              AggValueSlot::DoesNotNeedGCBarriers,
-                              AggValueSlot::IsNotAliased);
+      AggValueSlot AggSlot = AggValueSlot::forAddr(
+          V, Qualifiers(),
+          AggValueSlot::IsDestructed,
+          AggValueSlot::DoesNotNeedGCBarriers,
+          AggValueSlot::IsNotAliased,
+          CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual()));
       CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot);
 
       if (QualType::DestructionKind dtorKind =
@@ -1468,7 +1474,9 @@
       // If the subexpression is an ArrayInitLoopExpr, share its cleanup.
       auto elementSlot = AggValueSlot::forLValue(
           elementLV, AggValueSlot::IsDestructed,
-          AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased);
+          AggValueSlot::DoesNotNeedGCBarriers,
+          AggValueSlot::IsNotAliased,
+          AggValueSlot::DoesNotOverlap);
       AggExprEmitter(CGF, elementSlot, false)
           .VisitArrayInitLoopExpr(InnerLoop, outerBegin);
     } else
@@ -1584,7 +1592,7 @@
     }
 
   // If the type is 16-bytes or smaller, prefer individual stores over memset.
-  CharUnits Size = CGF.getContext().getTypeSizeInChars(E->getType());
+  CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType());
   if (Size <= CharUnits::fromQuantity(16))
     return;
 
@@ -1630,13 +1638,37 @@
   LValue LV = MakeAddrLValue(Temp, E->getType());
   EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed,
                                          AggValueSlot::DoesNotNeedGCBarriers,
-                                         AggValueSlot::IsNotAliased));
+                                         AggValueSlot::IsNotAliased,
+                                         AggValueSlot::DoesNotOverlap));
   return LV;
 }
 
-void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src,
-                                        QualType Ty, bool isVolatile,
-                                        bool isAssignment) {
+AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit(
+    const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) {
+  // Virtual bases are initialized first, in address order, so there's never
+  // any overlap during their initialization.
+  //
+  // FIXME: Under P0840, this is no longer true: the tail padding of a vbase
+  // of a field could be reused by a vbase of a containing class.
+  if (IsVirtual)
+    return AggValueSlot::DoesNotOverlap;
+
+  // If the base class is laid out entirely within the nvsize of the derived
+  // class, its tail padding cannot yet be initialized, so we can issue
+  // stores at the full width of the base class.
+  const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+  if (Layout.getBaseClassOffset(BaseRD) +
+          getContext().getASTRecordLayout(BaseRD).getSize() <=
+      Layout.getNonVirtualSize())
+    return AggValueSlot::DoesNotOverlap;
+
+  // The tail padding may contain values we need to preserve.
+  return AggValueSlot::MayOverlap;
+}
+
+void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
+                                        AggValueSlot::Overlap_t MayOverlap,
+                                        bool isVolatile) {
   assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex");
 
   Address DestPtr = Dest.getAddress();
@@ -1669,12 +1701,11 @@
   // implementation handles this case safely.  If there is a libc that does not
   // safely handle this, we can add a target hook.
 
-  // Get data size info for this aggregate. If this is an assignment,
-  // don't copy the tail padding, because we might be assigning into a
-  // base subobject where the tail padding is claimed.  Otherwise,
-  // copying it is fine.
+  // Get data size info for this aggregate. Don't copy the tail padding if this
+  // might be a potentially-overlapping subobject, since the tail padding might
+  // be occupied by a different object. Otherwise, copying it is fine.
   std::pair<CharUnits, CharUnits> TypeInfo;
-  if (isAssignment)
+  if (MayOverlap)
     TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty);
   else
     TypeInfo = getContext().getTypeInfoInChars(Ty);
@@ -1686,22 +1717,12 @@
             getContext().getAsArrayType(Ty))) {
       QualType BaseEltTy;
       SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr);
-      TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy);
-      std::pair<CharUnits, CharUnits> LastElementTypeInfo;
-      if (!isAssignment)
-        LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+      TypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+      // FIXME: What about VLAs of VLAs?
       assert(!TypeInfo.first.isZero());
       SizeVal = Builder.CreateNUWMul(
           SizeVal,
           llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
-      if (!isAssignment) {
-        SizeVal = Builder.CreateNUWSub(
-            SizeVal,
-            llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
-        SizeVal = Builder.CreateNUWAdd(
-            SizeVal, llvm::ConstantInt::get(
-                         SizeTy, LastElementTypeInfo.first.getQuantity()));
-      }
     }
   }
   if (!SizeVal) {
Index: CodeGen/CGExpr.cpp
===================================================================
--- CodeGen/CGExpr.cpp
+++ CodeGen/CGExpr.cpp
@@ -214,7 +214,8 @@
     EmitAggExpr(E, AggValueSlot::forAddr(Location, Quals,
                                          AggValueSlot::IsDestructed_t(IsInit),
                                          AggValueSlot::DoesNotNeedGCBarriers,
-                                         AggValueSlot::IsAliased_t(!IsInit)));
+                                         AggValueSlot::IsAliased_t(!IsInit),
+                                         AggValueSlot::MayOverlap));
     return;
   }
 
@@ -432,7 +433,8 @@
                                            E->getType().getQualifiers(),
                                            AggValueSlot::IsDestructed,
                                            AggValueSlot::DoesNotNeedGCBarriers,
-                                           AggValueSlot::IsNotAliased));
+                                           AggValueSlot::IsNotAliased,
+                                           AggValueSlot::DoesNotOverlap));
       break;
     }
     }
Index: CodeGen/CGDeclCXX.cpp
===================================================================
--- CodeGen/CGDeclCXX.cpp
+++ CodeGen/CGDeclCXX.cpp
@@ -53,7 +53,8 @@
   case TEK_Aggregate:
     CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
-                                                  AggValueSlot::IsNotAliased));
+                                                  AggValueSlot::IsNotAliased,
+                                                  AggValueSlot::DoesNotOverlap));
     return;
   }
   llvm_unreachable("bad evaluation kind");
Index: CodeGen/CGDecl.cpp
===================================================================
--- CodeGen/CGDecl.cpp
+++ CodeGen/CGDecl.cpp
@@ -1458,7 +1458,8 @@
       EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
                                               AggValueSlot::IsDestructed,
                                          AggValueSlot::DoesNotNeedGCBarriers,
-                                              AggValueSlot::IsNotAliased));
+                                              AggValueSlot::IsNotAliased,
+                                              AggValueSlot::DoesNotOverlap));
     }
     return;
   }
Index: CodeGen/CGClass.cpp
===================================================================
--- CodeGen/CGClass.cpp
+++ CodeGen/CGClass.cpp
@@ -555,10 +555,12 @@
                                               BaseClassDecl,
                                               isBaseVirtual);
   AggValueSlot AggSlot =
-    AggValueSlot::forAddr(V, Qualifiers(),
-                          AggValueSlot::IsDestructed,
-                          AggValueSlot::DoesNotNeedGCBarriers,
-                          AggValueSlot::IsNotAliased);
+      AggValueSlot::forAddr(
+          V, Qualifiers(),
+          AggValueSlot::IsDestructed,
+          AggValueSlot::DoesNotNeedGCBarriers,
+          AggValueSlot::IsNotAliased,
+          CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual));
 
   CGF.EmitAggExpr(BaseInit->getInit(), AggSlot);
 
@@ -647,7 +649,8 @@
       LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field);
 
       // Copy the aggregate.
-      CGF.EmitAggregateCopy(LHS, Src, FieldType, LHS.isVolatileQualified());
+      CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field),
+                            LHS.isVolatileQualified());
       // Ensure that we destroy the objects if an exception is thrown later in
       // the constructor.
       QualType::DestructionKind dtorKind = FieldType.isDestructedType();
@@ -677,10 +680,12 @@
     break;
   case TEK_Aggregate: {
     AggValueSlot Slot =
-      AggValueSlot::forLValue(LHS,
-                              AggValueSlot::IsDestructed,
-                              AggValueSlot::DoesNotNeedGCBarriers,
-                              AggValueSlot::IsNotAliased);
+        AggValueSlot::forLValue(
+            LHS,
+            AggValueSlot::IsDestructed,
+            AggValueSlot::DoesNotNeedGCBarriers,
+            AggValueSlot::IsNotAliased,
+            overlapForFieldInit(Field));
     EmitAggExpr(Init, Slot);
     break;
   }
@@ -911,15 +916,15 @@
     }
 
     CharUnits getMemcpySize(uint64_t FirstByteOffset) const {
+      ASTContext &Ctx = CGF.getContext();
       unsigned LastFieldSize =
-        LastField->isBitField() ?
-          LastField->getBitWidthValue(CGF.getContext()) :
-          CGF.getContext().getTypeSize(LastField->getType());
-      uint64_t MemcpySizeBits =
-        LastFieldOffset + LastFieldSize - FirstByteOffset +
-        CGF.getContext().getCharWidth() - 1;
-      CharUnits MemcpySize =
-        CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+          LastField->isBitField()
+              ? LastField->getBitWidthValue(Ctx)
+              : Ctx.toBits(
+                    Ctx.getTypeInfoDataSizeInChars(LastField->getType()).first);
+      uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize -
+                                FirstByteOffset + Ctx.getCharWidth() - 1;
+      CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits);
       return MemcpySize;
     }
 
@@ -1960,7 +1965,8 @@
     }
 
     EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false,
-                           /*Delegating=*/false, curAddr, E);
+                           /*Delegating=*/false, curAddr, E,
+                           AggValueSlot::DoesNotOverlap);
   }
 
   // Go to the next element.
@@ -1995,7 +2001,8 @@
                                              CXXCtorType Type,
                                              bool ForVirtualBase,
                                              bool Delegating, Address This,
-                                             const CXXConstructExpr *E) {
+                                             const CXXConstructExpr *E,
+                                             AggValueSlot::Overlap_t Overlap) {
   CallArgList Args;
 
   // Push the this ptr.
@@ -2011,7 +2018,7 @@
     LValue Src = EmitLValue(Arg);
     QualType DestTy = getContext().getTypeDeclType(D->getParent());
     LValue Dest = MakeAddrLValue(This, DestTy);
-    EmitAggregateCopyCtor(Dest, Src);
+    EmitAggregateCopyCtor(Dest, Src, Overlap);
     return;
   }
 
@@ -2023,7 +2030,8 @@
   EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(),
                /*ParamsToSkip*/ 0, Order);
 
-  EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
+  EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args,
+                         Overlap);
 }
 
 static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
@@ -2055,7 +2063,8 @@
                                              bool ForVirtualBase,
                                              bool Delegating,
                                              Address This,
-                                             CallArgList &Args) {
+                                             CallArgList &Args,
+                                             AggValueSlot::Overlap_t Overlap) {
   const CXXRecordDecl *ClassDecl = D->getParent();
 
   // C++11 [class.mfct.non-static]p2:
@@ -2082,7 +2091,7 @@
     LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
     QualType DestTy = getContext().getTypeDeclType(ClassDecl);
     LValue DestLVal = MakeAddrLValue(This, DestTy);
-    EmitAggregateCopyCtor(DestLVal, SrcLVal);
+    EmitAggregateCopyCtor(DestLVal, SrcLVal, Overlap);
     return;
   }
 
@@ -2171,7 +2180,7 @@
   }
 
   EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false,
-                         This, Args);
+                         This, Args, AggValueSlot::MayOverlap);
 }
 
 void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
@@ -2267,7 +2276,8 @@
   EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(),
                /*ParamsToSkip*/ 1);
 
-  EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args);
+  EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args,
+                         AggValueSlot::MayOverlap);
 }
 
 void
@@ -2302,7 +2312,8 @@
   }
 
   EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false,
-                         /*Delegating=*/true, This, DelegateArgs);
+                         /*Delegating=*/true, This, DelegateArgs,
+                         AggValueSlot::MayOverlap);
 }
 
 namespace {
@@ -2333,7 +2344,8 @@
     AggValueSlot::forAddr(ThisPtr, Qualifiers(),
                           AggValueSlot::IsDestructed,
                           AggValueSlot::DoesNotNeedGCBarriers,
-                          AggValueSlot::IsNotAliased);
+                          AggValueSlot::IsNotAliased,
+                          AggValueSlot::MayOverlap);
 
   EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot);
 
Index: CodeGen/CGCall.cpp
===================================================================
--- CodeGen/CGCall.cpp
+++ CodeGen/CGCall.cpp
@@ -3019,7 +3019,8 @@
                                Ty.getQualifiers(),
                                AggValueSlot::IsNotDestructed,
                                AggValueSlot::DoesNotNeedGCBarriers,
-                               AggValueSlot::IsNotAliased);
+                               AggValueSlot::IsNotAliased,
+                               AggValueSlot::DoesNotOverlap);
 }
 
 void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
@@ -3487,7 +3488,8 @@
   if (!HasLV)
     return RV;
   LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty);
-  CGF.EmitAggregateCopy(Copy, LV, Ty, LV.isVolatile());
+  CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap,
+                        LV.isVolatile());
   IsUsed = true;
   return RValue::getAggregate(Copy.getAddress());
 }
@@ -3501,7 +3503,8 @@
   else {
     auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress();
     LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty);
-    CGF.EmitAggregateCopy(Dst, SrcLV, Ty,
+    // We assume that call args are never copied into subobjects.
+    CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap,
                           HasLV ? LV.isVolatileQualified()
                                 : RV.isVolatileQualified());
   }
Index: CodeGen/CGBlocks.cpp
===================================================================
--- CodeGen/CGBlocks.cpp
+++ CodeGen/CGBlocks.cpp
@@ -929,7 +929,8 @@
             AggValueSlot::forAddr(blockField, Qualifiers(),
                                   AggValueSlot::IsDestructed,
                                   AggValueSlot::DoesNotNeedGCBarriers,
-                                  AggValueSlot::IsNotAliased);
+                                  AggValueSlot::IsNotAliased,
+                                  AggValueSlot::DoesNotOverlap);
         EmitAggExpr(copyExpr, Slot);
       } else {
         EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr);
Index: CodeGen/CGAtomic.cpp
===================================================================
--- CodeGen/CGAtomic.cpp
+++ CodeGen/CGAtomic.cpp
@@ -1513,7 +1513,8 @@
                                     getAtomicType());
     bool IsVolatile = rvalue.isVolatileQualified() ||
                       LVal.isVolatileQualified();
-    CGF.EmitAggregateCopy(Dest, Src, getAtomicType(), IsVolatile);
+    CGF.EmitAggregateCopy(Dest, Src, getAtomicType(),
+                          AggValueSlot::DoesNotOverlap, IsVolatile);
     return;
   }
 
@@ -2008,6 +2009,7 @@
                                         AggValueSlot::IsNotDestructed,
                                         AggValueSlot::DoesNotNeedGCBarriers,
                                         AggValueSlot::IsNotAliased,
+                                        AggValueSlot::DoesNotOverlap,
                                         Zeroed ? AggValueSlot::IsZeroed :
                                                  AggValueSlot::IsNotZeroed);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D45306: P... Richard Smith - zygoloid via Phabricator via cfe-commits

Reply via email to