[PATCH] D45306: PR36992 don't overwrite virtual bases in tail padding

2018-04-05 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC329342: PR36992: do not store beyond the dsize of a class 
object unless we know (authored by rsmith, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D45306?vs=141205=141217#toc

Repository:
  rC Clang

https://reviews.llvm.org/D45306

Files:
  lib/CodeGen/CGAtomic.cpp
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGClass.cpp
  lib/CodeGen/CGDecl.cpp
  lib/CodeGen/CGDeclCXX.cpp
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CGExprAgg.cpp
  lib/CodeGen/CGExprCXX.cpp
  lib/CodeGen/CGObjC.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGStmt.cpp
  lib/CodeGen/CGValue.h
  lib/CodeGen/CodeGenFunction.h
  lib/CodeGen/ItaniumCXXABI.cpp
  test/CodeGenCXX/tail-padding.cpp

Index: lib/CodeGen/CGDecl.cpp
===
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1416,17 +1416,17 @@
   }
 }
 
-/// Emit an expression as an initializer for a variable at the given
-/// location.  The expression is not necessarily the normal
-/// initializer for the variable, and the address is not necessarily
+/// Emit an expression as an initializer for an object (variable, field, etc.)
+/// at the given location.  The expression is not necessarily the normal
+/// initializer for the object, and the address is not necessarily
 /// its normal location.
 ///
 /// \param init the initializing expression
-/// \param var the variable to act as if we're initializing
+/// \param D the object to act as if we're initializing
 /// \param loc the address to initialize; its type is a pointer
-///   to the LLVM mapping of the variable's type
+///   to the LLVM mapping of the object's type
 /// \param alignment the alignment of the address
-/// \param capturedByInit true if the variable is a __block variable
+/// \param capturedByInit true if \p D is a __block variable
 ///   whose address is potentially changed by the initializer
 void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
  LValue lvalue, bool capturedByInit) {
@@ -1454,11 +1454,17 @@
 if (type->isAtomicType()) {
   EmitAtomicInit(const_cast(init), lvalue);
 } else {
+  AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap;
+  if (isa(D))
+Overlap = AggValueSlot::DoesNotOverlap;
+  else if (auto *FD = dyn_cast(D))
+Overlap = overlapForFieldInit(FD);
   // TODO: how can we delay here if D is captured by its initializer?
   EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
   AggValueSlot::IsDestructed,
  AggValueSlot::DoesNotNeedGCBarriers,
-  AggValueSlot::IsNotAliased));
+  AggValueSlot::IsNotAliased,
+  Overlap));
 }
 return;
   }
Index: lib/CodeGen/CGValue.h
===
--- lib/CodeGen/CGValue.h
+++ lib/CodeGen/CGValue.h
@@ -472,17 +472,25 @@
   /// evaluating an expression which constructs such an object.
   bool AliasedFlag : 1;
 
+  /// This is set to true if the tail padding of this slot might overlap 
+  /// another object that may have already been initialized (and whose
+  /// value must be preserved by this initialization). If so, we may only
+  /// store up to the dsize of the type. Otherwise we can widen stores to
+  /// the size of the type.
+  bool OverlapFlag : 1;
+
 public:
   enum IsAliased_t { IsNotAliased, IsAliased };
   enum IsDestructed_t { IsNotDestructed, IsDestructed };
   enum IsZeroed_t { IsNotZeroed, IsZeroed };
+  enum Overlap_t { DoesNotOverlap, MayOverlap };
   enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
 
   /// ignored - Returns an aggregate value slot indicating that the
   /// aggregate value is being ignored.
   static AggValueSlot ignored() {
 return forAddr(Address::invalid(), Qualifiers(), IsNotDestructed,
-   DoesNotNeedGCBarriers, IsNotAliased);
+   DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap);
   }
 
   /// forAddr - Make a slot for an aggregate value.
@@ -500,6 +508,7 @@
   IsDestructed_t isDestructed,
   NeedsGCBarriers_t needsGC,
   IsAliased_t isAliased,
+  Overlap_t mayOverlap,
   IsZeroed_t isZeroed = IsNotZeroed) {
 AggValueSlot AV;
 if (addr.isValid()) {
@@ -514,16 +523,18 @@
 AV.ObjCGCFlag = needsGC;
 AV.ZeroedFlag = isZeroed;
 AV.AliasedFlag = isAliased;
+AV.OverlapFlag = mayOverlap;
 return AV;
   }
 
   static AggValueSlot forLValue(const LValue ,
 IsDestructed_t isDestructed,
   

[PATCH] D45306: PR36992 don't overwrite virtual bases in tail padding

2018-04-05 Thread John McCall via Phabricator via cfe-commits
rjmccall accepted this revision.
rjmccall added a comment.
This revision is now accepted and ready to land.

Okay, LGTM.


https://reviews.llvm.org/D45306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45306: PR36992 don't overwrite virtual bases in tail padding

2018-04-05 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

In https://reviews.llvm.org/D45306#1058792, @rjmccall wrote:

> Changing the requirements on the return-value slot would be an ABI break, no?


It would be, yes. But WG21 seems to have a taste for breaking changes at the 
moment, so who knows... I'm happy to remove `overlapForReturnValue` for now; 
it's easy enough to find these places again should the change ever happen.

> And it would force to us to treat all complete-object constructions as 
> nvsize-limited unless they're constructions of known sizeof-sized allocations.

P0840 pushes us there already. Fortunately, we don't treat complete-object 
constructors specially in this regard currently, and no other compiler seems to 
do so either. However, when performing the trivial copy/move constructor -> 
`memcpy` optimization (which is I think the most interesting case for extending 
a store into the tail padding), we do know which object is being initialized, 
so we can take into account whether it could have something else allocated into 
its tail padding.




Comment at: CodeGen/CGDecl.cpp:1462
+  AggValueSlot::IsNotAliased,
+  AggValueSlot::DoesNotOverlap));
 }

rjmccall wrote:
> How is 'does not overlap' justified here?  Should this be propagated into the 
> LValue?
That's what I get for trusting comments :)

No functional change from `DoesNotOverlap` here yet, though, because in 
practice this is always called with some kind of `VarDecl` (which is 
`DoesNotOverlap` because it's a complete object) or some kind of `FieldDecl` 
(which is `DoesNotOverlap` until P0840 lands).


https://reviews.llvm.org/D45306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45306: PR36992 don't overwrite virtual bases in tail padding

2018-04-05 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith updated this revision to Diff 141205.
rsmith marked 2 inline comments as done.

https://reviews.llvm.org/D45306

Files:
  CodeGen/CGAtomic.cpp
  CodeGen/CGBlocks.cpp
  CodeGen/CGCall.cpp
  CodeGen/CGClass.cpp
  CodeGen/CGDecl.cpp
  CodeGen/CGDeclCXX.cpp
  CodeGen/CGExpr.cpp
  CodeGen/CGExprAgg.cpp
  CodeGen/CGExprCXX.cpp
  CodeGen/CGObjC.cpp
  CodeGen/CGOpenMPRuntime.cpp
  CodeGen/CGStmt.cpp
  CodeGen/CGValue.h
  CodeGen/CodeGenFunction.h
  CodeGen/ItaniumCXXABI.cpp
  CodeGenCXX/tail-padding.cpp

Index: CodeGenCXX/tail-padding.cpp
===
--- /dev/null
+++ CodeGenCXX/tail-padding.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
+
+// PR36992
+namespace Implicit {
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A {};
+  static_assert(sizeof(C) == sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN8Implicit1CC1EOS0_
+  // CHECK: call void @_ZN8Implicit1AC2ERKS0_(
+  // Note: this must memcpy 7 bytes, not 8, to avoid trampling over the virtual base class.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 7, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN8Implicit1CE
+}
+
+namespace InitWithinNVSize {
+  // This is the same as the previous test, except that the A base lies
+  // entirely within the nvsize of C. This makes it valid to copy at the
+  // full width.
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A { char x; };
+  static_assert(sizeof(C) > sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN16InitWithinNVSize1CC1EOS0_
+  // CHECK: call void @_ZN16InitWithinNVSize1AC2ERKS0_(
+  // This copies over the 'C::x' member, but that's OK because we've not initialized it yet.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN16InitWithinNVSize1CE
+  // CHECK: store i8
+}
Index: CodeGen/ItaniumCXXABI.cpp
===
--- CodeGen/ItaniumCXXABI.cpp
+++ CodeGen/ItaniumCXXABI.cpp
@@ -3896,7 +3896,7 @@
 caughtExnAlignment);
 LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
 LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
-CGF.EmitAggregateCopy(Dest, Src, CatchType);
+CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap);
 return;
   }
 
@@ -3923,7 +3923,8 @@
   AggValueSlot::forAddr(ParamAddr, Qualifiers(),
 AggValueSlot::IsNotDestructed,
 AggValueSlot::DoesNotNeedGCBarriers,
-AggValueSlot::IsNotAliased));
+AggValueSlot::IsNotAliased,
+AggValueSlot::DoesNotOverlap));
 
   // Leave the terminate scope.
   CGF.EHStack.popTerminate();
Index: CodeGen/CodeGenFunction.h
===
--- CodeGen/CodeGenFunction.h
+++ CodeGen/CodeGenFunction.h
@@ -2061,7 +2061,8 @@
  T.getQualifiers(),
  AggValueSlot::IsNotDestructed,
  AggValueSlot::DoesNotNeedGCBarriers,
- AggValueSlot::IsNotAliased);
+ AggValueSlot::IsNotAliased,
+ AggValueSlot::DoesNotOverlap);
   }
 
   /// Emit a cast to void* in the appropriate address space.
@@ -2118,28 +2119,52 @@
 }
 return false;
   }
-  /// EmitAggregateCopy - Emit an aggregate assignment.
-  ///
-  /// The difference to EmitAggregateCopy is that tail padding is not copied.
-  /// This is required for correctness when assigning non-POD structures in C++.
+
+  /// Determine whether a return value slot may overlap some other object.
+  AggValueSlot::Overlap_t overlapForReturnValue() {
+// FIXME: Assuming no overlap here breaks guaranteed copy elision for base
+// class subobjects. These cases may need to be revisited depending on the
+// resolution of the relevant core issue.
+return AggValueSlot::DoesNotOverlap;
+  }
+
+  /// Determine whether a field initialization may overlap some other object.
+  AggValueSlot::Overlap_t overlapForFieldInit(const FieldDecl *FD) {
+// FIXME: These cases can result in overlap as a result of P0840R0's
+// [[no_unique_address]] attribute. We can still infer NoOverlap in the
+// presence of that attribute if the field is within the nvsize of its
+// containing class, because non-virtual subobjects are initialized in
+// address order.
+return AggValueSlot::DoesNotOverlap;
+  }
+
+  /// Determine whether a base class initialization may overlap 

[PATCH] D45306: PR36992 don't overwrite virtual bases in tail padding

2018-04-05 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Changing the requirements on the return-value slot would be an ABI break, no?  
And it would force to us to treat all complete-object constructions as 
nvsize-limited unless they're constructions of known sizeof-sized allocations.




Comment at: CodeGen/CGDecl.cpp:1462
+  AggValueSlot::IsNotAliased,
+  AggValueSlot::DoesNotOverlap));
 }

How is 'does not overlap' justified here?  Should this be propagated into the 
LValue?



Comment at: CodeGen/CGExprAgg.cpp:1721
+  TypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+  // FIXME: What about VLAs of VLAs?
   assert(!TypeInfo.first.isZero());

emitArrayLength drills down to the base non-array type; it handles nested VLAs.


Repository:
  rC Clang

https://reviews.llvm.org/D45306



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45306: PR36992 don't overwrite virtual bases in tail padding

2018-04-04 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith created this revision.
rsmith added a reviewer: rjmccall.
Herald added subscribers: cfe-commits, sanjoy.

When initializing a base class, we might accidentally overwrite a virtual base 
in its tail padding in some situations (in particular, when emitting a trivial 
copy constructor as a memcpy, we copy the full object size rather than only the 
dsize):

  struct A { char c; A(const A&) : c(1) {} };
  struct B { int n; char c[3]; ~B(); };
  struct C : B, virtual A {};
  C f(C c) { return c; }

(Here, Clang emits an 8-byte memcpy for the `B` base in `f`, overwriting the 
already-initialized `A` vbase.)

It's easy enough to prevent that from happening entirely, but we would still 
like to copy the padded-to-alignment size whenever we can (for example, see the 
tests in `test/CodeGenCXX/assign-construct-memcpy.cpp`). This patch adds a flag 
to `AggValueSlot` to indicate whether the tail padding of the aggregate might 
overlap some other object, and propagate that flag far enough that we can see 
it when emitting a trivial copy ctor call. When initializing a base class, we 
treat it as potentially-overlapping only if the base class lies partly outside 
the nvsize of the derived class -- otherwise, because we know non-virtual 
subobjects are initialized in address order, an overlarge store is valid.

This patch also lays some groundwork for P0840 , which 
permits packing into the tail padding for fields marked with a new attribute, 
wherein the same situation can arise (a vbase can be constructed in the tail 
padding of the last field if it has the attribute):

  struct A { char c; A(const A&) : c(1) {} };
  struct B { int n; char c[3]; ~B(); };
  struct C : virtual A { [[no_unique_address]] B b; };
  C f(C c) { return c; }

(Copying a `C` object must not perform an 8-byte copy for the `B` subobject, 
even though it's a most-derived object, because its tail padding contains an 
`A`.)

It also prepares us for the possibility that WG21 might decide that guaranteed 
copy elision is supposed to apply to base class initialization (for classes 
without virtual bases), which would mean that we cannot assume that the tail 
padding of the return slot of a function has no overlap with other objects:

  struct A { char c; };
  struct B { int n; char c[3]; ~B(); };
  B f() { return /*...*/ }
  struct C : B, virtual A {
C() : A{1}, B(f()) {}
  };

If `B(f())` is not permitted to make a copy, then `f()` cannot trample on its 
return slot's tail padding (which in this example might contain an 
already-initialized `A` object).


Repository:
  rC Clang

https://reviews.llvm.org/D45306

Files:
  CodeGen/CGAtomic.cpp
  CodeGen/CGBlocks.cpp
  CodeGen/CGCall.cpp
  CodeGen/CGClass.cpp
  CodeGen/CGDecl.cpp
  CodeGen/CGDeclCXX.cpp
  CodeGen/CGExpr.cpp
  CodeGen/CGExprAgg.cpp
  CodeGen/CGExprCXX.cpp
  CodeGen/CGObjC.cpp
  CodeGen/CGOpenMPRuntime.cpp
  CodeGen/CGStmt.cpp
  CodeGen/CGValue.h
  CodeGen/CodeGenFunction.h
  CodeGen/ItaniumCXXABI.cpp
  CodeGenCXX/tail-padding.cpp

Index: CodeGenCXX/tail-padding.cpp
===
--- /dev/null
+++ CodeGenCXX/tail-padding.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
+
+// PR36992
+namespace Implicit {
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A {};
+  static_assert(sizeof(C) == sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN8Implicit1CC1EOS0_
+  // CHECK: call void @_ZN8Implicit1AC2ERKS0_(
+  // Note: this must memcpy 7 bytes, not 8, to avoid trampling over the virtual base class.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 7, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN8Implicit1CE
+}
+
+namespace InitWithinNVSize {
+  // This is the same as the previous test, except that the A base lies
+  // entirely within the nvsize of C. This makes it valid to copy at the
+  // full width.
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A { char x; };
+  static_assert(sizeof(C) > sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN16InitWithinNVSize1CC1EOS0_
+  // CHECK: call void @_ZN16InitWithinNVSize1AC2ERKS0_(
+  // This copies over the 'C::x' member, but that's OK because we've not initialized it yet.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN16InitWithinNVSize1CE
+  // CHECK: store i8
+}
Index: CodeGen/ItaniumCXXABI.cpp
===
--- CodeGen/ItaniumCXXABI.cpp
+++ CodeGen/ItaniumCXXABI.cpp
@@ -3896,7 +3896,7 @@
 caughtExnAlignment);
 LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
 LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
-