This patch teaches -fcatch-undefined-behavior to catch cases where a reference 
is bound to a glvalue which doesn't point to storage of the right size and 
alignment, and likewise for 'this' pointers. It also adds the alignment check 
to the existing checks for loads and stores.

Clang and LLVM's tests are now clean under these new checks, except for TypeLoc 
alignment.

http://llvm-reviews.chandlerc.com/D24

Files:
  test/CodeGenCXX/catch-undef-behavior.cpp
  test/CodeGen/catch-undef-behavior.c
  lib/CodeGen/CGExprScalar.cpp
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CGExprAgg.cpp
  lib/CodeGen/CGExprCXX.cpp
  lib/CodeGen/CodeGenFunction.h
Index: test/CodeGenCXX/catch-undef-behavior.cpp
===================================================================
--- test/CodeGenCXX/catch-undef-behavior.cpp
+++ test/CodeGenCXX/catch-undef-behavior.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -fcatch-undefined-behavior -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+
+// CHECK: @_Z17reference_binding
+void reference_binding(int *p) {
+  // C++ core issue 453: If an lvalue to which a reference is directly bound
+  // designates neither an existing object or function of an appropriate type,
+  // nor a region of storage of suitable size and alignment to contain an object
+  // of the reference's type, the behavior is undefined.
+
+  // CHECK: icmp ne {{.*}}, null
+
+  // CHECK: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64
+  // CHECK-NEXT: icmp uge i64 %[[SIZE]], 4
+
+  // CHECK: %[[PTRINT:.*]] = ptrtoint
+  // CHECK-NEXT: %[[MISALIGN:.*]] = and i64 %[[PTRINT]], 3
+  // CHECK-NEXT: icmp eq i64 %[[MISALIGN]], 0
+  int &r = *p;
+}
+
+struct S {
+  double d;
+  int a, b;
+  virtual int f();
+};
+
+// CHECK: @_Z13member_access
+void member_access(S *p) {
+  // (1) Check 'p' is appropriately sized and aligned for member access.
+
+  // FIXME: Check vptr is for 'S' or a class derived from 'S'.
+
+  // CHECK: icmp ne {{.*}}, null
+
+  // CHECK: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64
+  // CHECK-NEXT: icmp uge i64 %[[SIZE]], 24
+
+  // CHECK: %[[PTRINT:.*]] = ptrtoint
+  // CHECK-NEXT: %[[MISALIGN:.*]] = and i64 %[[PTRINT]], 7
+  // CHECK-NEXT: icmp eq i64 %[[MISALIGN]], 0
+
+  // (2) Check 'p->b' is appropriately sized and aligned for a load.
+
+  // FIXME: Suppress this in the trivial case of a member access, because we
+  // know we've just checked the member access expression itself.
+
+  // CHECK: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64
+  // CHECK-NEXT: icmp uge i64 %[[SIZE]], 4
+
+  // CHECK: %[[PTRINT:.*]] = ptrtoint
+  // CHECK-NEXT: %[[MISALIGN:.*]] = and i64 %[[PTRINT]], 3
+  // CHECK-NEXT: icmp eq i64 %[[MISALIGN]], 0
+  int k = p->b;
+
+  // (3) Check 'p' is appropriately sized and aligned for member function call.
+
+  // FIXME: Check vptr is for 'S' or a class derived from 'S'.
+
+  // CHECK: icmp ne {{.*}}, null
+
+  // CHECK: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64
+  // CHECK-NEXT: icmp uge i64 %[[SIZE]], 24
+
+  // CHECK: %[[PTRINT:.*]] = ptrtoint
+  // CHECK-NEXT: %[[MISALIGN:.*]] = and i64 %[[PTRINT]], 7
+  // CHECK-NEXT: icmp eq i64 %[[MISALIGN]], 0
+  k = p->f();
+}
Index: test/CodeGen/catch-undef-behavior.c
===================================================================
--- test/CodeGen/catch-undef-behavior.c
+++ test/CodeGen/catch-undef-behavior.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fcatch-undefined-behavior -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fcatch-undefined-behavior -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
 
 // PR6805
 // CHECK: @foo
@@ -11,7 +11,11 @@
 
 // CHECK: @bar
 int bar(int *a) {
-  // CHECK: objectsize
-  // CHECK: icmp uge
+  // CHECK: %[[SIZE:.*]] = call i64 @llvm.objectsize.i64
+  // CHECK-NEXT: icmp uge i64 %[[SIZE]], 4
+
+  // CHECK: %[[PTRINT:.*]] = ptrtoint
+  // CHECK-NEXT: %[[MISALIGN:.*]] = and i64 %[[PTRINT]], 3
+  // CHECK-NEXT: icmp eq i64 %[[MISALIGN]], 0
   return *a;
 }
Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -80,7 +80,9 @@
 
   llvm::Type *ConvertType(QualType T) { return CGF.ConvertType(T); }
   LValue EmitLValue(const Expr *E) { return CGF.EmitLValue(E); }
-  LValue EmitCheckedLValue(const Expr *E) { return CGF.EmitCheckedLValue(E); }
+  LValue EmitCheckedLValue(const Expr *E, CodeGenFunction::CheckType CT) {
+    return CGF.EmitCheckedLValue(E, CT);
+  }
 
   Value *EmitLoadOfLValue(LValue LV) {
     return CGF.EmitLoadOfLValue(LV).getScalarVal();
@@ -90,7 +92,7 @@
   /// value l-value, this method emits the address of the l-value, then loads
   /// and returns the result.
   Value *EmitLoadOfLValue(const Expr *E) {
-    return EmitLoadOfLValue(EmitCheckedLValue(E));
+    return EmitLoadOfLValue(EmitCheckedLValue(E, CodeGenFunction::CT_Load));
   }
 
   /// EmitConversionToBool - Convert the specified expression value to a
@@ -1680,7 +1682,7 @@
   OpInfo.Opcode = E->getOpcode();
   OpInfo.E = E;
   // Load/convert the LHS.
-  LValue LHSLV = EmitCheckedLValue(E->getLHS());
+  LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::CT_Store);
   OpInfo.LHS = EmitLoadOfLValue(LHSLV);
 
   llvm::PHINode *atomicPHI = 0;
@@ -2326,7 +2328,7 @@
 
   case Qualifiers::OCL_Weak:
     RHS = Visit(E->getRHS());
-    LHS = EmitCheckedLValue(E->getLHS());    
+    LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::CT_Store);
     RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore);
     break;
 
@@ -2336,7 +2338,7 @@
     // __block variables need to have the rhs evaluated first, plus
     // this should improve codegen just a little.
     RHS = Visit(E->getRHS());
-    LHS = EmitCheckedLValue(E->getLHS());
+    LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::CT_Store);
 
     // Store the value into the LHS.  Bit-fields are handled specially
     // because the result is altered by the store, i.e., [C99 6.5.16p1]
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -486,6 +486,15 @@
                                                    ReferenceTemporaryDtor,
                                                    ObjCARCReferenceLifetimeType,
                                                    InitializedDecl);
+  if (CatchUndefined && !E->getType()->isFunctionType()) {
+    // C++11 [dcl.ref]p5 (as amended by core issue 453):
+    //   If a glvalue to which a reference is directly bound designates neither
+    //   an existing object or function of an appropriate type nor a region of
+    //   storage of suitable size and alignment to contain an object of the
+    //   reference's type, the behavior is undefined.
+    QualType Ty = E->getType();
+    EmitCheck(CT_ReferenceBinding, Value, Ty);
+  }
   if (!ReferenceTemporaryDtor && ObjCARCReferenceLifetimeType.isNull())
     return RValue::get(Value);
   
@@ -549,22 +558,53 @@
       ->getZExtValue();
 }
 
-void CodeGenFunction::EmitCheck(llvm::Value *Address, unsigned Size) {
+void CodeGenFunction::EmitCheck(CheckType CT, llvm::Value *Address, QualType Ty,
+                                CharUnits Alignment) {
   if (!CatchUndefined)
     return;
 
-  // This needs to be to the standard address space.
-  Address = Builder.CreateBitCast(Address, Int8PtrTy);
+  llvm::Value *Cond = 0;
 
-  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, IntPtrTy);
+  if (CT != CT_Load && CT != CT_Store) {
+    // The glvalue must not be an empty glvalue. Don't bother checking this for
+    // loads and stores, because we will get a segfault anyway (if the operation
+    // isn't optimized out).
+    Cond = Builder.CreateICmpNE(
+        Address, llvm::Constant::getNullValue(Address->getType()));
+  }
 
-  llvm::Value *Min = Builder.getFalse();
-  llvm::Value *C = Builder.CreateCall2(F, Address, Min);
-  llvm::BasicBlock *Cont = createBasicBlock();
-  Builder.CreateCondBr(Builder.CreateICmpUGE(C,
-                                        llvm::ConstantInt::get(IntPtrTy, Size)),
-                       Cont, getTrapBB());
-  EmitBlock(Cont);
+  if (!Ty->isIncompleteType()) {
+    uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity();
+    uint64_t AlignVal = Alignment.getQuantity();
+    if (!AlignVal)
+      AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity();
+
+    // This needs to be to the standard address space.
+    Address = Builder.CreateBitCast(Address, Int8PtrTy);
+
+    // The glvalue must refer to a large enough storage region.
+    // FIXME: If -faddress-sanitizer is enabled, insert dynamic instrumentation
+    //        to check this.
+    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, IntPtrTy);
+    llvm::Value *Min = Builder.getFalse();
+    llvm::Value *LargeEnough =
+        Builder.CreateICmpUGE(Builder.CreateCall2(F, Address, Min),
+                              llvm::ConstantInt::get(IntPtrTy, Size));
+    Cond = Cond ? Builder.CreateAnd(Cond, LargeEnough) : LargeEnough;
+
+    // The glvalue must be suitably aligned.
+    llvm::Value *Align =
+        Builder.CreateAnd(Builder.CreatePtrToInt(Address, IntPtrTy),
+                          llvm::ConstantInt::get(IntPtrTy, AlignVal - 1));
+    Cond = Builder.CreateAnd(Cond,
+        Builder.CreateICmpEQ(Align, llvm::ConstantInt::get(IntPtrTy, 0)));
+  }
+
+  if (Cond) {
+    llvm::BasicBlock *Cont = createBasicBlock();
+    Builder.CreateCondBr(Cond, Cont, getTrapBB());
+    EmitBlock(Cont);
+  }
 }
 
 
@@ -641,11 +681,10 @@
   return MakeAddrLValue(llvm::UndefValue::get(Ty), E->getType());
 }
 
-LValue CodeGenFunction::EmitCheckedLValue(const Expr *E) {
+LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, CheckType CT) {
   LValue LV = EmitLValue(E);
   if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple())
-    EmitCheck(LV.getAddress(), 
-              getContext().getTypeSizeInChars(E->getType()).getQuantity());
+    EmitCheck(CT, LV.getAddress(), E->getType(), LV.getAlignment());
   return LV;
 }
 
@@ -2114,11 +2153,13 @@
 
   // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
   LValue BaseLV;
-  if (E->isArrow())
-    BaseLV = MakeNaturalAlignAddrLValue(EmitScalarExpr(BaseExpr),
-                                        BaseExpr->getType()->getPointeeType());
-  else
-    BaseLV = EmitLValue(BaseExpr);
+  if (E->isArrow()) {
+    llvm::Value *Ptr = EmitScalarExpr(BaseExpr);
+    QualType PtrTy = BaseExpr->getType()->getPointeeType();
+    EmitCheck(CT_MemberAccess, Ptr, PtrTy);
+    BaseLV = MakeNaturalAlignAddrLValue(Ptr, PtrTy);
+  } else
+    BaseLV = EmitCheckedLValue(BaseExpr, CT_MemberAccess);
 
   NamedDecl *ND = E->getMemberDecl();
   if (FieldDecl *Field = dyn_cast<FieldDecl>(ND)) {
Index: lib/CodeGen/CGExprAgg.cpp
===================================================================
--- lib/CodeGen/CGExprAgg.cpp
+++ lib/CodeGen/CGExprAgg.cpp
@@ -549,8 +549,10 @@
 void AggExprEmitter::VisitCastExpr(CastExpr *E) {
   switch (E->getCastKind()) {
   case CK_Dynamic: {
+    // FIXME: Can this actually happen? We have no test coverage for it.
     assert(isa<CXXDynamicCastExpr>(E) && "CK_Dynamic without a dynamic_cast?");
-    LValue LV = CGF.EmitCheckedLValue(E->getSubExpr());
+    LValue LV = CGF.EmitCheckedLValue(E->getSubExpr(),
+                                      CodeGenFunction::CT_Load);
     // FIXME: Do we also need to handle property references here?
     if (LV.isSimple())
       CGF.EmitDynamicCast(LV.getAddress(), cast<CXXDynamicCastExpr>(E));
Index: lib/CodeGen/CGExprCXX.cpp
===================================================================
--- lib/CodeGen/CGExprCXX.cpp
+++ lib/CodeGen/CGExprCXX.cpp
@@ -33,6 +33,11 @@
   assert(MD->isInstance() &&
          "Trying to emit a member call expr on a static method!");
 
+  // C++11 [class.mfct.non-static]p2:
+  //   If a non-static member function of a class X is called for an object that
+  //   is not of type X, or of a type derived from X, the behavior is undefined.
+  EmitCheck(CT_MemberCall, This, getContext().getRecordType(MD->getParent()));
+
   CallArgList Args;
 
   // Push the this ptr.
@@ -337,6 +342,8 @@
   else 
     This = EmitLValue(BaseExpr).getAddress();
 
+  EmitCheck(CT_MemberCall, This, QualType(MPT->getClass(), 0));
+
   // Ask the ABI to load the callee.  Note that This is modified.
   llvm::Value *Callee =
     CGM.getCXXABI().EmitLoadOfMemberFunctionPointer(*this, This, MemFnPtr, MPT);
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1834,8 +1834,30 @@
   void EmitStdInitializerListCleanup(llvm::Value *loc,
                                      const InitListExpr *init);
 
-  void EmitCheck(llvm::Value *, unsigned Size);
+  /// \brief Situations in which we might emit a check for the suitability of a
+  ///        pointer or glvalue.
+  enum CheckType {
+    /// Checking the operand of a load. Must be suitably sized and aligned.
+    CT_Load,
+    /// Checking the destination of a store. Must be suitably sized and aligned.
+    CT_Store,
+    /// Checking the bound value in a reference binding. Must be suitably sized
+    /// and aligned, but is not required to refer to an object (until the
+    /// reference is used), per core issue 453.
+    CT_ReferenceBinding,
+    /// Checking the object expression in a non-static data member access. Must
+    /// be an object within its lifetime.
+    CT_MemberAccess,
+    /// Checking the 'this' pointer for a call to a non-static member function.
+    /// Must be an object within its lifetime.
+    CT_MemberCall
+  };
 
+  /// EmitCheck - Emit a check that \p V is the address of storage of the
+  /// appropriate size and alignment for an object of type \p Type.
+  void EmitCheck(CheckType CT, llvm::Value *V,
+                 QualType Type, CharUnits Alignment = CharUnits::Zero());
+
   llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                        bool isInc, bool isPre);
   ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
@@ -2037,7 +2059,7 @@
   /// checking code to guard against undefined behavior.  This is only
   /// suitable when we know that the address will be used to access the
   /// object.
-  LValue EmitCheckedLValue(const Expr *E);
+  LValue EmitCheckedLValue(const Expr *E, CheckType CT);
 
   /// EmitToMemory - Change a scalar value from its value
   /// representation to its in-memory representation.
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to