Index: test/CodeGenCXX/implicit-copy-constructor.cpp
===================================================================
--- test/CodeGenCXX/implicit-copy-constructor.cpp	(revision 172158)
+++ test/CodeGenCXX/implicit-copy-constructor.cpp	(working copy)
@@ -46,7 +46,7 @@
 // CHECK: call void @_ZN1AD1Ev
 // CHECK: call void @_ZN1AC2ERS_
 // CHECK: call void @_ZN1BC2ERS_
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 24}}
+// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 28}}
 // CHECK: call void @_ZN1BC1ERS_
 // CHECK: br
 // CHECK: {{icmp ult.*, 2}}
@@ -54,8 +54,7 @@
 // CHECK: call void @_ZN1AC1Ev
 // CHECK: call void @_ZN1CC1ERS_1A
 // CHECK: call void @_ZN1AD1Ev
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 288}}
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 12}}
+// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 300}}
 // CHECK: ret void
 
 
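The updated sizes above are the point of the patch: adjacent trivially-copyable members that used to be copied piecemeal are now coalesced into a single memcpy. Here the 24-byte copy absorbs an adjacent 4-byte field (24 -> 28), and the former 288-byte and 12-byte copies merge into one 300-byte copy. A minimal sketch of the effect, with hypothetical types rather than the actual test fixture:

  struct NonTrivial {
    NonTrivial();
    NonTrivial(const NonTrivial &);
  };

  struct S {
    NonTrivial NT; // Copied with a constructor call, never a memcpy.
    int Arr[6];    // 24 bytes...
    int Tail;      // ...plus 4 adjacent bytes: previously a 24-byte memcpy
                   // plus a separate copy, now one 28-byte memcpy.
  };

  void test(const S &In) {
    S Out(In); // Implicitly-defined copy constructor.
  }
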
Index: test/CodeGenCXX/implicit-copy-assign-operator.cpp
===================================================================
--- test/CodeGenCXX/implicit-copy-assign-operator.cpp	(revision 172158)
+++ test/CodeGenCXX/implicit-copy-assign-operator.cpp	(working copy)
@@ -44,7 +44,7 @@
 // CHECK: {{call.*_ZN1AaSERS_}}
 // CHECK: {{call.*_ZN1BaSERS_}}
 // CHECK: {{call.*_ZN1CaSERKS_}}
-// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 24}}
+// CHECK: {{call void @llvm.memcpy.p0i8.p0i8.i64.*i64 28}}
 // CHECK: {{call.*_ZN1BaSERS_}}
 // CHECK: br
 // CHECK: {{call.*_ZN1CaSERKS_}}
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp	(revision 172158)
+++ lib/CodeGen/CodeGenFunction.cpp	(working copy)
@@ -546,6 +546,11 @@
     // The lambda "__invoke" function is special, because it forwards or
     // clones the body of the function call operator (but is actually static).
     EmitLambdaStaticInvokeFunction(cast<CXXMethodDecl>(FD));
+  } else if (FD->isImplicit() && isa<CXXMethodDecl>(FD) &&
+             cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator()) {
+    // Implicitly-defined copy-assignment operators get the same special
+    // treatment as implicitly-defined copy constructors.
+    EmitImplicitAssignmentOperatorBody(Args);
   }
   else
     EmitFunctionBody(Args);
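
Only the implicitly-defined operator takes the new path; a user-written copy-assignment operator fails the isImplicit() check and is emitted through EmitFunctionBody as before. A hypothetical illustration:

  struct P {
    int A, B;
    // No operator= declared: the implicitly-defined copy-assignment
    // operator satisfies isImplicit() && isCopyAssignmentOperator(), so
    // its body is emitted by EmitImplicitAssignmentOperatorBody.
  };

  struct Q {
    int A, B;
    Q &operator=(const Q &Other) { // User-provided: isImplicit() is false,
      A = Other.A;                 // so this body goes through the normal
      B = Other.B;                 // EmitFunctionBody path.
      return *this;
    }
  };
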
Index: lib/CodeGen/CGExprAgg.cpp
===================================================================
--- lib/CodeGen/CGExprAgg.cpp	(revision 172158)
+++ lib/CodeGen/CGExprAgg.cpp	(working copy)
@@ -1380,6 +1380,23 @@
                        /*TBAATag=*/0, TBAAStructTag);
 }
 
+void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr,
+                                        llvm::Value *SrcPtr,
+                                        CharUnits Size, CharUnits Alignment) {
+  llvm::PointerType *DPT = cast<llvm::PointerType>(DestPtr->getType());
+  llvm::Type *DBP =
+    llvm::Type::getInt8PtrTy(CGM.getLLVMContext(), DPT->getAddressSpace());
+  DestPtr = Builder.CreateBitCast(DestPtr, DBP);
+
+  llvm::PointerType *SPT = cast<llvm::PointerType>(SrcPtr->getType());
+  llvm::Type *SBP =
+    llvm::Type::getInt8PtrTy(CGM.getLLVMContext(), SPT->getAddressSpace());
+  SrcPtr = Builder.CreateBitCast(SrcPtr, SBP);
+
+  Builder.CreateMemCpy(DestPtr, SrcPtr, Size.getQuantity(),
+                       Alignment.getQuantity());
+}
+
 void CodeGenFunction::MaybeEmitStdInitializerListCleanup(llvm::Value *loc,
                                                          const Expr *init) {
   const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(init);
Index: lib/CodeGen/CGClass.cpp
===================================================================
--- lib/CodeGen/CGClass.cpp	(revision 172158)
+++ lib/CodeGen/CGClass.cpp	(working copy)
@@ -18,6 +18,7 @@
 #include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/Basic/Builtins.h"
 #include "clang/Frontend/CodeGenOptions.h"
 
 using namespace clang;
@@ -761,6 +762,302 @@
     ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 }
 
+namespace {
+  class FieldMemcpyizer {
+  public:
+    FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl,
+                    const VarDecl *SrcRec)
+      : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), 
+        RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
+        FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0),
+        LastAddedFieldIndex(0) { }
+
+    CodeGenFunction& GetCGF() { return CGF; }
+
+    const CodeGenFunction& GetCGF() const { return CGF; }
+
+    static bool isMemcpyableField(FieldDecl *F) {
+      QualType FieldType = F->getType();
+      if (FieldType.isVolatileQualified() || FieldType.isObjCGCWeak() ||
+          FieldType.isObjCGCStrong() ||
+          FieldType.hasStrongOrWeakObjCLifetime())
+        return false;
+      return true;
+    }
+
+    void AddMemcpyableField(FieldDecl *F) {
+      if (FirstField == 0) {
+        FirstField = F;
+        LastField = F;
+        FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+        LastFieldOffset = FirstFieldOffset;
+        LastAddedFieldIndex = F->getFieldIndex();
+        return;
+      }
+
+      assert(F->getFieldIndex() == LastAddedFieldIndex + 1 &&
+             "Cannot aggregate non-contiguous fields.");
+      LastAddedFieldIndex = F->getFieldIndex();
+
+      // The 'first' and 'last' fields are chosen by offset, rather than field
+      // index. This allows the code to support bitfields, as well as regular
+      // fields.
+      uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+      if (FOffset < FirstFieldOffset) {
+        FirstField = F;
+        FirstFieldOffset = FOffset;
+      }
+      if (FOffset > LastFieldOffset) {
+        LastField = F;
+        LastFieldOffset = FOffset;
+      }
+    }
+
+    CharUnits GetMemcpySize() const {
+      unsigned LastFieldSize =
+        CGF.getContext().getTypeInfo(LastField->getType()).first; 
+      uint64_t MemcpySizeBits =
+        LastFieldOffset + LastFieldSize - FirstFieldOffset;
+      CharUnits MemcpySize =
+        CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+      return MemcpySize;
+    }
+
+    void EmitMemcpy() {
+      // Give the subclass a chance to bail out if it feels the memcpy isn't
+      // worth it (e.g., it hasn't aggregated enough data).
+      if (FirstField == 0 || BailOut()) {
+        FirstField = 0;
+        return;
+      }
+
+      unsigned FirstFieldAlign =
+        CGF.getContext().getTypeInfo(FirstField->getType()).second;
+      assert(FirstFieldOffset % FirstFieldAlign == 0 && "Bad field alignment.");
+      CharUnits Alignment =
+        CGF.getContext().toCharUnitsFromBits(FirstFieldAlign);
+      CharUnits MemcpySize = GetMemcpySize();
+      QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl);
+      llvm::Value *ThisPtr = CGF.LoadCXXThis();
+      LValue DestLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy);
+      LValue Dest = CGF.EmitLValueForFieldInitialization(DestLV, FirstField);
+      llvm::Value *SrcPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(SrcRec));
+      LValue SrcLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy);
+      LValue Src = CGF.EmitLValueForFieldInitialization(SrcLV, FirstField);
+
+      CGF.EmitAggregateCopy(Dest.getAddress(), Src.getAddress(),
+                            MemcpySize, Alignment);
+      FirstField = 0; // Reset FirstField.
+    }
+
+    virtual bool BailOut() = 0;
+
+  private:
+    CodeGenFunction &CGF;
+    const CXXRecordDecl *ClassDecl;
+    const VarDecl *SrcRec;
+    const ASTRecordLayout &RecLayout;
+    FieldDecl *FirstField;
+    FieldDecl *LastField;
+    uint64_t FirstFieldOffset, LastFieldOffset;
+    unsigned LastAddedFieldIndex;
+  };
+
+  class ConstructorMemcpyizer : public FieldMemcpyizer {
+  private:
+
+    // Get the source argument of an implicitly-defined copy or move
+    // constructor. Returns null for other constructors.
+    static const VarDecl *GetCCSrc(const CXXConstructorDecl *CD,
+                                   FunctionArgList &Args) {
+      if (CD->isCopyOrMoveConstructor() && CD->isImplicitlyDefined())
+        return Args[Args.size() - 1];
+      return 0; 
+    }
+
+    // Returns true if a CXXCtorInitializer represents a member initialization
+    // that can be rolled into a memcpy.
+    bool MemberInitIsMemcpyable(CXXCtorInitializer *MemberInit) const {
+      if (!MemcpyableCtor)
+        return false;
+      // Indirect member inits (e.g. for anonymous unions) have a null
+      // getMember(); send those down the normal initialization path.
+      FieldDecl *Field = MemberInit->getMember();
+      if (Field == 0)
+        return false;
+      QualType FieldType = Field->getType();
+      CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
+
+      // Bail out on members that are neither POD nor trivially constructed.
+      if (!FieldType.isPODType(GetCGF().getContext()) &&
+          (!CE || !CE->getConstructor()->isTrivial()))
+        return false;
+
+      // Bail out on bitfields, volatile fields, and strong and weak fields.
+      if (!isMemcpyableField(Field))
+        return false;
+
+      // Otherwise we're good.
+      return true;
+    }
+
+  public:
+    ConstructorMemcpyizer(CodeGenFunction &CGF, const CXXConstructorDecl *CD,
+                          FunctionArgList &Args)
+      : FieldMemcpyizer(CGF, CD->getParent(), GetCCSrc(CD, Args)),
+        ConstructorDecl(CD),
+        MemcpyableCtor(CD->isImplicitlyDefined() &&
+                       CD->isCopyOrMoveConstructor() &&
+                       CGF.getLangOpts().getGC() == LangOptions::NonGC),
+        Args(Args) { }
+
+    void EmitConstructorCopy(CXXCtorInitializer *MemberInit) {
+      if (MemberInitIsMemcpyable(MemberInit)) {
+        AggregatedInits.push_back(MemberInit);
+        AddMemcpyableField(MemberInit->getMember());
+      } else {  
+        // If MemberInit can't be rolled into the memcpy, emit a memcpy for the
+        // currently aggregated fields, then emit an initializer for MemberInit.
+        EmitMemcpy();
+        AggregatedInits.clear();
+        EmitMemberInitializer(GetCGF(), ConstructorDecl->getParent(), MemberInit,
+                              ConstructorDecl, Args);
+      }
+    }
+
+    virtual bool BailOut() {
+      if (AggregatedInits.size() == 1) {
+        // If we haven't aggregated sufficient inits, fall back on normal
+        // initializer codegen.
+        for (unsigned i = 0; i < AggregatedInits.size(); ++i) {
+          EmitMemberInitializer(GetCGF(), ConstructorDecl->getParent(),
+                                AggregatedInits[i], ConstructorDecl, Args);
+        }
+        return true;
+      }
+      // We have aggregated enough inits to emit the memcpy. No bailing out now. 
+      return false;
+    }
+
+    void Finish() {
+      EmitMemcpy();
+    }
+
+  private:
+    const CXXConstructorDecl *ConstructorDecl;
+    bool MemcpyableCtor;
+    FunctionArgList &Args;
+    SmallVector<CXXCtorInitializer*, 16> AggregatedInits;
+  };
+
+  class AssignmentMemcpyizer : public FieldMemcpyizer {
+  private:
+
+    std::pair<bool, FieldDecl*> AssignmentIsMemcpyable(Stmt *S) {
+      std::pair<bool, FieldDecl*> NoMatch(false, 0);
+
+      if (!AssignmentsMemcpyable)
+        return NoMatch;
+
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) {
+        if (BO->getOpcode() != BO_Assign)
+          return NoMatch;
+        MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS());
+        if (!ME)
+          return NoMatch;
+        FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return NoMatch;
+        Stmt *RHS = BO->getRHS();
+        if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS))
+          RHS = EC->getSubExpr();
+        if (!RHS)
+          return NoMatch;
+        MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
+        if (!ME2 || dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
+          return NoMatch;
+
+        return std::make_pair(true, Field);
+
+      } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
+        FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CE->getCalleeDecl());
+        if (!FD)
+          return NoMatch;
+        if (FD->getBuiltinID() != Builtin::BI__builtin_memcpy)
+          return NoMatch;
+        Expr *DstPtr = CE->getArg(0);
+        if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr))
+          DstPtr = DC->getSubExpr();
+        UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr);
+        if (!DUO || DUO->getOpcode() != UO_AddrOf)
+          return NoMatch;
+        MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr());
+        if (!ME)
+          return NoMatch;
+        FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
+        if (!Field || !isMemcpyableField(Field))
+          return NoMatch;
+        Expr *SrcPtr = CE->getArg(1);
+        if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr))
+          SrcPtr = SC->getSubExpr();
+        UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr);
+        if (!SUO || SUO->getOpcode() != UO_AddrOf)
+          return NoMatch;
+        MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr());
+        if (!ME2)
+          return NoMatch;
+        if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
+          return NoMatch;
+        
+        return std::make_pair(true, Field);
+
+      }
+
+      return NoMatch;
+    }
+
+    bool AssignmentsMemcpyable;
+    SmallVector<Stmt*, 16> AggregatedStmts;
+
+  public:
+
+    AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD,
+                         FunctionArgList &Args)
+      : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]),
+        AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) {
+      assert(Args.size() == 2 && "Copy-assignment takes two arguments");
+    }
+
+    void EmitAssignment(Stmt *S) {
+      std::pair<bool, FieldDecl*> A = AssignmentIsMemcpyable(S);
+      if (A.first) {
+        AddMemcpyableField(A.second);
+        AggregatedStmts.push_back(S);
+      } else {
+        // If S can't be rolled into a memcpy, emit a memcpy for the currently
+        // aggregated fields, then emit S as a normal statement.
+        EmitMemcpy();
+        AggregatedStmts.clear();
+        GetCGF().EmitStmt(S);
+      }
+    }
+
+    void Finish() {
+      EmitMemcpy();
+    }
+
+    virtual bool BailOut() {
+      if (AggregatedStmts.size() == 1) {
+        for (unsigned i = 0; i < AggregatedStmts.size(); ++i)
+          GetCGF().EmitStmt(AggregatedStmts[i]);
+        return true;
+      }
+      return false;
+    }
+  };
+
+}
+
 /// EmitCtorPrologue - This routine generates necessary code to initialize
 /// base classes and non-static data members belonging to this constructor.
 void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
@@ -789,8 +1086,10 @@
 
   InitializeVTablePointers(ClassDecl);
 
+  ConstructorMemcpyizer CM(*this, CD, Args);
   for (unsigned I = 0, E = MemberInitializers.size(); I != E; ++I)
-    EmitMemberInitializer(*this, ClassDecl, MemberInitializers[I], CD, Args);
+    CM.EmitConstructorCopy(MemberInitializers[I]);
+  CM.Finish();
 }
 
 static bool
@@ -958,6 +1257,24 @@
     ExitCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 }
 
+void CodeGenFunction::EmitImplicitAssignmentOperatorBody(FunctionArgList &Args){
+  const CXXMethodDecl *AssignOp = cast<CXXMethodDecl>(CurGD.getDecl());
+  const Stmt *RootS = AssignOp->getBody();
+  assert(RootS->getStmtClass() == Stmt::CompoundStmtClass &&
+         "Body of an implicit assignment operator should be compound stmt.");
+  const CompoundStmt &RootCS = cast<CompoundStmt>(*RootS);
+
+  LexicalScope Scope(*this, RootCS.getSourceRange());
+
+  AssignmentMemcpyizer AM(*this, AssignOp, Args);
+  for (CompoundStmt::const_body_iterator I = RootCS.body_begin(),
+                                         E = RootCS.body_end();
+       I != E; ++I) {
+    AM.EmitAssignment(*I);  
+  }
+  AM.Finish();
+}
+
 namespace {
   /// Call the operator delete associated with the current destructor.
   struct CallDtorDelete : EHScopeStack::Cleanup {
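
For context, AssignmentIsMemcpyable above pattern-matches exactly the two statement forms Sema synthesizes when it defines an implicit copy-assignment operator: plain member-wise assignments, and __builtin_memcpy calls for array members. In source form the synthesized body is roughly equivalent to the following sketch (a hand-written stand-in, not Sema's literal output):

  struct T {
    int X;
    float F;
    char Buf[16];

    // Hypothetical spelling of what Sema synthesizes for the *implicit*
    // operator=; shown as a user-written operator purely for illustration.
    T &operator=(const T &Other) {
      this->X = Other.X; // BO_Assign with a MemberExpr on each side.
      this->F = Other.F; // Likewise.
      __builtin_memcpy(&this->Buf, &Other.Buf, sizeof(this->Buf));
                         // CallExpr to BI__builtin_memcpy taking UO_AddrOf
                         // of a MemberExpr for both pointer arguments.
      return *this;
    }
  };

All three statements match, so the AssignmentMemcpyizer collapses them into a single memcpy covering X, F, and Buf.
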
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h	(revision 172158)
+++ lib/CodeGen/CodeGenFunction.h	(working copy)
@@ -1397,6 +1397,7 @@
 
   void EmitConstructorBody(FunctionArgList &Args);
   void EmitDestructorBody(FunctionArgList &Args);
+  void EmitImplicitAssignmentOperatorBody(FunctionArgList &Args);
   void EmitFunctionBody(FunctionArgList &Args);
 
   void EmitForwardingCallToLambda(const CXXRecordDecl *Lambda,
@@ -1683,6 +1684,10 @@
                          CharUnits Alignment = CharUnits::Zero(),
                          bool isAssignment = false);
 
+  /// EmitAggregateCopy - Emit a memcpy of Size bytes from SrcPtr to DestPtr.
+  void EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr,
+                         CharUnits Size, CharUnits Alignment);
+
   /// StartBlock - Start new block named N. If insert block is a dummy block
   /// then reuse it.
   void StartBlock(const char *N);
