Index: test/CodeGen/arm-homogenous.c
===================================================================
--- test/CodeGen/arm-homogenous.c	(revision 166031)
+++ test/CodeGen/arm-homogenous.c	(working copy)
@@ -156,6 +156,23 @@
 }
 // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
 
+// Make sure HAs that can be partially fit into VFP registers will be allocated
+// on stack and that later VFP candidates will go on stack as well.
+typedef struct {
+  double x;
+  double a2;
+  double a3;
+  double a4;
+} struct_of_four_doubles;
+extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
+struct_of_four_doubles g_s4d;
+
+void test_struct_of_four_doubles(void) {
+// CHECK: test_struct_of_four_doubles
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+  takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
+}
+
 // FIXME: Tests necessary:
 //         - Vectors
 //         - C++ stuff
Index: lib/CodeGen/ABIInfo.h
===================================================================
--- lib/CodeGen/ABIInfo.h	(revision 166031)
+++ lib/CodeGen/ABIInfo.h	(working copy)
@@ -108,8 +108,8 @@
                                   , bool Realign = false) {
       return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, true, 0);
     }
-    static ABIArgInfo getExpand() {
-      return ABIArgInfo(Expand, 0, 0, false, false, false, 0);
+    static ABIArgInfo getExpand(llvm::Type *Padding = 0) {
+      return ABIArgInfo(Expand, 0, 0, false, false, false, Padding);
     }
 
     Kind getKind() const { return TheKind; }
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp	(revision 166052)
+++ lib/CodeGen/TargetInfo.cpp	(working copy)
@@ -2798,7 +2798,8 @@
   ABIKind getABIKind() const { return Kind; }
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
+                                  bool &IsHA) const;
   bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const;
@@ -2842,10 +2843,32 @@
 }
 
 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  // To correctly handle Homogeneous Aggregate, we need to keep track of the
+  // number of VFP registers allocated so far.
+  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
+  // VFP registers of the appropriate type unallocated then the argument is
+  // allocated to the lowest-numbered sequence of such registers.
+  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
+  // unallocated are marked as unavailable. 
+  unsigned AllocatedVFP = 0;
   FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
-       it != ie; ++it)
-    it->info = classifyArgumentType(it->type);
+       it != ie; ++it) {
+    unsigned PreAllocation = AllocatedVFP;
+    bool IsHA = false;
+    // 6.1.2.3 There is one VFP co-processor register class using registers
+    // s0-s15 (d0-d7) for passing arguments.
+    const unsigned NumVFPs = 16;
+    it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
+    // If we do not have enough VFP registers for the HA, any VFP registers
+    // that are unallocated are marked as unavailable. To achieve this, we add
+    // padding of (NumVFPs - PreAllocation) floats.
+    if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
+      llvm::Type *PaddingTy = llvm::ArrayType::get(
+          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
+      it->info = ABIArgInfo::getExpand(PaddingTy);
+    }
+  }
 
   // Always honor user-specified calling convention.
   if (FI.getCallingConvention() != llvm::CallingConv::C)
@@ -2945,7 +2968,17 @@
   return (Members > 0 && Members <= 4);
 }
 
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
+                                            bool &IsHA) const {
+  // We update number of allocated VFPs according to
+  // 6.1.2.1 The following argument types are VFP CPRCs:
+  //   A single-precision floating-point type (including promoted
+  //   half-precision types); A double-precision floating-point type;
+  //   A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
+  //   with a Base Type of a single- or double-precision floating-point type,
+  //   64-bit containerized vectors or 128-bit containerized vectors with one
+  //   to four Elements.
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty)) {
     uint64_t Size = getContext().getTypeSize(Ty);
@@ -2957,15 +2990,38 @@
     if (Size == 64) {
       llvm::Type *ResType = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 2);
+      // Align AllocatedVFP to an even number to use a D register.
+      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
+      AllocatedVFP += 2; // 1 D register = 2 S registers
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 4);
+      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
+      AllocatedVFP += 4; // 1 Q register = 4 S registers
       return ABIArgInfo::getDirect(ResType);
     }
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
+  // Update AllocatedVFP for legal vector types.
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    uint64_t Size = getContext().getTypeSize(VT);
+    // Size of a legal vector should be power of 2 and above 64.
+    AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
+    AllocatedVFP += (Size / 32);
+  }
+  // Update AllocatedVFP for floating point types.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Half ||
+        BT->getKind() == BuiltinType::Float)
+      AllocatedVFP += 1;
+    if (BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble) {
+      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
+      AllocatedVFP += 2;
+    }
+  }
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
@@ -2986,10 +3042,28 @@
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 
   if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
-    // Homogeneous Aggregates need to be expanded.
+    // Homogeneous Aggregates need to be expanded when we can fit the aggregate
+    // into VFP registers.
     const Type *Base = 0;
-    if (isHomogeneousAggregate(Ty, Base, getContext())) {
+    uint64_t Members = 0;
+    if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
       assert(Base && "Base class should be set for homogeneous aggregate");
+      // Base can be a floating-point or a vector.
+      if (Base->isVectorType()) {
+        // ElementSize is in number of floats.
+        unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
+        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
+                       ElementSize);
+        AllocatedVFP += Members * ElementSize;
+      } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
+        AllocatedVFP += Members;
+      else {
+        assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
+               Base->isSpecificBuiltinType(BuiltinType::LongDouble));
+        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
+        AllocatedVFP += Members * 2; // Base type is double.
+      }
+      IsHA = true;
       return ABIArgInfo::getExpand();
     }
   }
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp	(revision 166031)
+++ lib/CodeGen/CGCall.cpp	(working copy)
@@ -894,6 +894,8 @@
     }
 
     case ABIArgInfo::Expand:
+      if (llvm::Type *PaddingType = argAI.getPaddingType())
+        argTypes.push_back(PaddingType);
       GetExpandedTypes(it->type, argTypes);
       break;
     }
@@ -1063,6 +1065,9 @@
       continue;
 
     case ABIArgInfo::Expand: {
+      // Increment Index if there is padding.
+      Index += (AI.getPaddingType() != 0);
+
       SmallVector<llvm::Type*, 8> types;
       // FIXME: This is rather inefficient. Do we ever actually need to do
       // anything here? The result should be just reconstructed on the other
@@ -1290,6 +1295,10 @@
     }
 
     case ABIArgInfo::Expand: {
+      // Skip the dummy padding argument.
+      if (ArgI.getPaddingType())
+        ++AI;
+
       // If this structure was expanded into multiple arguments then
       // we need to create a temporary and reconstruct it from the
       // arguments.
@@ -2118,6 +2127,12 @@
     }
 
     case ABIArgInfo::Expand:
+      // Insert a padding argument to ensure proper alignment.
+      if (llvm::Type *PaddingType = ArgInfo.getPaddingType()) {
+        Args.push_back(llvm::UndefValue::get(PaddingType));
+        ++IRArgNo;
+      }
+
       ExpandTypeToArgs(I->Ty, RV, Args, IRFuncTy);
       IRArgNo = Args.size();
       break;
