Index: test/CodeGen/arm-abi-vector.c
===================================================================
--- test/CodeGen/arm-abi-vector.c	(revision 0)
+++ test/CodeGen/arm-abi-vector.c	(revision 0)
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi aapcs -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck -check-prefix=APCS-GNU %s
+
+#include <stdarg.h>
+
+typedef __attribute__(( ext_vector_type(2) ))  int __int2;
+
+// Passing legal vector types as varargs: va_arg of a 2 x i32 vector is lowered differently for the aapcs and apcs-gnu ABIs.
+double varargs_vec_2i(int fixed, ...) {
+// CHECK: varargs_vec_2i
+// CHECK: %c3 = alloca <2 x i32>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <2 x i32>*
+// APCS-GNU: varargs_vec_2i
+// APCS-GNU: %c3 = alloca <2 x i32>, align 8
+// APCS-GNU: %var.align = alloca <2 x i32>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <2 x i32>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <2 x i32>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __int2 c3 = va_arg(ap, __int2);  // the vector va_arg whose lowering the directives above pin down
+  sum = sum + c3.x + c3.y;  // add both elements of the fetched vector
+  va_end(ap);
+  return sum;
+}
+
+double test_2i(__int2 *in) {  // caller side of the vector-as-vararg test
+// CHECK: test_2i
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+// APCS-GNU: test_2i
+// APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+  return varargs_vec_2i(3, *in);  // the loaded vector is passed by value as the variadic argument
+}
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp	(revision 165846)
+++ lib/CodeGen/TargetInfo.cpp	(working copy)
@@ -3167,27 +3167,58 @@
   CGBuilderTy &Builder = CGF.Builder;
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
+
+  uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
+  uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+
+  // The ABI alignment for vectors is 8 for AAPCS and 4 for APCS.
+  if (Ty->getAs<VectorType>() && Size >= 8) {
+    if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
+        getABIKind() == ARMABIInfo::AAPCS)
+      TyAlign = 8;
+    else
+      TyAlign = 4;
+  }
+
   // Handle address alignment for type alignment > 32 bits
-  uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
   if (TyAlign > 4) {
     assert((TyAlign & (TyAlign - 1)) == 0 &&
            "Alignment is not power of 2!");
     llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
     AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
     AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
-    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
   }
-  llvm::Type *PTy =
-    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
-  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
 
   uint64_t Offset =
-    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, 4);
+    llvm::RoundUpToAlignment(Size, 4);
   llvm::Value *NextAddr =
     Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
 
+  if (Ty->getAs<VectorType>() && ((Size == 8 && TyAlign < 8) ||
+           (Size >= 16 && TyAlign < 16))) {
+    // We can't directly cast ap.cur to a pointer to the vector type, since
+    // ap.cur may not be correctly aligned for the vector type. Instead, we
+    // create an aligned temporary and copy the contents over from ap.cur to
+    // the temporary. This is necessary if TyAlign < 8 for Size == 8, or
+    // TyAlign < 16 for Size >= 16.
+    llvm::Type *I8PtrTy = Builder.getInt8PtrTy();
+    CharUnits CharSize = getContext().getTypeSizeInChars(Ty);
+    llvm::Value *AlignedTemp = CGF.CreateTempAlloca(CGF.ConvertType(Ty),
+                                                    "var.align");
+    llvm::Value *Dst = Builder.CreateBitCast(AlignedTemp, I8PtrTy);
+    llvm::Value *Src = Builder.CreateBitCast(Addr, I8PtrTy);
+    Builder.CreateMemCpy(Dst, Src,
+        llvm::ConstantInt::get(CGF.IntPtrTy, CharSize.getQuantity()),
+        TyAlign, false);
+    Addr = AlignedTemp; // The content is in an aligned location.
+  }
+  llvm::Type *PTy =
+    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
+
   return AddrTyped;
 }
 
