jfb updated this revision to Diff 283406.
jfb marked 5 inline comments as done.
jfb added a comment.

Remove restrict, update docs, call isCompleteType


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79279/new/

https://reviews.llvm.org/D79279

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/DiagnosticASTKinds.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ExprConstant.cpp
  clang/lib/CodeGen/CGBuilder.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtin-overloaded-memfns.c
  clang/test/CodeGen/ubsan-builtin-checks.c
  clang/test/CodeGen/ubsan-builtin-ctz-clz.c
  clang/test/CodeGen/ubsan-builtin-mem_overloaded.c
  clang/test/CodeGenObjC/builtin-memfns.m
  clang/test/Sema/builtin-overloaded-memfns.cpp
  clang/test/SemaCXX/constexpr-string.cpp
  compiler-rt/lib/ubsan/ubsan_handlers.cpp
  compiler-rt/lib/ubsan/ubsan_handlers.h
  compiler-rt/test/ubsan/TestCases/Misc/builtins-ctz-clz.cpp
  compiler-rt/test/ubsan/TestCases/Misc/builtins-mem_overloaded.cpp
  compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp

Index: compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp
===================================================================
--- compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-// REQUIRES: arch=x86_64
-//
-// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t
-// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=RECOVER
-// RUN: %clangxx -fsanitize=builtin -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort
-// RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT
-
-void check_ctz(int n) {
-  // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to ctz(), which is not a valid argument
-  // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to ctz(), which is not a valid argument
-  __builtin_ctz(n);
-
-  // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to ctz(), which is not a valid argument
-  __builtin_ctzl(n);
-
-  // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to ctz(), which is not a valid argument
-  __builtin_ctzll(n);
-}
-
-void check_clz(int n) {
-  // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to clz(), which is not a valid argument
-  __builtin_clz(n);
-
-  // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to clz(), which is not a valid argument
-  __builtin_clzl(n);
-
-  // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument
-  __builtin_clzll(n);
-}
-
-int main() {
-  check_ctz(0);
-  check_clz(0);
-  return 0;
-}
Index: compiler-rt/test/ubsan/TestCases/Misc/builtins-mem_overloaded.cpp
===================================================================
--- /dev/null
+++ compiler-rt/test/ubsan/TestCases/Misc/builtins-mem_overloaded.cpp
@@ -0,0 +1,95 @@
+// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+using uintptr_t = __UINTPTR_TYPE__;
+using size_t = __SIZE_TYPE__;
+
+void check_memcpy_align(char *dst_aligned, char *dst_misaligned, const char *src_aligned, const char *src_misaligned, size_t sz) {
+  // OK.
+  __builtin_memcpy_overloaded(dst_aligned, src_aligned, sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:31: runtime error: passing pointer 0x{{[0-9a-f]*}} with invalid alignment 1 into __builtin_mem*_overloaded, element size 2
+  __builtin_memcpy_overloaded(dst_misaligned, src_aligned, sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:44: runtime error: passing pointer 0x{{[0-9a-f]*}} with invalid alignment 1 into __builtin_mem*_overloaded, element size 2
+  __builtin_memcpy_overloaded(dst_aligned, src_misaligned, sz, 2);
+}
+
+void check_memmove_align(char *dst_aligned, char *dst_misaligned, const char *src_aligned, const char *src_misaligned, size_t sz) {
+  // OK.
+  __builtin_memmove_overloaded(dst_aligned, src_aligned, sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:32: runtime error: passing pointer 0x{{[0-9a-f]*}} with invalid alignment 1 into __builtin_mem*_overloaded, element size 2
+  __builtin_memmove_overloaded(dst_misaligned, src_aligned, sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:45: runtime error: passing pointer 0x{{[0-9a-f]*}} with invalid alignment 1 into __builtin_mem*_overloaded, element size 2
+  __builtin_memmove_overloaded(dst_aligned, src_misaligned, sz, 2);
+}
+
+void check_memset_align(char *dst_aligned, char *dst_misaligned, size_t sz) {
+  // OK.
+  __builtin_memset_overloaded(dst_aligned, 0, sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:31: runtime error: passing pointer 0x{{[0-9a-f]*}} with invalid alignment 1 into __builtin_mem*_overloaded, element size 2
+  __builtin_memset_overloaded(dst_misaligned, 0, sz, 2);
+}
+
+void check_memcpy_size(char *dst, char *src) {
+  // OK.
+  __builtin_memcpy_overloaded(dst, src, 32, 2);
+  __builtin_memcpy_overloaded(dst, src, 2, 2);
+  __builtin_memcpy_overloaded(dst, src, 0, 2);
+  volatile size_t small_bad_sz = 1;
+  volatile size_t big_bad_sz = 43;
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:41: runtime error: passing an invalid size 1 with element size 2 to __builtin_mem*_overloaded
+  __builtin_memcpy_overloaded(dst, src, small_bad_sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:41: runtime error: passing an invalid size 43 with element size 2 to __builtin_mem*_overloaded
+  __builtin_memcpy_overloaded(dst, src, big_bad_sz, 2);
+}
+
+void check_memmove_size(char *dst, char *src) {
+  // OK.
+  __builtin_memmove_overloaded(dst, src, 32, 2);
+  __builtin_memmove_overloaded(dst, src, 2, 2);
+  __builtin_memmove_overloaded(dst, src, 0, 2);
+  volatile size_t small_bad_sz = 1;
+  volatile size_t big_bad_sz = 43;
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:42: runtime error: passing an invalid size 1 with element size 2 to __builtin_mem*_overloaded
+  __builtin_memmove_overloaded(dst, src, small_bad_sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:42: runtime error: passing an invalid size 43 with element size 2 to __builtin_mem*_overloaded
+  __builtin_memmove_overloaded(dst, src, big_bad_sz, 2);
+}
+
+void check_memset_size(char *dst) {
+  // OK.
+  __builtin_memset_overloaded(dst, 0, 32, 2);
+  __builtin_memset_overloaded(dst, 0, 2, 2);
+  __builtin_memset_overloaded(dst, 0, 0, 2);
+  volatile size_t small_bad_sz = 1;
+  volatile size_t big_bad_sz = 43;
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:39: runtime error: passing an invalid size 1 with element size 2 to __builtin_mem*_overloaded
+  __builtin_memset_overloaded(dst, 0, small_bad_sz, 2);
+  // CHECK: builtins-mem_overloaded.cpp:[[@LINE+1]]:39: runtime error: passing an invalid size 43 with element size 2 to __builtin_mem*_overloaded
+  __builtin_memset_overloaded(dst, 0, big_bad_sz, 2);
+}
+
+int main() {
+  char dst0[128] = {0};
+  char dst1[128] = {0};
+  char src0[128] = {0};
+  char src1[128] = {0};
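+  // Derive one 2-byte-aligned and one deliberately misaligned pointer from
+  // each buffer, whatever address the arrays happen to land on.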
+  char *dst_aligned = ((uintptr_t)dst0 & 0x1) ? (dst0 + 1) : dst0;
+  char *dst_misaligned = ((uintptr_t)dst1 & 0x1) ? dst1 : (dst1 + 1);
+  char *src_aligned = ((uintptr_t)src0 & 0x1) ? (src0 + 1) : src0;
+  char *src_misaligned = ((uintptr_t)src1 & 0x1) ? src1 : (src1 + 1);
+  check_memcpy_align(dst_aligned, dst_misaligned, src_aligned, src_misaligned, 32);
+  check_memmove_align(dst_aligned, dst_misaligned, src_aligned, src_misaligned, 32);
+  check_memset_align(dst_aligned, dst_misaligned, 32);
+  check_memcpy_size(dst_aligned, src_aligned);
+  check_memmove_size(dst_aligned, src_aligned);
+  check_memset_size(dst_aligned);
+
+  return 0;
+}
+
+// The stubs don't actually need to do anything since we're not checking their behavior.
+extern "C" void *__llvm_memcpy_element_unordered_atomic_2(void *dst, void *src, size_t sz) { return nullptr; }
+extern "C" void *__llvm_memmove_element_unordered_atomic_2(void *, void *, size_t) { return nullptr; }
+extern "C" void *__llvm_memset_element_unordered_atomic_2(volatile short *, int, size_t) { return nullptr; }
Index: compiler-rt/test/ubsan/TestCases/Misc/builtins-ctz-clz.cpp
===================================================================
--- /dev/null
+++ compiler-rt/test/ubsan/TestCases/Misc/builtins-ctz-clz.cpp
@@ -0,0 +1,44 @@
+// REQUIRES: arch=x86_64
+//
+// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=RECOVER
+// RUN: %clangxx -fsanitize=builtin -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort
+// RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT
+
+void check_ctz(int n) {
+  // ABORT: builtins-ctz-clz.cpp:[[@LINE+2]]:17: runtime error: passing zero to ctz(), which is not a valid argument
+  // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:17: runtime error: passing zero to ctz(), which is not a valid argument
+  __builtin_ctz(n);
+
+  // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:18: runtime error: passing zero to ctz(), which is not a valid argument
+  __builtin_ctzl(n);
+
+  // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:19: runtime error: passing zero to ctz(), which is not a valid argument
+  __builtin_ctzll(n);
+}
+
+void check_clz(int n) {
+  // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:17: runtime error: passing zero to clz(), which is not a valid argument
+  __builtin_clz(n);
+
+  // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:18: runtime error: passing zero to clz(), which is not a valid argument
+  __builtin_clzl(n);
+
+  // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument
+  __builtin_clzll(n);
+}
+
+void check_recoverable(int n) {
+  // The diagnostic is deduplicated: it should print only once despite the loop.
+  for (int i = 0; i != 100; ++i) {
+    // RECOVER: builtins-ctz-clz.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument
+    __builtin_clz(n);
+  }
+}
+
+int main() {
+  check_ctz(0);
+  check_clz(0);
+  check_recoverable(0);
+  return 0;
+}
Index: compiler-rt/lib/ubsan/ubsan_handlers.h
===================================================================
--- compiler-rt/lib/ubsan/ubsan_handlers.h
+++ compiler-rt/lib/ubsan/ubsan_handlers.h
@@ -155,18 +155,23 @@
 
 /// Known builtin check kinds.
 /// Keep in sync with the enum of the same name in CodeGenFunction.h
-enum BuiltinCheckKind : unsigned char {
-  BCK_CTZPassedZero,
-  BCK_CLZPassedZero,
+enum BuiltinCheck : unsigned char {
+  CTZPassedZero,
+  CLZPassedZero,
+  AtomicMemMisaligned,
+  AtomicMemMismatchedSize,
 };
 
 struct InvalidBuiltinData {
   SourceLocation Loc;
   unsigned char Kind;
+  unsigned char UnusedPadding;
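+  // Element size in bytes; only meaningful for the atomic mem*_overloaded checks.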
+  unsigned ElementSize;
 };
 
 /// Handle a builtin called in an invalid way.
-RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data)
+RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data, ValueHandle PtrOrSize)
 
 struct InvalidObjCCast {
   SourceLocation Loc;
Index: compiler-rt/lib/ubsan/ubsan_handlers.cpp
===================================================================
--- compiler-rt/lib/ubsan/ubsan_handlers.cpp
+++ compiler-rt/lib/ubsan/ubsan_handlers.cpp
@@ -617,7 +617,8 @@
   Die();
 }
 
-static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts) {
+static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts,
+                                 uptr PtrOrSize) {
   SourceLocation Loc = Data->Loc.acquire();
   ErrorType ET = ErrorType::InvalidBuiltin;
 
@@ -626,18 +627,44 @@
 
   ScopedReport R(Opts, Loc, ET);
 
-  Diag(Loc, DL_Error, ET,
-       "passing zero to %0, which is not a valid argument")
-    << ((Data->Kind == BCK_CTZPassedZero) ? "ctz()" : "clz()");
+  switch (Data->Kind) {
+  case BuiltinCheck::CTZPassedZero:
+    Diag(Loc, DL_Error, ET,
+         "passing zero to ctz(), which is not a valid argument");
+    break;
+  case BuiltinCheck::CLZPassedZero:
+    Diag(Loc, DL_Error, ET,
+         "passing zero to clz(), which is not a valid argument");
+    break;
+  case BuiltinCheck::AtomicMemMisaligned:
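+    // ElementSize is a power of two (Sema rejects anything else), so
+    // ElementSize - 1 masks out the pointer's misaligned low bits.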
+    Diag(Loc, DL_Error, ET,
+         "passing pointer %0 with invalid alignment %1 into "
+         "__builtin_mem*_overloaded, element size %2")
+        << (void *)PtrOrSize << ((Data->ElementSize - 1) & PtrOrSize)
+        << Data->ElementSize;
+    break;
+  case BuiltinCheck::AtomicMemMismatchedSize:
+    Diag(Loc, DL_Error, ET,
+         "passing an invalid size %0 with element size %1 to "
+         "__builtin_mem*_overloaded")
+        << PtrOrSize << Data->ElementSize;
+    break;
+  default:
+    UNREACHABLE("unexpected builtin kind!");
+  }
 }
 
-void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data) {
-  GET_REPORT_OPTIONS(true);
-  handleInvalidBuiltin(Data, Opts);
+void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data,
+                                             uptr PtrOrSize) {
+  GET_REPORT_OPTIONS(false);
+  handleInvalidBuiltin(Data, Opts, PtrOrSize);
 }
-void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) {
+void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data,
+                                                   uptr PtrOrSize) {
   GET_REPORT_OPTIONS(true);
-  handleInvalidBuiltin(Data, Opts);
+  handleInvalidBuiltin(Data, Opts, PtrOrSize);
   Die();
 }
 
Index: clang/test/SemaCXX/constexpr-string.cpp
===================================================================
--- clang/test/SemaCXX/constexpr-string.cpp
+++ clang/test/SemaCXX/constexpr-string.cpp
@@ -675,4 +675,25 @@
     return true;
   }
   static_assert(test_address_of_incomplete_struct_type()); // expected-error {{constant}} expected-note {{in call}}
+
+  template <typename T, int ElNum>
+  constexpr auto test_memcpy_overloaded(int dst_off, int src_off, int num) {
+    T dst[4] = {0, 0, 0, 0};
+    const T src[4] = {1, 2, 3, 4};
+    // expected-note@+2 {{size parameter is 12, expected a size that is evenly divisible by element size 8}}
+    // expected-note@+1 {{size parameter is 4, expected a size that is evenly divisible by element size 8}}
+    __builtin_memcpy_overloaded(dst + dst_off, src + src_off, num * sizeof(T), ElNum * sizeof(T));
+    return result(dst);
+  }
+
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 1) == 1000);
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 2) == 1200);
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 3) == 1230);
+  static_assert(test_memcpy_overloaded<int, 1>(0, 0, 4) == 1234);
+  static_assert(test_memcpy_overloaded<int, 2>(0, 0, 4) == 1234);
+
+  // expected-error@+1 {{static_assert expression is not an integral constant expression}}
+  static_assert(test_memcpy_overloaded<int, 2>(0, 0, 3) == 1234); // expected-note {{in call to 'test_memcpy_overloaded(0, 0, 3)'}}
+  // expected-error@+1 {{static_assert expression is not an integral constant expression}}
+  static_assert(test_memcpy_overloaded<int, 2>(0, 0, 1) == 1234); // expected-note {{in call to 'test_memcpy_overloaded(0, 0, 1)'}}
 }
Index: clang/test/Sema/builtin-overloaded-memfns.cpp
===================================================================
--- /dev/null
+++ clang/test/Sema/builtin-overloaded-memfns.cpp
@@ -0,0 +1,258 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -triple=arm64-unknown-unknown -fms-extensions -DCPY=1
+// RUN: %clang_cc1 %s -verify -fsyntax-only -triple=arm64-unknown-unknown -fms-extensions -DCPY=0
+
+// Test memcpy and memmove with the same code, since they share essentially the same constraints.
+#if CPY
+#define MEM(...) __builtin_memcpy_overloaded(__VA_ARGS__)
+#else
+#define MEM(...) __builtin_memmove_overloaded(__VA_ARGS__)
+#endif
+
+#define NULL (void *)0
+#define nullptr __nullptr
+using size_t = __SIZE_TYPE__;
+using sizeless_t = __SVInt8_t;
+using float4 = float __attribute__((ext_vector_type(4)));
+struct Intish {
+  int i;
+};
+struct NotLockFree {
+  char buf[512];
+};
+struct TrivialCpy {
+  char buf[8];
+  TrivialCpy();
+  TrivialCpy(const TrivialCpy &) = default;
+};
+struct NotTrivialCpy {
+  char buf[8];
+  NotTrivialCpy();
+  NotTrivialCpy(const NotTrivialCpy &);
+};
+
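+// The element-size argument need only be an integral constant expression, not a literal.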
+constexpr int CONSTEXPR_ONE = 1;
+
+void arg_count() {
+  MEM();                                      // expected-error {{too few arguments to function call, expected 3, have 0}}
+  MEM(0);                                     // expected-error {{too few arguments to function call, expected 3, have 1}}
+  MEM(0, 0);                                  // expected-error {{too few arguments to function call, expected 3, have 2}}
+  MEM(0, 0, 0, 0, 0);                         // expected-error {{too many arguments to function call, expected 4, have 5}}
+  __builtin_memset_overloaded();              // expected-error {{too few arguments to function call, expected 3, have 0}}
+  __builtin_memset_overloaded(0);             // expected-error {{too few arguments to function call, expected 3, have 1}}
+  __builtin_memset_overloaded(0, 0);          // expected-error {{too few arguments to function call, expected 3, have 2}}
+  __builtin_memset_overloaded(0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
+}
+
+void null(char *dst, const char *src, size_t size) {
+  MEM(0, src, 0);                              // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(0, src, size);                           // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, 0);                              // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, size);                           // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded(0, 0, 0);        // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded(0, 0, size);     // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, 0, 42);                             // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, NULL, 42);                          // expected-warning {{null passed to a callee that requires a non-null argument}}
+  MEM(dst, nullptr, 42);                       // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'nullptr_t'}}
+  MEM(0, src, 42);                             // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(NULL, src, 42);                          // expected-warning {{null passed to a callee that requires a non-null argument}}
+  MEM(nullptr, src, 42);                       // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'nullptr_t'}}
+  __builtin_memset_overloaded(0, 0, 42);       // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded(NULL, 0, 42);    // expected-warning {{null passed to a callee that requires a non-null argument}}
+  __builtin_memset_overloaded(nullptr, 0, 42); // expected-error{{cannot initialize a parameter of type 'void *' with an rvalue of type 'nullptr_t'}}
+}
+
+void good_arg_types(char *dst, const char *src, size_t size) {
+  MEM(dst, src, 0);
+  MEM(dst, dst, ~(size_t)0);
+  MEM(dst, src, 42);
+  MEM(dst, src, size);
+  MEM(dst, (char *)src, size);
+  MEM(dst, (const void *)src, size);
+  MEM((void *)dst, src, size);
+  MEM(dst, (volatile const char *)src, size);
+  MEM((volatile char *)dst, src, size);
+  MEM(dst, (__unaligned const char *)src, size);
+  MEM((__unaligned char *)dst, src, size);
+
+  MEM(dst, (const __attribute__((address_space(32))) char *)src, size);
+  MEM((__attribute__((address_space(32))) char *)dst, src, size);
+  MEM((__attribute__((address_space(32))) char *)dst, (const __attribute__((address_space(64))) char *)src, size);
+  MEM(dst, (__attribute__((address_space(32))) __unaligned const volatile void *)src, size);
+  MEM((__attribute__((address_space(32))) __unaligned volatile void *)dst, src, size);
+
+  MEM(dst, (const char *)src, size, 1);
+  MEM(dst, (const char *)src, size, 2);
+  MEM(dst, (const char *)src, size, 4);
+  MEM(dst, (const char *)src, size, 8);
+  MEM(dst, (const char *)src, size, 16);
+  MEM((void *)dst, src, size, 1);
+  MEM(dst, (const void *)src, size, 1);
+  MEM((void *)dst, src, size, 4);
+  MEM(dst, (const void *)src, size, 4);
+  MEM((int *)dst, (const Intish *)src, size, 4);
+  MEM((Intish *)dst, (const int *)src, size, 4);
+  MEM((int *)dst, src, size, 1);
+  MEM(dst, (const int *)src, size, 1);
+  MEM((int *)dst, src, size, 2);
+  MEM(dst, (const int *)src, size, 2);
+  MEM((int *)dst, src, size, 8);
+  MEM(dst, (const int *)src, size, 8);
+  MEM(dst, src, size, CONSTEXPR_ONE);
+
+  __builtin_memset_overloaded(dst, 0, 0);
+  __builtin_memset_overloaded(dst, 0, ~(size_t)0);
+  __builtin_memset_overloaded(dst, 0, 42);
+  __builtin_memset_overloaded(dst, 0, size);
+  __builtin_memset_overloaded((void *)dst, 0, size);
+  __builtin_memset_overloaded((volatile char *)dst, 0, size);
+  __builtin_memset_overloaded((__unaligned char *)dst, 0, size);
+  __builtin_memset_overloaded((int *)dst, 0, size);
+  __builtin_memset_overloaded((__attribute__((address_space(32))) char *)dst, 0, size);
+  __builtin_memset_overloaded((__attribute__((address_space(32))) __unaligned volatile void *)dst, 0, size);
+
+  __builtin_memset_overloaded((char *)dst, 0, size, 1);
+  __builtin_memset_overloaded((char *)dst, 0, size, 2);
+  __builtin_memset_overloaded((char *)dst, 0, size, 4);
+  __builtin_memset_overloaded((char *)dst, 0, size, 8);
+  __builtin_memset_overloaded((char *)dst, 0, size, 16);
+  __builtin_memset_overloaded((void *)dst, 0, size, 1);
+  __builtin_memset_overloaded((void *)dst, 0, size, 4);
+  __builtin_memset_overloaded((Intish *)dst, 0, size, 4);
+  __builtin_memset_overloaded((int *)dst, 0, size, 1);
+  __builtin_memset_overloaded((int *)dst, 0, size, 2);
+  __builtin_memset_overloaded((int *)dst, 0, size, 8);
+  __builtin_memset_overloaded(dst, 0, size, CONSTEXPR_ONE);
+}
+
+// expected-note@+1 2 {{declared here}}
+void bad_arg_types(char *dst, const char *src, size_t size) {
+  MEM(dst, 42, size);                                                                        // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(42, src, size);                                                                        // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  MEM(dst, src, dst);                                                                        // expected-error {{cannot initialize a parameter of type 'unsigned long' with an lvalue of type 'char *'}}
+  MEM((const char *)dst, src, size);                                                         // expected-error {{argument must be non-const, got 'const char'}}
+  MEM((__attribute__((address_space(32))) __unaligned const volatile char *)dst, src, size); // expected-error {{argument must be non-const, got 'const volatile __unaligned __attribute__((address_space(32))) char'}}
+
+  MEM(dst, (_Atomic const char *)src, size);          // expected-error{{parameter cannot have the _Atomic qualifier ('const _Atomic(char)' invalid)}}
+  MEM((_Atomic char *)dst, src, size);                // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(char)' invalid)}}
+  MEM((int *)dst, (_Atomic const Intish *)src, size); // expected-error{{parameter cannot have the _Atomic qualifier ('const _Atomic(Intish)' invalid)}}
+  MEM((_Atomic Intish *)dst, (const int *)src, size); // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(Intish)' invalid)}}
+  MEM((void *)dst, (_Atomic const int *)src, size);   // expected-error{{parameter cannot have the _Atomic qualifier ('const _Atomic(int)' invalid)}}
+  MEM((_Atomic int *)dst, (const void *)src, size);   // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(int)' invalid)}}
+
+  // expected-note@+1 {{read of non-const variable 'size' is not allowed in a constant expression}}
+  MEM(dst, src, size, size);                                                    // expected-error{{expression is not an integral constant expression}}
+  MEM(dst, src, size, -1);                                                      // expected-error{{argument should be a power of 2}}
+  MEM(dst, src, size, 0);                                                       // expected-error{{argument should be a power of 2}}
+  MEM(dst, src, size, 3);                                                       // expected-error{{argument should be a power of 2}}
+  MEM(dst, src, size, 32);                                                      // expected-error{{lock-free}}
+  MEM((NotLockFree *)dst, (const NotLockFree *)src, size, sizeof(NotLockFree)); // expected-error{{element size must be a lock-free size, 512 exceeds 16 bytes}}
+  MEM(dst, (volatile const char *)src, size, 1);                                // expected-error{{specifying an access size for volatile memory operations is unsupported ('const volatile char' is volatile)}}
+  MEM((volatile char *)dst, src, size, 1);                                      // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+
+  __builtin_memset_overloaded(42, 0, size);                                                                        // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+  __builtin_memset_overloaded((const char *)dst, 0, size);                                                         // expected-error {{argument must be non-const, got 'const char'}}
+  __builtin_memset_overloaded((__attribute__((address_space(32))) __unaligned const volatile char *)dst, 0, size); // expected-error {{argument must be non-const, got 'const volatile __unaligned __attribute__((address_space(32))) char'}}
+  __builtin_memset_overloaded((_Atomic char *)dst, 0, size);                                                       // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(char)' invalid)}}
+  __builtin_memset_overloaded((_Atomic Intish *)dst, 0, size);                                                     // expected-error{{parameter cannot have the _Atomic qualifier ('_Atomic(Intish)' invalid)}}
+
+  // expected-note@+1 {{read of non-const variable 'size' is not allowed in a constant expression}}
+  __builtin_memset_overloaded(dst, 0, size, size);                               // expected-error{{expression is not an integral constant expression}}
+  __builtin_memset_overloaded(dst, 0, size, -1);                                 // expected-error{{argument should be a power of 2}}
+  __builtin_memset_overloaded(dst, 0, size, 0);                                  // expected-error{{argument should be a power of 2}}
+  __builtin_memset_overloaded(dst, 0, size, 3);                                  // expected-error{{argument should be a power of 2}}
+  __builtin_memset_overloaded(dst, 0, size, 32);                                 // expected-error{{lock-free}}
+  __builtin_memset_overloaded((volatile char *)dst, 0, size, 1);                 // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+  __builtin_memset_overloaded((NotLockFree *)dst, 0, size, sizeof(NotLockFree)); // expected-error{{element size must be a lock-free size, 512 exceeds 16 bytes}}
+}
+
+void array_arg_types() {
+  extern char adst[512];
+  extern volatile char avdst[512];
+  extern const char asrc[512];
+  extern const volatile char avsrc[512];
+
+  MEM(adst, asrc, sizeof(adst));
+  MEM(avdst, avsrc, sizeof(avdst));
+  MEM(asrc, asrc, sizeof(adst));     // expected-error {{argument must be non-const, got 'const char'}}
+  MEM(adst, asrc, sizeof(adst) + 1); // TODO diagnose size overflow?
+  __builtin_memset_overloaded(adst, 0, sizeof(adst));
+  __builtin_memset_overloaded(avdst, 0, sizeof(avdst));
+  __builtin_memset_overloaded(asrc, 0, sizeof(asrc));     // expected-error {{argument must be non-const, got 'const char'}}
+  __builtin_memset_overloaded(adst, 0, sizeof(adst) + 1); // TODO diagnose size overflow?
+}
+
+void atomic_array_arg_types() {
+  extern char adst[512];
+  extern volatile char avdst[512];
+  extern const char asrc[512];
+  extern const volatile char avsrc[512];
+
+  MEM(adst, asrc, sizeof(adst), 1);
+  MEM(avdst, asrc, sizeof(adst), 1); // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+  MEM(adst, avsrc, sizeof(adst), 1); // expected-error{{specifying an access size for volatile memory operations is unsupported ('const volatile char' is volatile)}}
+  __builtin_memset_overloaded(adst, 0, sizeof(adst), 1);
+  __builtin_memset_overloaded(avdst, 0, sizeof(avdst), 1); // expected-error{{specifying an access size for volatile memory operations is unsupported ('volatile char' is volatile)}}
+}
+
+void trivial_arg_types() {
+  TrivialCpy trivialDst;
+  const TrivialCpy trivialSrc;
+  MEM(&trivialDst, &trivialSrc, sizeof(TrivialCpy));
+  MEM((__attribute__((address_space(32))) __unaligned volatile TrivialCpy *) & trivialDst, (__attribute__((address_space(64))) __unaligned const volatile TrivialCpy *) & trivialSrc, sizeof(TrivialCpy));
+  __builtin_memset_overloaded(&trivialDst, 0, sizeof(trivialDst));
+  __builtin_memset_overloaded((__attribute__((address_space(32))) __unaligned volatile TrivialCpy *) & trivialDst, 0, sizeof(trivialDst));
+
+  TrivialCpy trivialDstArr[2];
+  const TrivialCpy trivialSrcArr[2];
+  MEM(trivialDstArr, trivialSrcArr, sizeof(TrivialCpy) * 2);
+  __builtin_memset_overloaded(trivialDstArr, 0, sizeof(TrivialCpy) * 2);
+}
+
+void nontrivial_arg_types() {
+  NotTrivialCpy notTrivialDst;
+  const NotTrivialCpy notTrivialSrc;
+  MEM(&notTrivialDst, &notTrivialSrc, sizeof(NotTrivialCpy), sizeof(NotTrivialCpy));            // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+  __builtin_memset_overloaded(&notTrivialDst, 0, sizeof(NotTrivialCpy), sizeof(NotTrivialCpy)); // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+
+  NotTrivialCpy notTrivialDstArr[2];
+  const NotTrivialCpy notTrivialSrcArr[2];
+  MEM(notTrivialDstArr, notTrivialSrcArr, sizeof(NotTrivialCpy) * 2, sizeof(NotTrivialCpy));          // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+  __builtin_memset_overloaded(notTrivialDstArr, 0, sizeof(NotTrivialCpy) * 2, sizeof(NotTrivialCpy)); // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('NotTrivialCpy' invalid)}}
+}
+
+class Incomplete;
+void incomplete_arg_types(char *dst, const char *src, size_t size) {
+  MEM((Incomplete *)dst, src, size, 1);                       // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('Incomplete' invalid)}}
+  MEM(dst, (const Incomplete *)src, size, 1);                 // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('const Incomplete' invalid)}}
+  __builtin_memset_overloaded((Incomplete *)dst, 0, size, 1); // expected-error{{address argument to atomic operation must be a pointer to a trivially-copyable type ('Incomplete' invalid)}}
+}
+
+void sizeless_arg_types(char *dst, const char *src, size_t size) {
+  MEM((sizeless_t *)dst, src, size);
+  MEM(dst, (const sizeless_t *)src, size);
+  __builtin_memset_overloaded((sizeless_t *)dst, 0, size);
+
+  MEM((sizeless_t *)dst, src, size, 1);
+  MEM(dst, (const sizeless_t *)src, size, 1);
+  __builtin_memset_overloaded((sizeless_t *)dst, 0, size, 1);
+}
+
+void vector_arg_types(char *dst, const char *src, size_t size) {
+  MEM((float4 *)dst, src, size);
+  MEM(dst, (const float4 *)src, size);
+  __builtin_memset_overloaded((float4 *)dst, 0, size);
+
+  MEM((float4 *)dst, (const float4 *)src, size, sizeof(float4));
+  MEM((float4 *)dst, (const float4 *)src, size, sizeof(float4));
+  __builtin_memset_overloaded((float4 *)dst, 0, size, sizeof(float4));
+}
+
+void extint_arg_types(char *dst, const char *src, size_t size) {
+  MEM((_ExtInt(2) *)dst, src, size);
+  MEM(dst, (const _ExtInt(2) *)src, size);
+  __builtin_memset_overloaded((_ExtInt(2) *)dst, 0, size);
+
+  MEM((_ExtInt(8) *)dst, (const _ExtInt(8) *)src, size, 1);
+  __builtin_memset_overloaded((_ExtInt(8) *)dst, 0, size, 1);
+}
Index: clang/test/CodeGenObjC/builtin-memfns.m
===================================================================
--- clang/test/CodeGenObjC/builtin-memfns.m
+++ clang/test/CodeGenObjC/builtin-memfns.m
@@ -1,10 +1,38 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.8.0 -emit-llvm -o - %s | FileCheck %s
 
-void *memcpy(void *restrict s1, const void *restrict s2, unsigned long n);
+typedef __SIZE_TYPE__ size_t;
+
+void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
+void *memmove(void *restrict s1, const void *restrict s2, size_t n);
+void *memset(void *s1, int v, size_t n);
 
 // PR13697
-void test1(int *a, id b) {
-  // CHECK: @test1
+void cpy1(int *a, id b) {
+  // CHECK-LABEL: @cpy1(
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  memcpy(a, b, 8);
+}
+
+void cpy2(id a, int *b) {
+  // CHECK-LABEL: @cpy2(
   // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
   memcpy(a, b, 8);
 }
+
+void move1(int *a, id b) {
+  // CHECK-LABEL: @move1(
+  // CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  memmove(a, b, 8);
+}
+
+void move2(id a, int *b) {
+  // CHECK-LABEL: @move2(
+  // CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  memmove(a, b, 8);
+}
+
+void set(id a) {
+  // CHECK-LABEL: @set(
+  // CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 42, i64 8, i1 false)
+  memset(a, 42, 8);
+}
Index: clang/test/CodeGen/ubsan-builtin-mem_overloaded.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/ubsan-builtin-mem_overloaded.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s -fsanitize=builtin | FileCheck %s --enable-var-scope
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -w -emit-llvm -o - %s -fsanitize=builtin | FileCheck %s --enable-var-scope
+
+typedef __SIZE_TYPE__ size_t;
+
+// CHECK-LABEL: define void @check_memcpy(
+void check_memcpy(char *dst, const char *src, size_t sz) {
+  // CHECK: [[DSTINT:%.*]] = ptrtoint i8* [[DST:%.*]] to i64, !nosanitize
+  // CHECK: [[DSTMASK:%.*]] = and i64 [[DSTINT]], 3, !nosanitize
+  // CHECK: [[DSTOK:%.*]] = icmp eq i64 [[DSTMASK]], 0, !nosanitize
+  // CHECK: br i1 [[DSTOK]], label %[[CONT0:.*]], label %[[DSTFAILED:[^,]*]]
+
+  // CHECK: [[DSTFAILED]]:
+  // CHECK: [[DSTINT2:%.*]] = ptrtoint i8* [[DST]] to i64, !nosanitize
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[DSTINT2]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT0]]:
+  // CHECK: [[SRCINT:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64, !nosanitize
+  // CHECK: [[SRCMASK:%.*]] = and i64 [[SRCINT]], 3, !nosanitize
+  // CHECK: [[SRCOK:%.*]] = icmp eq i64 [[SRCMASK]], 0, !nosanitize
+  // CHECK: br i1 [[SRCOK]], label %[[CONT1:.*]], label %[[SRCFAILED:[^,]*]]
+
+  // CHECK: [[SRCFAILED]]:
+  // CHECK: [[SRCINT2:%.*]] = ptrtoint i8* [[SRC]] to i64, !nosanitize
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[SRCINT2]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT1]]:
+  // CHECK: [[SZREM:%.*]] = urem i64 [[SZ:%.*]], 4, !nosanitize
+  // CHECK: [[SZOK:%.*]] = icmp eq i64 [[SZREM]], 0, !nosanitize
+  // CHECK: br i1 [[SZOK]], label %[[CONT2:.*]], label %[[SZFAILED:[^,]*]]
+
+  // CHECK: [[SZFAILED]]:
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[SZ]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT2]]:
+  // CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[DST]], i8* align 4 [[SRC]], i64 [[SZ]], i32 4)
+  __builtin_memcpy_overloaded(dst, src, sz, 4);
+}
+
+// CHECK-LABEL: define void @check_memmove(
+void check_memmove(char *dst, const char *src, size_t sz) {
+  // CHECK: [[DSTINT:%.*]] = ptrtoint i8* [[DST:%.*]] to i64, !nosanitize
+  // CHECK: [[DSTMASK:%.*]] = and i64 [[DSTINT]], 3, !nosanitize
+  // CHECK: [[DSTOK:%.*]] = icmp eq i64 [[DSTMASK]], 0, !nosanitize
+  // CHECK: br i1 [[DSTOK]], label %[[CONT0:.*]], label %[[DSTFAILED:[^,]*]]
+
+  // CHECK: [[DSTFAILED]]:
+  // CHECK: [[DSTINT2:%.*]] = ptrtoint i8* [[DST]] to i64, !nosanitize
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[DSTINT2]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT0]]:
+  // CHECK: [[SRCINT:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64, !nosanitize
+  // CHECK: [[SRCMASK:%.*]] = and i64 [[SRCINT]], 3, !nosanitize
+  // CHECK: [[SRCOK:%.*]] = icmp eq i64 [[SRCMASK]], 0, !nosanitize
+  // CHECK: br i1 [[SRCOK]], label %[[CONT1:.*]], label %[[SRCFAILED:[^,]*]]
+
+  // CHECK: [[SRCFAILED]]:
+  // CHECK: [[SRCINT2:%.*]] = ptrtoint i8* [[SRC]] to i64, !nosanitize
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[SRCINT2]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT1]]:
+  // CHECK: [[SZREM:%.*]] = urem i64 [[SZ:%.*]], 4, !nosanitize
+  // CHECK: [[SZOK:%.*]] = icmp eq i64 [[SZREM]], 0, !nosanitize
+  // CHECK: br i1 [[SZOK]], label %[[CONT2:.*]], label %[[SZFAILED:[^,]*]]
+
+  // CHECK: [[SZFAILED]]:
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[SZ]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT2]]:
+  // CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[DST]], i8* align 4 [[SRC]], i64 [[SZ]], i32 4)
+  __builtin_memmove_overloaded(dst, src, sz, 4);
+}
+
+// CHECK-LABEL: define void @check_memset(
+void check_memset(char *dst, size_t sz) {
+  // CHECK: [[DSTINT:%.*]] = ptrtoint i8* [[DST:%.*]] to i64, !nosanitize
+  // CHECK: [[DSTMASK:%.*]] = and i64 [[DSTINT]], 3, !nosanitize
+  // CHECK: [[DSTOK:%.*]] = icmp eq i64 [[DSTMASK]], 0, !nosanitize
+  // CHECK: br i1 [[DSTOK]], label %[[CONT0:.*]], label %[[DSTFAILED:[^,]*]]
+
+  // CHECK: [[DSTFAILED]]:
+  // CHECK: [[DSTINT2:%.*]] = ptrtoint i8* [[DST]] to i64, !nosanitize
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[DSTINT2]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT0]]:
+  // CHECK: [[SZREM:%.*]] = urem i64 [[SZ:%.*]], 4, !nosanitize
+  // CHECK: [[SZOK:%.*]] = icmp eq i64 [[SZREM]], 0, !nosanitize
+  // CHECK: br i1 [[SZOK]], label %[[CONT1:.*]], label %[[SZFAILED:[^,]*]]
+
+  // CHECK: [[SZFAILED]]:
+  // CHECK: call void @__ubsan_handle_invalid_builtin_abort({{.*}}, i64 [[SZ]])
+  // CHECK: unreachable, !nosanitize
+
+  // CHECK: [[CONT1]]:
+  // CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[DST]], i8 42, i64 [[SZ]], i32 4)
+  __builtin_memset_overloaded(dst, 42, sz, 4);
+}
Index: clang/test/CodeGen/ubsan-builtin-ctz-clz.c
===================================================================
--- clang/test/CodeGen/ubsan-builtin-ctz-clz.c
+++ clang/test/CodeGen/ubsan-builtin-ctz-clz.c
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s -fsanitize=builtin | FileCheck %s
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -w -emit-llvm -o - %s -fsanitize=builtin | FileCheck %s --check-prefix=NOT-UB
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s -fsanitize=builtin | FileCheck %s --enable-var-scope
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -w -emit-llvm -o - %s -fsanitize=builtin | FileCheck %s --enable-var-scope --check-prefix=NOT-UB
 
 // NOT-UB-NOT: __ubsan_handle_invalid_builtin
 
-// CHECK: define void @check_ctz
+// CHECK-LABEL: define void @check_ctz(
 void check_ctz(int n) {
   // CHECK: [[NOT_ZERO:%.*]] = icmp ne i32 [[N:%.*]], 0, !nosanitize
   // CHECK-NEXT: br i1 [[NOT_ZERO]]
@@ -23,7 +23,7 @@
   __builtin_ctzll(n);
 }
 
-// CHECK: define void @check_clz
+// CHECK-LABEL: define void @check_clz(
 void check_clz(int n) {
   // CHECK: [[NOT_ZERO:%.*]] = icmp ne i32 [[N:%.*]], 0, !nosanitize
   // CHECK-NEXT: br i1 [[NOT_ZERO]]
Index: clang/test/CodeGen/builtin-overloaded-memfns.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtin-overloaded-memfns.c
@@ -0,0 +1,319 @@
+// RUN: %clang_cc1 -triple arm64-unknown-unknown -fms-extensions -emit-llvm < %s | FileCheck %s
+
+typedef __SIZE_TYPE__ size_t;
+
+// CHECK-LABEL: volatile_dst_cpy_void(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_cpy_void(volatile void *dst, const void *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_move_void(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_move_void(volatile void *dst, const void *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_set_void(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_set_void(volatile void *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: volatile_src_cpy_void(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_src_cpy_void(void *dst, volatile const void *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_src_move_void(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_src_move_void(void *dst, volatile const void *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dstsrc_cpy_void(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dstsrc_cpy_void(volatile void *dst, volatile const void *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dstsrc_move_void(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dstsrc_move_void(volatile void *dst, volatile const void *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_cpy_char(volatile char *dst, const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_move_char(volatile char *dst, const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_set_char(volatile char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: volatile_dst_cpy_int(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_cpy_int(volatile int *dst, const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_move_int(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_move_int(volatile int *dst, const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: volatile_dst_set_int(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void volatile_dst_set_int(volatile int *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: unaligned_dst_cpy_int(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_dst_cpy_int(__unaligned int *dst, const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: unaligned_dst_move_int(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_dst_move_int(__unaligned int *dst, const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: unaligned_dst_set_int(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void unaligned_dst_set_int(__unaligned int *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: unaligned_src_cpy_int(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_src_cpy_int(int *dst, __unaligned const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: unaligned_src_move_int(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void unaligned_src_move_int(int *dst, __unaligned const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: addrspace_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p32i8.p32i8.i64(i8 addrspace(32)* align 1 %{{[0-9]*}}, i8 addrspace(32)* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void addrspace_srcdst_cpy_char(__attribute__((address_space(32))) char *dst, __attribute__((address_space(32))) const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size); }
+
+// CHECK-LABEL: addrspace_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p32i8.p32i8.i64(i8 addrspace(32)* align 1 %{{[0-9]*}}, i8 addrspace(32)* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void addrspace_srcdst_move_char(__attribute__((address_space(32))) char *dst, __attribute__((address_space(32))) const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size); }
+
+// CHECK-LABEL: addrspace_dst_set_char(
+// CHECK: call void @llvm.memset.p32i8.i64(i8 addrspace(32)* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void addrspace_dst_set_char(__attribute__((address_space(32))) char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size); }
+
+// CHECK-LABEL: atomic_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_srcdst_cpy_char(char *dst, const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, 1); }
+
+// CHECK-LABEL: atomic_srcdst_cpy_char_big(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %{{[0-9]*}}, i8* align 16 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 16)
+void atomic_srcdst_cpy_char_big(char *dst, const char *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, 16); }
+
+// CHECK-LABEL: atomic_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_srcdst_move_char(char *dst, const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, 1); }
+
+// CHECK-LABEL: atomic_srcdst_move_char_big(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %{{[0-9]*}}, i8* align 16 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 16)
+void atomic_srcdst_move_char_big(char *dst, const char *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, 16); }
+
+// CHECK-LABEL: atomic_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 1)
+void atomic_dst_set_char(char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, 1); }
+
+// CHECK-LABEL: atomic_dst_set_char_big(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 16)
+void atomic_dst_set_char_big(char *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, 16); }
+
+// CHECK-LABEL: atomic_srcdst_cpy_int(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 4)
+void atomic_srcdst_cpy_int(int *dst, const int *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, 4); }
+
+// CHECK-LABEL: atomic_srcdst_move_int(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 4)
+void atomic_srcdst_move_int(int *dst, const int *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, 4); }
+
+// CHECK-LABEL: atomic_dst_set_int(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 4)
+void atomic_dst_set_int(int *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, 4); }
+
+// CHECK-LABEL: atomic_srcdst_cpy_longlong(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %{{[0-9]*}}, i8* align 8 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 8)
+void atomic_srcdst_cpy_longlong(long long *dst, const long long *src, size_t size) { __builtin_memcpy_overloaded(dst, src, size, sizeof(long long)); }
+
+// CHECK-LABEL: atomic_srcdst_move_longlong(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %{{[0-9]*}}, i8* align 8 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 8)
+void atomic_srcdst_move_longlong(long long *dst, const long long *src, size_t size) { __builtin_memmove_overloaded(dst, src, size, sizeof(long long)); }
+
+// CHECK-LABEL: atomic_dst_set_longlong(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 8)
+void atomic_dst_set_longlong(long long *dst, size_t size) { __builtin_memset_overloaded(dst, 0, size, sizeof(long long)); }
+
+// CHECK-LABEL: atomic_static_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_static_srcdst_cpy_char(char dst[static 2], const char src[2], size_t size) { __builtin_memcpy_overloaded(dst, src, size, 1); }
+
+// CHECK-LABEL: atomic_static_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_static_srcdst_move_char(char dst[static 2], const char src[2], size_t size) { __builtin_memmove_overloaded(dst, src, size, 1); }
+
+// CHECK-LABEL: atomic_static_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i32 1)
+void atomic_static_dst_set_char(char dst[static 2], size_t size) { __builtin_memset_overloaded(dst, 0, size, 1); }
+
+extern char dst_atomic[2];
+extern const char src_atomic[2];
+
+// CHECK-LABEL: atomic_array_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_array_srcdst_cpy_char(size_t size) { __builtin_memcpy_overloaded(dst_atomic, src_atomic, size, 1); }
+
+// CHECK-LABEL: atomic_array_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i32 1)
+void atomic_array_srcdst_move_char(size_t size) { __builtin_memmove_overloaded(dst_atomic, src_atomic, size, 1); }
+
+// CHECK-LABEL: atomic_array_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i32 1)
+void atomic_array_dst_set_char(size_t size) { __builtin_memset_overloaded(dst_atomic, 0, size, 1); }
+
+// CHECK-LABEL: atomic_local_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 4, i32 4)
+void atomic_local_srcdst_cpy_char(size_t size) {
+  int dst;
+  const int src;
+  __builtin_memcpy_overloaded(&dst, &src, sizeof(dst), sizeof(dst));
+}
+
+// CHECK-LABEL: atomic_local_srcdst_move_char(
+// CHECK: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8* align 4 %{{[0-9]*}}, i64 4, i32 4)
+void atomic_local_srcdst_move_char(size_t size) {
+  int dst;
+  const int src;
+  __builtin_memmove_overloaded(&dst, &src, sizeof(dst), sizeof(dst));
+}
+
+// CHECK-LABEL: atomic_local_dst_set_char(
+// CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %{{[0-9]*}}, i8 0, i64 4, i32 4)
+void atomic_local_dst_set_char(size_t size) {
+  int dst;
+  __builtin_memset_overloaded(&dst, 0, sizeof(dst), sizeof(dst));
+}
+
+// CHECK-LABEL: vla_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9a-z]*}}, i8* align 1 %{{[0-9a-z]*}}, i64 %{{[0-9]*}}, i1 true)
+void vla_srcdst_cpy_char(size_t size) {
+  volatile char dst[size];
+  const volatile char src[size];
+  __builtin_memcpy_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: vla_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9a-z]*}}, i8* align 1 %{{[0-9a-z]*}}, i64 %{{[0-9]*}}, i1 true)
+void vla_srcdst_move_char(size_t size) {
+  volatile char dst[size];
+  const volatile char src[size];
+  __builtin_memmove_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: vla_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9a-z]*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void vla_dst_set_char(size_t size) {
+  volatile char dst[size];
+  __builtin_memset_overloaded(dst, 0, size);
+}
+
+// CHECK-LABEL: static_srcdst_cpy_char(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void static_srcdst_cpy_char(char dst[static 42], const char src[static 42], size_t size) {
+  __builtin_memcpy_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: static_srcdst_move_char(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8* align 1 %{{[0-9]*}}, i64 %{{[0-9]*}}, i1 false)
+void static_srcdst_move_char(char dst[static 42], const char src[static 42], size_t size) {
+  __builtin_memmove_overloaded(dst, src, size);
+}
+
+// CHECK-LABEL: static_dst_set_char(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %{{[0-9]*}}, i8 0, i64 %{{[0-9]*}}, i1 false)
+void static_dst_set_char(char dst[static 42], size_t size) {
+  __builtin_memset_overloaded(dst, 0, size);
+}
+
+extern char dst_unsized[];
+extern volatile char dst_vunsized[];
+extern const char src_cunsized[];
+extern const volatile char src_cvunsized[];
+
+// CHECK-LABEL: array_volatile_unsized_dst_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dst_cpy(size_t size) { __builtin_memcpy_overloaded(dst_vunsized, src_cunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dst_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dst_move(size_t size) { __builtin_memmove_overloaded(dst_vunsized, src_cunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dst_set(size_t size) { __builtin_memset_overloaded(dst_vunsized, 0, size); }
+
+// CHECK-LABEL: array_volatile_unsized_src_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_src_cpy(size_t size) { __builtin_memcpy_overloaded(dst_unsized, src_cvunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_src_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_src_move(size_t size) { __builtin_memmove_overloaded(dst_unsized, src_cvunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_vunsized, src_cvunsized, size); }
+
+// CHECK-LABEL: array_volatile_unsized_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 getelementptr {{.*}}, i8* align 1 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_unsized_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_vunsized, src_cvunsized, size); }
+
+extern __attribute__((aligned(128))) char dst_512[512];
+extern __attribute__((aligned(128))) volatile char dst_v512[512];
+extern __attribute__((aligned(128))) const char src_c512[512];
+extern __attribute__((aligned(128))) const volatile char src_cv512[512];
+
+// CHECK-LABEL: array_volatile_dst_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dst_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512, src_c512, size); }
+
+// CHECK-LABEL: array_volatile_dst_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dst_move(size_t size) { __builtin_memmove_overloaded(dst_v512, src_c512, size); }
+
+// CHECK-LABEL: array_volatile_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dst_set(size_t size) { __builtin_memset_overloaded(dst_v512, 0, size); }
+
+// CHECK-LABEL: array_volatile_src_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_src_cpy(size_t size) { __builtin_memcpy_overloaded(dst_512, src_cv512, size); }
+
+// CHECK-LABEL: array_volatile_src_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_src_move(size_t size) { __builtin_memmove_overloaded(dst_512, src_cv512, size); }
+
+// CHECK-LABEL: array_volatile_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512, src_cv512, size); }
+
+// CHECK-LABEL: array_volatile_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void array_volatile_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_v512, src_cv512, size); }
+
+extern __attribute__((aligned(128))) volatile char dst_v512_32[512][32];
+extern __attribute__((aligned(128))) const volatile char src_cv512_32[512][32];
+
+// CHECK-LABEL: multiarray_volatile_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_volatile_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512_32, src_cv512_32, size); }
+
+// CHECK-LABEL: multiarray_volatile_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8* align 128 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_volatile_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_v512_32, src_cv512_32, size); }
+
+// CHECK-LABEL: multiarray_volatile_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 128 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void multiarray_volatile_dst_set(size_t size) { __builtin_memset_overloaded(dst_v512_32, 0, size); }
+
+// CHECK-LABEL: multiarray_idx_volatile_dstsrc_cpy(
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 getelementptr {{.*}}, i8* align 32 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_idx_volatile_dstsrc_cpy(size_t size) { __builtin_memcpy_overloaded(dst_v512_32[1], src_cv512_32[1], size); }
+
+// CHECK-LABEL: multiarray_idx_volatile_dstsrc_move(
+// CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 32 getelementptr {{.*}}, i8* align 32 getelementptr {{.*}}, i64 %{{[0-9]*}}, i1 true)
+void multiarray_idx_volatile_dstsrc_move(size_t size) { __builtin_memmove_overloaded(dst_v512_32[1], src_cv512_32[1], size); }
+
+// CHECK-LABEL: multiarray_idx_volatile_dst_set(
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 32 getelementptr {{.*}}, i8 0, i64 %{{[0-9]*}}, i1 true)
+void multiarray_idx_volatile_dst_set(size_t size) { __builtin_memset_overloaded(dst_v512_32[1], 0, size); }
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1716,6 +1716,11 @@
     }
     break;
   }
+  case Builtin::BI__builtin_memcpy_overloaded:
+  case Builtin::BI__builtin_memmove_overloaded:
+    return SemaBuiltinMemcpyOverloaded(TheCallResult);
+  case Builtin::BI__builtin_memset_overloaded:
+    return SemaBuiltinMemsetOverloaded(TheCallResult);
 #define BUILTIN(ID, TYPE, ATTRS)
 #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
   case Builtin::BI##ID: \
@@ -5527,6 +5532,242 @@
   return TheCallResult;
 }
 
+/// Perform semantic checking for __builtin_memcpy_overloaded and
+/// __builtin_memmove_overloaded, which are overloaded based on the pointer
+/// types of the destination and source arguments.
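+///
+/// For illustration, both of these forms are accepted; the second names a
+/// 4-byte atomic access size:
+///   __builtin_memcpy_overloaded(dst, src, size);
+///   __builtin_memcpy_overloaded(dst, src, size, 4);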
+ExprResult Sema::SemaBuiltinMemcpyOverloaded(ExprResult TheCallResult) {
+  CallExpr *TheCall = (CallExpr *)TheCallResult.get();
+
+  unsigned argCount = TheCall->getNumArgs();
+  if (argCount != 3 && argCount != 4) {
+    if (argCount < 3)
+      return ExprError(
+          Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args)
+          << 0 /*function call*/ << 3 << argCount << TheCall->getSourceRange());
+    SourceRange excessRange(TheCall->getArg(4)->getBeginLoc(),
+                            TheCall->getArg(argCount - 1)->getEndLoc());
+    return ExprError(
+        Diag(excessRange.getBegin(), diag::err_typecheck_call_too_many_args)
+        << 0 /*function call*/ << 4 << argCount
+        << TheCall->getArg(1)->getSourceRange());
+  }
+
+  bool HasElSz = argCount == 4;
+
+  ExprResult DstPtr = DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (DstPtr.isInvalid())
+    return ExprError();
+  clang::Expr *DstOp = DstPtr.get();
+  TheCall->setArg(0, DstOp);
+
+  ExprResult SrcPtr = DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+  if (SrcPtr.isInvalid())
+    return ExprError();
+  clang::Expr *SrcOp = SrcPtr.get();
+  TheCall->setArg(1, SrcOp);
+
+  const PointerType *DstTy = DstOp->getType()->getAs<PointerType>();
+  const PointerType *SrcTy = SrcOp->getType()->getAs<PointerType>();
+  if (!DstTy)
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), diag::err_init_conversion_failed)
+        << InitializedEntity::EK_Parameter << Context.VoidPtrTy
+        << DstOp->isLValue() << DstOp->getType() << /*no difference*/ 0
+        << DstOp->getSourceRange());
+  if (!SrcTy)
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), diag::err_init_conversion_failed)
+        << InitializedEntity::EK_Parameter << Context.VoidPtrTy
+        << SrcOp->isLValue() << SrcOp->getType() << /*no difference*/ 0
+        << SrcOp->getSourceRange());
+
+  QualType DstValTy = DstTy->getPointeeType();
+  QualType SrcValTy = SrcTy->getPointeeType();
+
+  if (DstValTy.isConstQualified())
+    return ExprError(Diag(TheCall->getBeginLoc(), PDiag(diag::err_const_arg))
+                     << DstValTy << DstOp->getSourceRange());
+  if (DstValTy->isAtomicType())
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), PDiag(diag::err_atomic_qualifier_invalid))
+        << DstValTy << DstOp->getSourceRange());
+  if (SrcValTy->isAtomicType())
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), PDiag(diag::err_atomic_qualifier_invalid))
+        << SrcValTy << SrcOp->getSourceRange());
+
+  ExprResult SizeRes(TheCall->getArg(2));
+  InitializedEntity SizeEntity = InitializedEntity::InitializeParameter(
+      Context, Context.getSizeType(), false);
+  SizeRes = PerformCopyInitialization(SizeEntity, SourceLocation(), SizeRes);
+  if (SizeRes.isInvalid())
+    return ExprError();
+  TheCall->setArg(2, SizeRes.get());
+
+  bool IsNonZero;
+  if (!SizeRes.get()->isValueDependent() &&
+      SizeRes.get()->EvaluateAsBooleanCondition(IsNonZero, Context) &&
+      IsNonZero) {
+    CheckNonNullArgument(*this, DstOp, TheCall->getExprLoc());
+    CheckNonNullArgument(*this, SrcOp, TheCall->getExprLoc());
+  }
+
+  if (HasElSz) {
+    clang::Expr *Arg = TheCall->getArg(3);
+
+    (void)isCompleteType(DstOp->getBeginLoc(), DstValTy);
+    (void)isCompleteType(SrcOp->getBeginLoc(), SrcValTy);
+    if (!DstValTy.isTriviallyCopyableType(Context) && !DstValTy->isVoidType())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_atomic_op_needs_trivial_copy))
+                       << DstValTy << DstOp->getSourceRange());
+    if (!SrcValTy.isTriviallyCopyableType(Context) && !SrcValTy->isVoidType())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_atomic_op_needs_trivial_copy))
+                       << SrcValTy << SrcOp->getSourceRange());
+    if (DstValTy.isVolatileQualified())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_sized_volatile_unsupported))
+                       << DstValTy << DstOp->getSourceRange());
+    if (SrcValTy.isVolatileQualified())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_sized_volatile_unsupported))
+                       << SrcValTy << SrcOp->getSourceRange());
+
+    if (!Arg->isValueDependent()) {
+      llvm::APSInt ElSz;
+      ExprResult ElSzRes(VerifyIntegerConstantExpression(Arg, &ElSz));
+      if (ElSzRes.isInvalid())
+        return ExprError();
+      TheCall->setArg(3, ElSzRes.get());
+
+      if (!ElSz.isStrictlyPositive() || !ElSz.isPowerOf2())
+        return ExprError(
+            Diag(TheCall->getBeginLoc(), diag::err_argument_not_power_of_2)
+            << Arg->getSourceRange());
+      int InlineWidth =
+          Context
+              .toCharUnitsFromBits(
+                  Context.getTargetInfo().getMaxAtomicInlineWidth())
+              .getQuantity();
+      if (ElSz.ugt(InlineWidth))
+        return ExprError(Diag(TheCall->getBeginLoc(),
+                              PDiag(diag::err_elsz_must_be_lock_free))
+                         << (int)ElSz.getLimitedValue() << InlineWidth
+                         << Arg->getSourceRange());
+    }
+  }
+
+  return TheCallResult;
+}
+
+/// Perform semantic checking for __builtin_memset_overloaded, which is
+/// overloaded based on the pointer type of the destination argument.
+ExprResult Sema::SemaBuiltinMemsetOverloaded(ExprResult TheCallResult) {
+  CallExpr *TheCall = (CallExpr *)TheCallResult.get();
+
+  unsigned argCount = TheCall->getNumArgs();
+  if (argCount != 3 && argCount != 4) {
+    if (argCount < 3)
+      return ExprError(
+          Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args)
+          << 0 /*function call*/ << 3 << argCount << TheCall->getSourceRange());
+    SourceRange excessRange(TheCall->getArg(4)->getBeginLoc(),
+                            TheCall->getArg(argCount - 1)->getEndLoc());
+    return ExprError(
+        Diag(excessRange.getBegin(), diag::err_typecheck_call_too_many_args)
+        << 0 /*function call*/ << 4 << argCount
+        << TheCall->getArg(1)->getSourceRange());
+  }
+
+  bool HasElSz = argCount == 4;
+
+  ExprResult DstPtr = DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (DstPtr.isInvalid())
+    return ExprError();
+  clang::Expr *DstOp = DstPtr.get();
+  TheCall->setArg(0, DstOp);
+
+  const PointerType *DstTy = DstOp->getType()->getAs<PointerType>();
+  if (!DstTy)
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), diag::err_init_conversion_failed)
+        << InitializedEntity::EK_Parameter << Context.VoidPtrTy
+        << DstOp->isLValue() << DstOp->getType() << /*no difference*/ 0
+        << DstOp->getSourceRange());
+
+  QualType DstValTy = DstTy->getPointeeType();
+  if (DstValTy.isConstQualified())
+    return ExprError(Diag(TheCall->getBeginLoc(), PDiag(diag::err_const_arg))
+                     << DstValTy << DstOp->getSourceRange());
+  if (DstValTy->isAtomicType())
+    return ExprError(
+        Diag(TheCall->getBeginLoc(), PDiag(diag::err_atomic_qualifier_invalid))
+        << DstValTy << DstOp->getSourceRange());
+
+  ExprResult ValRes(TheCall->getArg(1));
+  InitializedEntity ValEntity = InitializedEntity::InitializeParameter(
+      Context, Context.UnsignedCharTy, false);
+  ValRes = PerformCopyInitialization(ValEntity, SourceLocation(), ValRes);
+  if (ValRes.isInvalid())
+    return ExprError();
+  TheCall->setArg(1, ValRes.get());
+
+  ExprResult SizeRes(TheCall->getArg(2));
+  InitializedEntity SizeEntity = InitializedEntity::InitializeParameter(
+      Context, Context.getSizeType(), false);
+  SizeRes = PerformCopyInitialization(SizeEntity, SourceLocation(), SizeRes);
+  if (SizeRes.isInvalid())
+    return ExprError();
+  TheCall->setArg(2, SizeRes.get());
+
+  bool IsNonZero;
+  if (!SizeRes.get()->isValueDependent() &&
+      SizeRes.get()->EvaluateAsBooleanCondition(IsNonZero, Context) &&
+      IsNonZero)
+    CheckNonNullArgument(*this, DstOp, TheCall->getExprLoc());
+
+  if (HasElSz) {
+    clang::Expr *Arg = TheCall->getArg(3);
+
+    (void)isCompleteType(DstOp->getBeginLoc(), DstValTy);
+    if (!DstValTy.isTriviallyCopyableType(Context) && !DstValTy->isVoidType())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_atomic_op_needs_trivial_copy))
+                       << DstValTy << DstOp->getSourceRange());
+    if (DstValTy.isVolatileQualified())
+      return ExprError(Diag(TheCall->getBeginLoc(),
+                            PDiag(diag::err_sized_volatile_unsupported))
+                       << DstValTy << DstOp->getSourceRange());
+
+    if (!Arg->isValueDependent()) {
+      llvm::APSInt ElSz;
+      ExprResult ElSzRes(VerifyIntegerConstantExpression(Arg, &ElSz));
+      if (ElSzRes.isInvalid())
+        return ExprError();
+      TheCall->setArg(3, ElSzRes.get());
+
+      if (!ElSz.isStrictlyPositive() || !ElSz.isPowerOf2())
+        return ExprError(
+            Diag(TheCall->getBeginLoc(), diag::err_argument_not_power_of_2)
+            << Arg->getSourceRange());
+      int InlineWidth =
+          Context
+              .toCharUnitsFromBits(
+                  Context.getTargetInfo().getMaxAtomicInlineWidth())
+              .getQuantity();
+      if (ElSz.ugt(InlineWidth))
+        return ExprError(Diag(TheCall->getBeginLoc(),
+                              PDiag(diag::err_elsz_must_be_lock_free))
+                         << (int)ElSz.getLimitedValue() << InlineWidth
+                         << Arg->getSourceRange());
+    }
+  }
+
+  return TheCallResult;
+}
+
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
 /// Note: It might also make sense to do the UTF-16 conversion here (would
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -4384,14 +4384,16 @@
 
   /// Specifies which type of sanitizer check to apply when handling a
   /// particular builtin.
-  enum BuiltinCheckKind {
-    BCK_CTZPassedZero,
-    BCK_CLZPassedZero,
+  enum class BuiltinCheck : unsigned char {
+    CTZPassedZero,
+    CLZPassedZero,
+    AtomicMemMisaligned,
+    AtomicMemMismatchedSize,
   };
 
   /// Emits an argument for a call to a builtin. If the builtin sanitizer is
   /// enabled, a runtime check specified by \p Kind is also emitted.
-  llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind);
+  llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheck Kind);
 
   /// Emit a description of a type in a format suitable for passing to
   /// a runtime sanitizer handler.
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -625,6 +625,16 @@
   return {Width, Signed};
 }
 
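+// Return the pointee type of a mem* builtin's pointer argument, looking
+// through implicit conversions so that qualifiers such as volatile on array
+// and Objective-C object pointer arguments are preserved.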
+static QualType getPtrArgType(CodeGenModule &CGM, const CallExpr *E,
+                              unsigned ArgNo) {
+  QualType ArgTy = E->getArg(ArgNo)->IgnoreImpCasts()->getType();
+  if (ArgTy->isArrayType())
+    return CGM.getContext().getAsArrayType(ArgTy)->getElementType();
+  if (ArgTy->isObjCObjectPointerType())
+    return ArgTy->castAs<clang::ObjCObjectPointerType>()->getPointeeType();
+  return ArgTy->castAs<clang::PointerType>()->getPointeeType();
+}
+
 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
   llvm::Type *DestType = Int8PtrTy;
   if (ArgValue->getType() != DestType)
@@ -1187,9 +1197,10 @@
 }
 
 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
-                                                 BuiltinCheckKind Kind) {
-  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
-          && "Unsupported builtin check kind");
+                                                 BuiltinCheck Kind) {
+  assert((Kind == BuiltinCheck::CLZPassedZero ||
+          Kind == BuiltinCheck::CTZPassedZero) &&
+         "Unsupported builtin check kind");
 
   Value *ArgValue = EmitScalarExpr(E);
   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
@@ -1201,7 +1212,7 @@
   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
             SanitizerHandler::InvalidBuiltin,
             {EmitCheckSourceLocation(E->getExprLoc()),
-             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
+             llvm::ConstantInt::get(Builder.getInt8Ty(), (int)Kind)},
             None);
   return ArgValue;
 }
@@ -1508,6 +1519,71 @@
   return RValue::get(Overflow);
 }
 
+static void EmitAtomicMemUBSanCheck(CodeGenFunction &CGF, unsigned BuiltinID,
+                                    const CallExpr *Call, Value *Dst,
+                                    Value *Src, Value *Size, CharUnits ElSz) {
+  if (!CGF.SanOpts.has(SanitizerKind::Builtin))
+    return;
+
+  CodeGenFunction::SanitizerScope SanScope(&CGF);
+  unsigned PtrBits = CGF.IntPtrTy->getIntegerBitWidth();
+  auto *ElSzI32 =
+      llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), ElSz.getQuantity());
+  auto *ElSzIPtr = llvm::Constant::getIntegerValue(
+      CGF.IntPtrTy, APInt(PtrBits, ElSz.getQuantity()));
+  auto *AlignMask = llvm::Constant::getIntegerValue(
+      CGF.IntPtrTy, APInt(PtrBits, ElSz.getQuantity() - 1));
+  auto *Zero = llvm::Constant::getNullValue(CGF.IntPtrTy);
+  auto *MisalignedFlag = llvm::ConstantInt::get(
+      CGF.Builder.getInt8Ty(),
+      (int)CodeGenFunction::BuiltinCheck::AtomicMemMisaligned);
+  auto *SizeFlag = llvm::ConstantInt::get(
+      CGF.Builder.getInt8Ty(),
+      (int)CodeGenFunction::BuiltinCheck::AtomicMemMismatchedSize);
+
+  // ((uintptr_t)Dst & (ElSz - 1)) == 0
+  auto *DstOK = CGF.Builder.CreateICmpEQ(
+      CGF.Builder.CreateAnd(CGF.Builder.CreatePtrToInt(Dst, CGF.IntPtrTy),
+                            AlignMask),
+      Zero);
+  CGF.EmitCheck(std::make_pair(DstOK, SanitizerKind::Builtin),
+                SanitizerHandler::InvalidBuiltin,
+                {CGF.EmitCheckSourceLocation(Call->getArg(0)->getExprLoc()),
+                 MisalignedFlag, ElSzI32},
+                {Dst});
+
+  // ((uintptr_t)Src & (ElSz - 1)) == 0
+  switch (BuiltinID) {
+  case Builtin::BI__builtin_memcpy_overloaded:
+  case Builtin::BI__builtin_memmove_overloaded: {
+    auto *SrcOK = CGF.Builder.CreateICmpEQ(
+        CGF.Builder.CreateAnd(CGF.Builder.CreatePtrToInt(Src, CGF.IntPtrTy),
+                              AlignMask),
+        Zero);
+    CGF.EmitCheck(std::make_pair(SrcOK, SanitizerKind::Builtin),
+                  SanitizerHandler::InvalidBuiltin,
+                  {CGF.EmitCheckSourceLocation(Call->getArg(1)->getExprLoc()),
+                   MisalignedFlag, ElSzI32},
+                  {Src});
+    break;
+  }
+  case Builtin::BI__builtin_memset_overloaded:
+    // No source buffer on memset.
+    break;
+  default:
+    llvm_unreachable("unknown atomic mem builtin");
+  }
+
+  // (Size % ElSz) == 0
+  auto *SizeRem = CGF.Builder.CreateURem(Size, ElSzIPtr);
+  auto *SizeOK = CGF.Builder.CreateICmpEQ(SizeRem, Zero);
+  CGF.EmitCheck(std::make_pair(SizeOK, SanitizerKind::Builtin),
+                SanitizerHandler::InvalidBuiltin,
+                {CGF.EmitCheckSourceLocation(Call->getArg(2)->getExprLoc()),
+                 SizeFlag, ElSzI32},
+                {Size});
+}
+
 static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
                                Value *&RecordPtr, CharUnits Align,
                                llvm::FunctionCallee Func, int Lvl) {
@@ -2077,7 +2153,8 @@
   case Builtin::BI__builtin_ctz:
   case Builtin::BI__builtin_ctzl:
   case Builtin::BI__builtin_ctzll: {
-    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
+    Value *ArgValue =
+        EmitCheckedArgForBuiltin(E->getArg(0), BuiltinCheck::CTZPassedZero);
 
     llvm::Type *ArgType = ArgValue->getType();
     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
@@ -2094,7 +2171,8 @@
   case Builtin::BI__builtin_clz:
   case Builtin::BI__builtin_clzl:
   case Builtin::BI__builtin_clzll: {
-    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
+    Value *ArgValue =
+        EmitCheckedArgForBuiltin(E->getArg(0), BuiltinCheck::CLZPassedZero);
 
     llvm::Type *ArgType = ArgValue->getType();
     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
@@ -2623,16 +2701,38 @@
   }
   case Builtin::BImemcpy:
   case Builtin::BI__builtin_memcpy:
+  case Builtin::BI__builtin_memcpy_overloaded:
   case Builtin::BImempcpy:
   case Builtin::BI__builtin_mempcpy: {
+    QualType DestTy = getPtrArgType(CGM, E, 0);
+    QualType SrcTy = getPtrArgType(CGM, E, 1);
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Address Src = EmitPointerWithAlignment(E->getArg(1));
+    bool isVolatile =
+        DestTy.isVolatileQualified() || SrcTy.isVolatileQualified();
+    bool isAtomic = E->getNumArgs() == 4;
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                         E->getArg(1)->getExprLoc(), FD, 1);
-    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
+    if (isAtomic) {
+      auto ElSz =
+          CharUnits::fromQuantity(E->getArg(3)
+                                      ->getIntegerConstantExpr(CGM.getContext())
+                                      ->getLimitedValue());
+      EmitAtomicMemUBSanCheck(*this, BuiltinID, E, Dest.getPointer(),
+                              Src.getPointer(), SizeVal, ElSz);
+      // Element unordered atomic memcpy requires aligned pointers. That's also
+      // a precondition of this builtin, which we optionally check with UBSan
+      // and then assume with the following adjustments.
+      if (Dest.getAlignment() < ElSz)
+        Dest = Address(Dest.getPointer(), ElSz);
+      if (Src.getAlignment() < ElSz)
+        Src = Address(Src.getPointer(), ElSz);
+      Builder.CreateElementUnorderedAtomicMemCpy(Dest, Src, SizeVal, ElSz);
+    } else
+      Builder.CreateMemCpy(Dest, Src, SizeVal, isVolatile);
     if (BuiltinID == Builtin::BImempcpy ||
         BuiltinID == Builtin::BI__builtin_mempcpy)
       return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), SizeVal));
@@ -2701,26 +2801,66 @@
   }
 
   case Builtin::BImemmove:
-  case Builtin::BI__builtin_memmove: {
+  case Builtin::BI__builtin_memmove:
+  case Builtin::BI__builtin_memmove_overloaded: {
+    QualType DestTy = getPtrArgType(CGM, E, 0);
+    QualType SrcTy = getPtrArgType(CGM, E, 1);
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Address Src = EmitPointerWithAlignment(E->getArg(1));
+    bool isVolatile =
+        DestTy.isVolatileQualified() || SrcTy.isVolatileQualified();
+    bool isAtomic = E->getNumArgs() == 4;
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                         E->getArg(1)->getExprLoc(), FD, 1);
-    Builder.CreateMemMove(Dest, Src, SizeVal, false);
+    if (isAtomic) {
+      auto ElSz =
+          CharUnits::fromQuantity(E->getArg(3)
+                                      ->getIntegerConstantExpr(CGM.getContext())
+                                      ->getLimitedValue());
+      EmitAtomicMemUBSanCheck(*this, BuiltinID, E, Dest.getPointer(),
+                              Src.getPointer(), SizeVal, ElSz);
+      // Element unordered atomic memmove requires aligned pointers. That's
+      // also a precondition of this builtin, which we optionally check with
+      // UBSan and then assume with the following adjustments.
+      if (Dest.getAlignment() < ElSz)
+        Dest = Address(Dest.getPointer(), ElSz);
+      if (Src.getAlignment() < ElSz)
+        Src = Address(Src.getPointer(), ElSz);
+      Builder.CreateElementUnorderedAtomicMemMove(Dest, Src, SizeVal, ElSz);
+    } else
+      Builder.CreateMemMove(Dest, Src, SizeVal, isVolatile);
     return RValue::get(Dest.getPointer());
   }
   case Builtin::BImemset:
-  case Builtin::BI__builtin_memset: {
+  case Builtin::BI__builtin_memset:
+  case Builtin::BI__builtin_memset_overloaded: {
+    QualType DestTy = getPtrArgType(CGM, E, 0);
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                          Builder.getInt8Ty());
+    bool isVolatile = DestTy.isVolatileQualified();
+    bool isAtomic = E->getNumArgs() == 4;
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                         E->getArg(0)->getExprLoc(), FD, 0);
-    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
+    if (isAtomic) {
+      auto ElSz =
+          CharUnits::fromQuantity(E->getArg(3)
+                                      ->getIntegerConstantExpr(CGM.getContext())
+                                      ->getLimitedValue());
+      EmitAtomicMemUBSanCheck(*this, BuiltinID, E, Dest.getPointer(),
+                              /*Src=*/nullptr, SizeVal, ElSz);
+      // Element unordered atomic memset requires an aligned destination
+      // pointer. That's also a precondition of this builtin, which we
+      // optionally check with UBSan and then assume with the following
+      // adjustment.
+      if (Dest.getAlignment() < ElSz)
+        Dest = Address(Dest.getPointer(), ElSz);
+      Builder.CreateElementUnorderedAtomicMemSet(Dest, ByteVal, SizeVal, ElSz);
+    } else
+      Builder.CreateMemSet(Dest, ByteVal, SizeVal, isVolatile);
     return RValue::get(Dest.getPointer());
   }
   case Builtin::BI__builtin___memset_chk: {
Index: clang/lib/CodeGen/CGBuilder.h
===================================================================
--- clang/lib/CodeGen/CGBuilder.h
+++ clang/lib/CodeGen/CGBuilder.h
@@ -279,6 +279,15 @@
                         IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateElementUnorderedAtomicMemCpy;
+  llvm::CallInst *CreateElementUnorderedAtomicMemCpy(Address Dest, Address Src,
+                                                     llvm::Value *Size,
+                                                     CharUnits ElementSize) {
+    return CreateElementUnorderedAtomicMemCpy(
+        Dest.getPointer(), Dest.getAlignment().getAsAlign(), Src.getPointer(),
+        Src.getAlignment().getAsAlign(), Size, ElementSize.getQuantity());
+  }
+
   using CGBuilderBaseTy::CreateMemCpyInline;
   llvm::CallInst *CreateMemCpyInline(Address Dest, Address Src, uint64_t Size) {
     return CreateMemCpyInline(
@@ -294,6 +303,15 @@
                          Size, IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateElementUnorderedAtomicMemMove;
+  llvm::CallInst *CreateElementUnorderedAtomicMemMove(Address Dest, Address Src,
+                                                      llvm::Value *Size,
+                                                      CharUnits ElementSize) {
+    return CreateElementUnorderedAtomicMemMove(
+        Dest.getPointer(), Dest.getAlignment().getAsAlign(), Src.getPointer(),
+        Src.getAlignment().getAsAlign(), Size, ElementSize.getQuantity());
+  }
+
   using CGBuilderBaseTy::CreateMemSet;
   llvm::CallInst *CreateMemSet(Address Dest, llvm::Value *Value,
                                llvm::Value *Size, bool IsVolatile = false) {
@@ -301,6 +319,16 @@
                         Dest.getAlignment().getAsAlign(), IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateElementUnorderedAtomicMemSet;
+  llvm::CallInst *CreateElementUnorderedAtomicMemSet(Address Dest,
+                                                     llvm::Value *Value,
+                                                     llvm::Value *Size,
+                                                     CharUnits ElementSize) {
+    return CreateElementUnorderedAtomicMemSet(Dest.getPointer(), Value, Size,
+                                              Dest.getAlignment().getAsAlign(),
+                                              ElementSize.getQuantity());
+  }
+
   using CGBuilderBaseTy::CreatePreserveStructAccessIndex;
   Address CreatePreserveStructAccessIndex(Address Addr,
                                           unsigned Index,
Index: clang/lib/AST/ExprConstant.cpp
===================================================================
--- clang/lib/AST/ExprConstant.cpp
+++ clang/lib/AST/ExprConstant.cpp
@@ -8778,6 +8778,8 @@
     LLVM_FALLTHROUGH;
   case Builtin::BI__builtin_memcpy:
   case Builtin::BI__builtin_memmove:
+  case Builtin::BI__builtin_memcpy_overloaded:
+  case Builtin::BI__builtin_memmove_overloaded:
   case Builtin::BI__builtin_wmemcpy:
   case Builtin::BI__builtin_wmemmove: {
     bool WChar = BuiltinOp == Builtin::BIwmemcpy ||
@@ -8787,6 +8789,7 @@
     bool Move = BuiltinOp == Builtin::BImemmove ||
                 BuiltinOp == Builtin::BIwmemmove ||
                 BuiltinOp == Builtin::BI__builtin_memmove ||
+                BuiltinOp == Builtin::BI__builtin_memmove_overloaded ||
                 BuiltinOp == Builtin::BI__builtin_wmemmove;
 
     // The result of mem* is the first argument.
@@ -8841,6 +8844,21 @@
       return false;
     }
 
+    if (E->getNumArgs() == 4) {
+      // Overloaded mem* functions have an optional 4th parameter which denotes
+      // atomic element size in bytes. Constexpr interpretation doesn't care
+      // about atomicity, but needs to check runtime constraints on size. We
+      // can't check the alignment runtime constraints.
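+      // For example, in a constant expression
+      //   __builtin_memcpy_overloaded(dst, src, /*byte_size=*/6, /*access=*/4)
+      // is rejected here because 6 is not a multiple of 4.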
+      APSInt ElSz;
+      if (!EvaluateInteger(E->getArg(3), ElSz, Info))
+        return false;
+      if (N.urem(ElSz.getLimitedValue()) != 0) {
+        Info.FFDiag(E, diag::note_constexpr_mem_overloaded_bad_size)
+            << (int)N.getLimitedValue() << (int)ElSz.getLimitedValue();
+        return false;
+      }
+    }
+
     // Figure out how many T's we're copying.
     uint64_t TSize = Info.Ctx.getTypeSizeInChars(T).getQuantity();
     if (!WChar) {
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -12204,6 +12204,8 @@
   bool SemaBuiltinSetjmp(CallExpr *TheCall);
   ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult);
   ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult);
+  ExprResult SemaBuiltinMemcpyOverloaded(ExprResult TheCallResult);
+  ExprResult SemaBuiltinMemsetOverloaded(ExprResult TheCallResult);
   ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult,
                                      AtomicExpr::AtomicOp Op);
   ExprResult SemaBuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult,
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7932,6 +7932,8 @@
 
 def warn_call_wrong_number_of_arguments : Warning<
   "too %select{few|many}0 arguments in call to %1">;
+def err_atomic_qualifier_invalid : Error<
+  "parameter cannot have the _Atomic qualifier (%0 invalid)">;
 def err_atomic_builtin_must_be_pointer : Error<
   "address argument to atomic builtin must be a pointer (%0 invalid)">;
 def err_atomic_builtin_must_be_pointer_intptr : Error<
@@ -8931,6 +8933,15 @@
   "null returned from %select{function|method}0 that requires a non-null return value">,
   InGroup<NonNull>;
 
+def err_const_arg : Error<
+  "argument must be non-const, got %0">;
+
+def err_sized_volatile_unsupported : Error<
+  "specifying an access size for volatile memory operations is unsupported "
+  "(%0 is volatile)">;
+def err_elsz_must_be_lock_free : Error<
+  "element size must be a lock-free size, %0 exceeds %1 bytes">;
+
 def err_lifetimebound_no_object_param : Error<
   "'lifetimebound' attribute cannot be applied; %select{static |non-}0member "
   "function has no implicit object parameter">;
Index: clang/include/clang/Basic/DiagnosticASTKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticASTKinds.td
+++ clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -282,6 +282,9 @@
   "size to copy (%4) is not a multiple of size of element type %3 (%5)|"
   "source is not a contiguous array of at least %4 elements of type %3|"
   "destination is not a contiguous array of at least %4 elements of type %3}2">;
+def note_constexpr_mem_overloaded_bad_size : Note<
+  "size parameter is %0, expected a size that is evenly divisible by "
+  "element size %1">;
 def note_constexpr_bit_cast_unsupported_type : Note<
   "constexpr bit_cast involving type %0 is not yet supported">;
 def note_constexpr_bit_cast_unsupported_bitfield : Note<
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -488,7 +488,6 @@
 BUILTIN(__builtin_memchr, "v*vC*iz", "nF")
 BUILTIN(__builtin_memcmp, "ivC*vC*z", "nF")
 BUILTIN(__builtin_memcpy, "v*v*vC*z", "nF")
-BUILTIN(__builtin_memcpy_inline, "vv*vC*Iz", "nt")
 BUILTIN(__builtin_memmove, "v*v*vC*z", "nF")
 BUILTIN(__builtin_mempcpy, "v*v*vC*z", "nF")
 BUILTIN(__builtin_memset, "v*v*iz", "nF")
@@ -1491,6 +1490,10 @@
 BUILTIN(__builtin_char_memchr, "c*cC*iz", "n")
 BUILTIN(__builtin_dump_struct, "ivC*v*", "tn")
 BUILTIN(__builtin_preserve_access_index, "v.", "t")
+BUILTIN(__builtin_memcpy_inline, "vv*vC*Iz", "nt")
+BUILTIN(__builtin_memcpy_overloaded, "v*v*vC*z", "nt")
+BUILTIN(__builtin_memmove_overloaded, "v*v*vC*z", "nt")
+BUILTIN(__builtin_memset_overloaded, "v*v*iz", "nt")
 
 // Alignment builtins (uses custom parsing to support pointers and integers)
 BUILTIN(__builtin_is_aligned, "bvC*z", "nct")
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -2420,6 +2420,50 @@
 
 Note that the `size` argument must be a compile time constant.
 
+Clang provides versions of the following functions which are overloaded based on
+the pointer parameter types:
+
+* ``__builtin_memcpy_overloaded(QUAL0 void *dst, QUAL1 const void *src, size_t byte_size, size_t byte_access_size = <unspecified>)``
+* ``__builtin_memmove_overloaded(QUAL0 void *dst, QUAL1 const void *src, size_t byte_size, size_t byte_access_size = <unspecified>)``
+* ``__builtin_memset_overloaded(QUAL void *dst, unsigned char val, size_t byte_size, size_t byte_access_size = <unspecified>)``
+
+These overloads support destinations and sources which have a mix of the
+following qualifiers:
+
+* ``volatile``
+* ``__unaligned``
+* non-default address spaces
+
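+For example, a ``volatile`` source buffer can be passed directly, with no
+casts (a sketch; names are illustrative):
+
+.. code-block:: c
+
+  void copy_from_untrusted(char *dst, const volatile char *src, size_t size) {
+    // Each byte of the volatile source is read exactly once.
+    __builtin_memcpy_overloaded(dst, src, size);
+  }
+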
+The ``_Atomic`` qualifier is not supported. Rather, an optional last parameter
+specifies the access size in bytes; it must be a compile-time constant. When an
+access size is provided, the memory is accessed as a sequence of operations
+whose size is the requested access size or a multiple of it. The order of the
+operations is unspecified, and each access has unordered atomic semantics:
+reads and writes do not tear at the individual access level, and each occurs
+exactly once, but the order in which they occur (and become observable) can
+only be guaranteed using appropriate fences around the function call. The
+access size must therefore be a lock-free size for the target architecture. It
+is undefined behavior to provide a memory location which is aligned to less
+than the access size, or a size which is not evenly divisible by the specified
+access size.
+
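+For example, assuming a target on which 4-byte atomic accesses are lock-free, a
+shared buffer of ``int`` values can be copied without tearing any individual
+element (a sketch; the function and parameter names are illustrative):
+
+.. code-block:: c
+
+  void snapshot_counters(int *dst, const int *src, size_t byte_size) {
+    // byte_size must be a multiple of 4 and both pointers must be aligned to
+    // at least 4 bytes; each 4-byte element is then read and written exactly
+    // once, with unordered atomic semantics.
+    __builtin_memcpy_overloaded(dst, src, byte_size, 4);
+  }
+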
+When the access size parameter is not provided, the access size is unspecified
+and might be non-uniform throughout the operation.
+
+The overloaded builtins require ``dst`` (and ``src``, where present) to be
+pointers to trivially copyable types or to ``void``, prior to conversion to the
+parameter type.
+
+The builtins can be used as building blocks for different facilities:
+
+* Using ``volatile`` to copy data from untrusted buffers exactly once, avoiding
+  time-of-check to time-of-use (TOCTOU) security issues.
+* Using ``volatile`` to implement memory operations which will not be
+  eliminated by the optimizer, such as C's Annex K ``memset_s`` (see the sketch
+  below).
+* Using the access size to perform atomic memory operations of a particular
+  size, similar to the facility proposed in C++ paper
+  `p1478 <https://wg21.link/p1478>`_.
+
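+For example, the second use case might be sketched as follows (illustrative;
+not a conforming Annex K implementation):
+
+.. code-block:: c
+
+  void secure_clear(void *buf, size_t len) {
+    // The volatile-qualified destination keeps the optimizer from deleting
+    // the stores as dead, even if 'buf' is never read again.
+    __builtin_memset_overloaded((volatile void *)buf, 0, len);
+  }
+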
 Clang provides constant expression evaluation support for builtin forms of the
 following functions from the C standard library headers
 ``<string.h>`` and ``<wchar.h>``:
@@ -2437,7 +2481,9 @@
 given size is an exact multiple of the element size that is no greater than
 the number of elements accessible through the source and destination operands.
 
-Constant evaluation support is not yet provided for ``__builtin_memcpy_inline``.
+Constant evaluation support is not yet provided for ``__builtin_memcpy_inline``,
+but it is provided for the ``__builtin_mem*_overloaded`` functions, under the
+same restrictions as for the C library functions above.
 
 Atomic Min/Max builtins with memory ordering
 --------------------------------------------