https://github.com/wsxarcher updated 
https://github.com/llvm/llvm-project/pull/185487

>From 8a14bd08f085d961c7700fa63342b5846cb419f7 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Mon, 9 Mar 2026 19:11:55 +0100
Subject: [PATCH 1/8] [clang] Refine preferred alignment for x86 globals

---
 clang/include/clang/AST/ASTContext.h          |  2 +
 clang/include/clang/Basic/TargetInfo.h        |  7 ++
 clang/lib/AST/ASTContext.cpp                  | 16 ++++
 clang/lib/Basic/TargetInfo.cpp                |  2 +
 clang/lib/Basic/Targets/X86.h                 |  2 +
 clang/test/AST/ByteCode/codegen.cpp           |  2 +-
 clang/test/CodeGen/Nontemporal.cpp            |  4 +-
 clang/test/CodeGen/attr-counted-by.c          |  8 +-
 clang/test/CodeGen/c-strings.c                |  2 +-
 clang/test/CodeGen/complex.c                  |  8 +-
 clang/test/CodeGen/keep-static-consts.cpp     |  4 +-
 .../linux-kernel-struct-union-initializer.c   | 26 +++---
 clang/test/CodeGen/matrix-type-builtins.c     |  2 +-
 clang/test/CodeGen/matrix-type-indexing.c     |  4 +-
 clang/test/CodeGen/no-opt-volatile-memcpy.c   |  8 +-
 .../test/CodeGen/partial-reinitialization2.c  |  8 +-
 clang/test/CodeGen/unaligned-decl.c           | 12 +--
 clang/test/CodeGen/unaligned-expr.c           | 16 ++--
 clang/test/CodeGen/vector-alignment.c         |  4 +-
 clang/test/CodeGenCXX/const-init-cxx11.cpp    |  2 +-
 .../ms-constexpr-static-data-member.cpp       |  2 +-
 .../CodeGenCXX/no-opt-volatile-memcpy.cpp     | 10 +-
 .../CodeGenCXX/pointers-to-data-members.cpp   |  2 +-
 clang/test/CodeGenCXX/static-init.cpp         |  4 +-
 clang/test/CodeGenObjCXX/encode.mm            |  2 +-
 clang/test/DebugInfo/KeyInstructions/agg.c    |  2 +-
 clang/test/OpenMP/atomic_capture_codegen.cpp  | 92 +++++++++----------
 clang/test/OpenMP/atomic_read_codegen.c       | 21 +++--
 clang/test/OpenMP/atomic_update_codegen.cpp   | 92 +++++++++----------
 clang/test/OpenMP/atomic_write_codegen.c      | 60 ++++++------
 clang/test/SemaCXX/builtin-assume-aligned.cpp |  2 +-
 31 files changed, 230 insertions(+), 198 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index ba1b58489c327..4686c9de42083 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2805,6 +2805,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
   }
   unsigned getPreferredTypeAlign(const Type *T) const;
 
+  unsigned getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Align) 
const;
+
   /// Return the default alignment for __attribute__((aligned)) on
   /// this target, to be used if no alignment value is specified.
   unsigned getTargetDefaultAlignForAttributeAligned() const;
diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 9f7d2a17a0f8a..762c4d6bc9034 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -97,6 +97,8 @@ struct TransferrableTargetInfo {
   unsigned char DoubleWidth, DoubleAlign;
   unsigned char LongDoubleWidth, LongDoubleAlign, Float128Align, Ibm128Align;
   unsigned char LargeArrayMinWidth, LargeArrayAlign;
+  unsigned short LargeGlobalMinWidth;
+  unsigned char LargeGlobalAlign;
   unsigned char LongWidth, LongAlign;
   unsigned char LongLongWidth, LongLongAlign;
   unsigned char Int128Align;
@@ -851,6 +853,11 @@ class TargetInfo : public TransferrableTargetInfo,
   unsigned getLargeArrayMinWidth() const { return LargeArrayMinWidth; }
   unsigned getLargeArrayAlign() const { return LargeArrayAlign; }
 
+  // getLargeGlobalAlign/getLargeGlobalMinWidth - Return the minimum size of
+  // a global that is considered 'large', and the alignment given to it.
+  unsigned getLargeGlobalMinWidth() const { return LargeGlobalMinWidth; }
+  unsigned getLargeGlobalAlign() const { return LargeGlobalAlign; }
+
   /// Return the maximum width lock-free atomic operation which will
   /// ever be supported for the given target
   unsigned getMaxAtomicPromoteWidth() const { return MaxAtomicPromoteWidth; }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index ee7f823b014b2..c9798d53fa5fa 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1837,6 +1837,7 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool 
ForAlignof) const {
         uint64_t TypeSize =
             !BaseT->isIncompleteType() ? getTypeSize(T.getTypePtr()) : 0;
         Align = std::max(Align, getMinGlobalAlignOfVar(TypeSize, VD));
+        Align = std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align));
       }
 
     // Fields can be subject to extra alignment constraints, like if
@@ -2642,6 +2643,21 @@ CharUnits 
ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const {
   return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
 }
 
+/// getLargeGlobalPreferredAlign - Return the "preferred" alignment, in bits,
+/// of a global variable of TypeSize bits. Globals of at least
+/// LargeGlobalMinWidth bits get LargeGlobalAlign; smaller ones get 64 bits
+/// (size >= 128) or 32 bits (size >= 32). Never returns less than Align.
+unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize,
+                                                  unsigned Align) const {
+  if (TypeSize >= Target->getLargeGlobalMinWidth())
+    return std::max(Align, Target->getLargeGlobalAlign());
+  if (TypeSize >= 128)
+    return std::max(Align, 64u);
+  if (TypeSize >= 32)
+    return std::max(Align, 32u);
+  return Align;
+}
+
 /// getPreferredTypeAlign - Return the "preferred" alignment of the specified
 /// type for the current target in bits.  This can be different than the ABI
 /// alignment in cases where it is beneficial for performance or backwards
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index e6ae89e0948c5..a3bc603360959 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -128,6 +128,8 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   Ibm128Align = 128;
   LargeArrayMinWidth = 0;
   LargeArrayAlign = 0;
+  LargeGlobalMinWidth = 0;
+  LargeGlobalAlign = 0;
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
   MaxVectorAlign = 0;
   MaxTLSAlign = 0;
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index c7afcc7c86053..686c2bdc17593 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -742,6 +742,8 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public 
X86TargetInfo {
     LongDoubleAlign = 128;
     LargeArrayMinWidth = 128;
     LargeArrayAlign = 128;
+    LargeGlobalMinWidth = 1024;
+    LargeGlobalAlign = 128;
     SuitableAlign = 128;
     SizeType = IsX32 ? UnsignedInt : UnsignedLong;
     PtrDiffType = IsX32 ? SignedInt : SignedLong;
diff --git a/clang/test/AST/ByteCode/codegen.cpp 
b/clang/test/AST/ByteCode/codegen.cpp
index cbb0504c89f13..7a4150d0970cb 100644
--- a/clang/test/AST/ByteCode/codegen.cpp
+++ b/clang/test/AST/ByteCode/codegen.cpp
@@ -22,7 +22,7 @@ struct S {
   float c[3];
 };
 
-// CHECK: @s = global %struct.S zeroinitializer, align 4
+// CHECK: @s = global %struct.S zeroinitializer, align 8
 S s;
 // CHECK: @sp = constant ptr getelementptr (i8, ptr @s, i64 16), align 8
 float &sp = s.c[3];
diff --git a/clang/test/CodeGen/Nontemporal.cpp 
b/clang/test/CodeGen/Nontemporal.cpp
index 5052cb225d411..e6f9f2fceb9fa 100644
--- a/clang/test/CodeGen/Nontemporal.cpp
+++ b/clang/test/CodeGen/Nontemporal.cpp
@@ -53,8 +53,8 @@ S x;
 typedef int v4si __attribute__ ((vector_size(16)));
 
 // CHECK-LABEL: define void @_Z14test_alignmentv()
-// CHECK: load <4 x i32>, ptr @x, align 1, !nontemporal
-// CHECK: store <4 x i32> %1, ptr @x, align 1, !nontemporal
+// CHECK: load <4 x i32>, ptr @x, align 8, !nontemporal
+// CHECK: store <4 x i32> %1, ptr @x, align 8, !nontemporal
 
 void test_alignment() {
  auto t =  __builtin_nontemporal_load((v4si*)x.c);
diff --git a/clang/test/CodeGen/attr-counted-by.c 
b/clang/test/CodeGen/attr-counted-by.c
index 58d06f411b2c5..1d05b96ed2df5 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -1199,7 +1199,7 @@ int test12_a, test12_b;
 // SANITIZE-WITH-ATTR-NEXT:  [[ENTRY:.*:]]
 // SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 
4
 // SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull 
[[BAZ]]) #[[ATTR9:[0-9]+]]
-// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef 
nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 
dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT10:![0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef 
nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 8 
dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT10:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64, 
!nosanitize [[META6]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label %[[CONT:.*]], label 
%[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]]
@@ -1225,7 +1225,7 @@ int test12_a, test12_b;
 // NO-SANITIZE-WITH-ATTR-NEXT:  [[ENTRY:.*:]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], 
align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(ptr 
nonnull [[BAZ]]) #[[ATTR12:[0-9]+]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef 
nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 
dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT7:![0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef 
nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 8 
dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT7:![0-9]+]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 
x i8], ptr [[BAZ]], i64 [[IDXPROM]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], 
align 4, !tbaa [[INT_TBAA2]]
@@ -1241,7 +1241,7 @@ int test12_a, test12_b;
 // SANITIZE-WITHOUT-ATTR-NEXT:  [[ENTRY:.*:]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], 
align 4
 // SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(ptr 
nonnull [[BAZ]]) #[[ATTR7:[0-9]+]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef 
nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 
dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT7:![0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef 
nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 8 
dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT7:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64, 
!nosanitize [[META8:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP1]], label %[[CONT:.*]], label 
%[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF9:![0-9]+]], !nosanitize [[META8]]
@@ -1267,7 +1267,7 @@ int test12_a, test12_b;
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:  [[ENTRY:.*:]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], 
align 4
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(ptr 
nonnull [[BAZ]]) #[[ATTR11:[0-9]+]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr 
noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 
4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT7:![0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr 
noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 
8 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct 
[[TBAA_STRUCT7:![0-9]+]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to 
i64
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds 
[4 x i8], ptr [[BAZ]], i64 [[IDXPROM]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]]
diff --git a/clang/test/CodeGen/c-strings.c b/clang/test/CodeGen/c-strings.c
index 31c438fd8ff2e..c31fb779a0ca6 100644
--- a/clang/test/CodeGen/c-strings.c
+++ b/clang/test/CodeGen/c-strings.c
@@ -7,7 +7,7 @@
 // CHECK: @align = {{(dso_local )?}}global i8 [[ALIGN:[0-9]+]]
 // ITANIUM: @.str = private unnamed_addr constant [6 x i8] c"hello\00"
 // MSABI: @"??_C@_05CJBACGMB@hello?$AA@" = linkonce_odr dso_local unnamed_addr 
constant [6 x i8] c"hello\00", comdat, align 1
-// ITANIUM: @f1.x = internal global ptr @.str
+// ITANIUM: @f1.x = internal global ptr @.str, align 8
 // MSABI: @f1.x = internal global ptr @"??_C@_05CJBACGMB@hello?$AA@"
 // CHECK: @f2.x = internal global [6 x i8] c"hello\00", align [[ALIGN]]
 // CHECK: @f3.x = internal global [8 x i8] c"hello\00\00\00", align [[ALIGN]]
diff --git a/clang/test/CodeGen/complex.c b/clang/test/CodeGen/complex.c
index ffa23badba09e..28f7c5380a9b6 100644
--- a/clang/test/CodeGen/complex.c
+++ b/clang/test/CodeGen/complex.c
@@ -286,7 +286,7 @@ int i;
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @ci1, align 4
 // CHECK-NEXT:    store i32 [[TMP0]], ptr @i, align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @i, align 4
-// CHECK-NEXT:    [[CS_REAL:%.*]] = load i16, ptr @cs, align 2
+// CHECK-NEXT:    [[CS_REAL:%.*]] = load i16, ptr @cs, align 4
 // CHECK-NEXT:    [[CS_IMAG:%.*]] = load i16, ptr getelementptr inbounds nuw 
(i8, ptr @cs, i64 2), align 2
 // CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[CS_REAL]] to i32
 // CHECK-NEXT:    [[CONV11:%.*]] = sext i16 [[CS_IMAG]] to i32
@@ -294,7 +294,7 @@ int i;
 // CHECK-NEXT:    [[ADD_I13:%.*]] = add i32 [[CONV11]], 0
 // CHECK-NEXT:    [[CONV14:%.*]] = trunc i32 [[ADD_R12]] to i16
 // CHECK-NEXT:    [[CONV15:%.*]] = trunc i32 [[ADD_I13]] to i16
-// CHECK-NEXT:    store i16 [[CONV14]], ptr @cs, align 2
+// CHECK-NEXT:    store i16 [[CONV14]], ptr @cs, align 4
 // CHECK-NEXT:    store i16 [[CONV15]], ptr getelementptr inbounds nuw (i8, 
ptr @cs, i64 2), align 2
 // CHECK-NEXT:    [[CF_REAL:%.*]] = load float, ptr @cf, align 4
 // CHECK-NEXT:    [[CF_IMAG:%.*]] = load float, ptr getelementptr inbounds nuw 
(i8, ptr @cf, i64 4), align 4
@@ -305,7 +305,7 @@ int i;
 // CHECK-NEXT:    store double [[ADD_R18]], ptr @D, align 8
 // CHECK-NEXT:    [[CI1_REAL19:%.*]] = load i32, ptr @ci1, align 4
 // CHECK-NEXT:    [[CI1_IMAG20:%.*]] = load i32, ptr getelementptr inbounds 
nuw (i8, ptr @ci1, i64 4), align 4
-// CHECK-NEXT:    [[CS_REAL21:%.*]] = load i16, ptr @cs, align 2
+// CHECK-NEXT:    [[CS_REAL21:%.*]] = load i16, ptr @cs, align 4
 // CHECK-NEXT:    [[CS_IMAG22:%.*]] = load i16, ptr getelementptr inbounds nuw 
(i8, ptr @cs, i64 2), align 2
 // CHECK-NEXT:    [[CONV23:%.*]] = sext i16 [[CS_REAL21]] to i32
 // CHECK-NEXT:    [[CONV24:%.*]] = sext i16 [[CS_IMAG22]] to i32
@@ -322,7 +322,7 @@ int i;
 // CHECK-NEXT:    [[TMP13:%.*]] = sdiv i32 [[TMP11]], [[TMP8]]
 // CHECK-NEXT:    [[CONV25:%.*]] = trunc i32 [[TMP12]] to i16
 // CHECK-NEXT:    [[CONV26:%.*]] = trunc i32 [[TMP13]] to i16
-// CHECK-NEXT:    store i16 [[CONV25]], ptr @cs, align 2
+// CHECK-NEXT:    store i16 [[CONV25]], ptr @cs, align 4
 // CHECK-NEXT:    store i16 [[CONV26]], ptr getelementptr inbounds nuw (i8, 
ptr @cs, i64 2), align 2
 // CHECK-NEXT:    [[CI1_REAL27:%.*]] = load i32, ptr @ci1, align 4
 // CHECK-NEXT:    [[CI1_IMAG28:%.*]] = load i32, ptr getelementptr inbounds 
nuw (i8, ptr @ci1, i64 4), align 4
diff --git a/clang/test/CodeGen/keep-static-consts.cpp 
b/clang/test/CodeGen/keep-static-consts.cpp
index 9f4f95be287ae..1d867b3f0e3b6 100644
--- a/clang/test/CodeGen/keep-static-consts.cpp
+++ b/clang/test/CodeGen/keep-static-consts.cpp
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -fkeep-static-consts -emit-llvm %s -o - 
-triple=x86_64-unknown-linux-gnu | FileCheck %s
 
-// CHECK: @_ZL7srcvers = internal constant [4 x i8] c"xyz\00", align 1
-// CHECK: @_ZL8srcvers2 = internal constant [4 x i8] c"abc\00", align 1
+// CHECK: @_ZL7srcvers = internal constant [4 x i8] c"xyz\00", align 4
+// CHECK: @_ZL8srcvers2 = internal constant [4 x i8] c"abc\00", align 4
 // CHECK: @_ZL1N = internal constant i32 2, align 4
 // CHECK: @llvm.compiler.used = appending global [4 x ptr] [ptr @_ZL7srcvers, 
ptr @b, ptr @_ZL8srcvers2, ptr @_ZL1N], section "llvm.metadata"
 
diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c 
b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c
index ff7e5d3599048..119b2e77bba36 100644
--- a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c
+++ b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c
@@ -56,26 +56,26 @@ struct S3 global_s6 = {101,  15, 123};
 
 // Test empty initializer for union.
 //.
-// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 4
-// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, 
align 4
-// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] 
zeroinitializer }, align 4
-// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 4
-// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, 
align 4
-// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] 
zeroinitializer } }, align 4
-// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] 
zeroinitializer }, align 4
-// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { 
i32 4, [12 x i8] zeroinitializer } }, align 4
+// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 8
+// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, 
align 8
+// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] 
zeroinitializer }, align 8
+// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 8
+// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, 
align 8
+// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] 
zeroinitializer } }, align 8
+// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] 
zeroinitializer }, align 8
+// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { 
i32 4, [12 x i8] zeroinitializer } }, align 8
 // CHECK: @global_u3 = global %union.U2 zeroinitializer, align 32
 // CHECK: @global_s4 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } 
zeroinitializer, align 32
 // CHECK: @global_s5 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 
1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] 
zeroinitializer }, align 32
 // CHECK: @global_s6 = global { i8, i8, i8 } { i8 101, i8 -65, i8 7 }, align 1
-// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 4
+// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 8
 // CHECK: @__const.test3.a = private unnamed_addr constant %union.U1 { i32 3, 
[12 x i8] zeroinitializer }, align 4
-// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] 
zeroinitializer }, align 4
-// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 4
+// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] 
zeroinitializer }, align 8
+// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 8
 // CHECK: @__const.test7.s = private unnamed_addr constant %struct.S1 { i32 3, 
%union.U1 zeroinitializer }, align 4
-// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 
zeroinitializer }, align 4
+// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 
zeroinitializer }, align 8
 // CHECK: @__const.test9.s = private unnamed_addr constant %struct.S1 { i32 3, 
%union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4
-// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, 
[12 x i8] zeroinitializer } }, align 4
+// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, 
[12 x i8] zeroinitializer } }, align 8
 // CHECK: @test12.a = internal global %union.U2 zeroinitializer, align 32
 // CHECK: @test14.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] 
} zeroinitializer, align 32
 // CHECK: @__const.test15.s = private unnamed_addr constant { i32, [4 x i8], 
i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] 
zeroinitializer, [8 x i8] zeroinitializer }, align 32
diff --git a/clang/test/CodeGen/matrix-type-builtins.c 
b/clang/test/CodeGen/matrix-type-builtins.c
index f6e67d5933ee4..f042cc777c4af 100644
--- a/clang/test/CodeGen/matrix-type-builtins.c
+++ b/clang/test/CodeGen/matrix-type-builtins.c
@@ -97,7 +97,7 @@ void transpose_global(void) {
   // CHECK32-NEXT:    [[M_T_ADDR:%.*]] = alloca [25 x double], align 4
   // CHECK32-NEXT:    [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr 
@global_matrix, align 4
   // CHECK64-NEXT:    [[M_T_ADDR:%.*]] = alloca [25 x double], align 8
-  // CHECK64-NEXT:    [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr 
@global_matrix, align 8
+  // CHECK64-NEXT:    [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr 
@global_matrix, align 16
   // COMMON-NEXT:    [[M_T:%.*]] = call <25 x double> 
@llvm.matrix.transpose.v25f64(<25 x double> [[GLOBAL_MATRIX]], i32 5, i32 5)
   // CHECK32-NEXT:    store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 4
   // CHECK64-NEXT:    store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 8
diff --git a/clang/test/CodeGen/matrix-type-indexing.c 
b/clang/test/CodeGen/matrix-type-indexing.c
index d76d14c3f67ef..610a27924db94 100644
--- a/clang/test/CodeGen/matrix-type-indexing.c
+++ b/clang/test/CodeGen/matrix-type-indexing.c
@@ -52,9 +52,9 @@ void storeAtMatrixSubscriptExpr(int row, int col, float 
value) {
     // ROW-CHECK-NEXT: [[row_major_index:%.*]] = add i64 [[row_offset]], 
[[col_load:%.*]]
     // COL-CHECK: [[col_offset:%.*]] = mul i64 [[col_load:%.*]], 2
     // COL-CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], 
[[row_load:%.*]]
-    // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr @gM, align 4
+    // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr @gM, align 8
     // ROW-CHECK-NEXT: [[matrix_after_insert:%.*]] = insertelement <6 x float> 
[[matrix_as_vec]], float [[value_load]], i64 [[row_major_index]]
     // COL-CHECK-NEXT: [[matrix_after_insert:%.*]] = insertelement <6 x float> 
[[matrix_as_vec]], float [[value_load]], i64 [[col_major_index]]
-    // CHECK-NEXT: store <6 x float> [[matrix_after_insert]], ptr @gM, align 4
+    // CHECK-NEXT: store <6 x float> [[matrix_after_insert]], ptr @gM, align 8
     gM[row][col] = value;
 }
diff --git a/clang/test/CodeGen/no-opt-volatile-memcpy.c 
b/clang/test/CodeGen/no-opt-volatile-memcpy.c
index 572b1faa11d5b..0d3bd3243a9e7 100644
--- a/clang/test/CodeGen/no-opt-volatile-memcpy.c
+++ b/clang/test/CodeGen/no-opt-volatile-memcpy.c
@@ -16,8 +16,8 @@ void foo (void) {
 // CHECK-LABEL: define{{.*}} void @foo()
 // CHECK: %[[LS:.*]] = alloca %struct.s, align 4
 // CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 
%[[LS]], i64 132, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @gs, ptr align 4 @gs, 
i64 132, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 
@gs, i64 132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @gs, ptr align 16 
@gs, i64 132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 16 
@gs, i64 132, i1 true)
 
 
 struct s1 {
@@ -31,6 +31,6 @@ void fee (void) {
   s.y = gs;
 }
 // CHECK-LABEL: define{{.*}} void @fee()
-// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @s, i64 
132, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @gs, 
i64 132, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 @s, i64 
132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 
@gs, i64 132, i1 true)
 
diff --git a/clang/test/CodeGen/partial-reinitialization2.c 
b/clang/test/CodeGen/partial-reinitialization2.c
index 8d8e04f24541a..715df0a562850 100644
--- a/clang/test/CodeGen/partial-reinitialization2.c
+++ b/clang/test/CodeGen/partial-reinitialization2.c
@@ -15,7 +15,7 @@ union ULP3 { struct LP3 l3; };
 // CHECK-LABEL: test1
 void test1(void)
 {
-  // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g1, i64 6, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g1, i64 6, i1 false)
   // CHECK: store i8 120, ptr %
 
   struct LP1 l = { .p1 = g1, .p1.x[2] = 'x' };
@@ -24,7 +24,7 @@ void test1(void)
 // CHECK-LABEL: test2
 void test2(void)
 {
-  // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g1, i64 6, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g1, i64 6, i1 false)
   // CHECK: store i8 114, ptr %
 
   struct LP1 l = { .p1 = g1, .p1.x[1] = 'r' };
@@ -75,10 +75,10 @@ void test4(void)
 void test5(void)
 {
   // .l3 = g3
-  // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g3, i64 12, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g3, i64 12, i1 false)
 
   // .l3.p1 = { [0] = g1 } implicitly sets [1] to zero
-  // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g1, i64 6, i1 false)
+  // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g1, i64 6, i1 false)
   // CHECK: getelementptr{{.*}}%struct.P1, ptr{{.*}}i64 1
   // CHECK: call void @llvm.memset{{.*}}i8 0, i64 6, i1 false)
 
diff --git a/clang/test/CodeGen/unaligned-decl.c 
b/clang/test/CodeGen/unaligned-decl.c
index 6e35827658f5f..17887ed56bd1b 100644
--- a/clang/test/CodeGen/unaligned-decl.c
+++ b/clang/test/CodeGen/unaligned-decl.c
@@ -1,22 +1,22 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fms-extensions -emit-llvm 
< %s | FileCheck %s
 
-// CHECK: @a1 ={{.*}} global i32 1, align 1
+// CHECK: @a1 ={{.*}} global i32 1, align 4
 __unaligned int a1 = 1;
 
-// CHECK: @a2 ={{.*}} global i32 1, align 1
+// CHECK: @a2 ={{.*}} global i32 1, align 4
 int __unaligned a2 = 1;
 
-// CHECK: @a3 = {{.*}} align 1
+// CHECK: @a3 = {{.*}} align 8
 __unaligned int a3[10];
 
-// CHECK: @a4 = {{.*}} align 1
+// CHECK: @a4 = {{.*}} align 8
 int __unaligned a4[10];
 
-// CHECK: @p1 = {{.*}} align 1
+// CHECK: @p1 = {{.*}} align 4
 int *__unaligned p1;
 
 // CHECK: @p2 = {{.*}} align 8
 int __unaligned *p2;
 
-// CHECK: @p3 = {{.*}} align 1
+// CHECK: @p3 = {{.*}} align 4
 int __unaligned *__unaligned p3;
diff --git a/clang/test/CodeGen/unaligned-expr.c 
b/clang/test/CodeGen/unaligned-expr.c
index b9c706d3e94be..127ceeced72f4 100644
--- a/clang/test/CodeGen/unaligned-expr.c
+++ b/clang/test/CodeGen/unaligned-expr.c
@@ -5,8 +5,8 @@
 // -------------
 __unaligned int x;
 void test1(void) {
-  // CHECK: {{%.*}} = load i32, ptr @x, align 1
-  // CHECK: store i32 {{%.*}}, ptr @x, align 1
+  // CHECK: {{%.*}} = load i32, ptr @x, align 4
+  // CHECK: store i32 {{%.*}}, ptr @x, align 4
   x++;
 }
 
@@ -30,7 +30,7 @@ void test2_1(void) {
 int *__unaligned p1;
 void test3(void) {
 
-  // CHECK: {{%.*}} = load ptr, ptr @p1, align 1
+  // CHECK: {{%.*}} = load ptr, ptr @p1, align 4
   // CHECK: {{%.*}} = load i32, ptr {{%.*}}, align 4
   // CHECK: store i32 {{%.*}}, ptr {{%.*}}, align 4
   (*p1)++;
@@ -46,7 +46,7 @@ void test4(void) {
 
 int __unaligned *__unaligned p3;
 void test5(void) {
-  // CHECK: {{%.*}} = load ptr, ptr @p3, align 1
+  // CHECK: {{%.*}} = load ptr, ptr @p3, align 4
   // CHECK: {{%.*}} = load i32, ptr {{%.*}}, align 1
   // CHECK: store i32 {{%.*}}, ptr {{%.*}}, align 1
   (*p3)++;
@@ -87,8 +87,8 @@ void test8(void) {
 // -------------
 __unaligned int a[10];
 void test9(void) {
-  // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, 
i64 12), align 1
-  // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @a, i64 
12), align 1
+  // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, 
i64 12), align 4
+  // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @a, i64 
12), align 4
   (a[3])++;
 }
 
@@ -180,8 +180,8 @@ struct S1 {
 
 __unaligned S1 s1;
 void test20(void) {
-    // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @s1, 
i64 4), align 1
-    // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @s1, 
i64 4), align 1
+    // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @s1, 
i64 4), align 4
+    // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @s1, 
i64 4), align 4
     s1.x++;
 }
 
diff --git a/clang/test/CodeGen/vector-alignment.c 
b/clang/test/CodeGen/vector-alignment.c
index c0b607e96c618..29e2f236e62e9 100644
--- a/clang/test/CodeGen/vector-alignment.c
+++ b/clang/test/CodeGen/vector-alignment.c
@@ -74,6 +74,6 @@ double __attribute__((vector_size(24), aligned(64))) v11;
 // ALL: @v11 {{.*}}, align 64
 double __attribute__((vector_size(80), aligned(16))) v12;
 // ALL: @v12 {{.*}}, align 16
-typedef __attribute__((ext_vector_type(248), aligned(4))) _Bool v12b_type;
+typedef __attribute__((ext_vector_type(248), aligned(8))) _Bool v12b_type;
 v12b_type v12b;
-// ALL: @v12b {{.*}}, align 4
+// ALL: @v12b {{.*}}, align 8
diff --git a/clang/test/CodeGenCXX/const-init-cxx11.cpp 
b/clang/test/CodeGenCXX/const-init-cxx11.cpp
index 5dfe3488ca7bb..aa811b245768c 100644
--- a/clang/test/CodeGenCXX/const-init-cxx11.cpp
+++ b/clang/test/CodeGenCXX/const-init-cxx11.cpp
@@ -88,7 +88,7 @@ namespace BaseClass {
 
   struct E {};
   struct Test2 : X<E,0>, X<E,1>, X<E,2>, X<E,3> {};
-  // CHECK: @_ZN9BaseClass2t2E ={{.*}} constant {{.*}} zeroinitializer, align 1
+  // CHECK: @_ZN9BaseClass2t2E ={{.*}} constant {{.*}} zeroinitializer, align 4
   extern constexpr Test2 t2 = Test2();
 
   struct __attribute((packed)) PackedD { double y = 2; };
diff --git a/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp 
b/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp
index 604a49fefbacb..ddee0f4879be9 100644
--- a/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp
+++ b/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp
@@ -19,7 +19,7 @@ void usethem() {
   useptr(&S::sdm_udt);
 }
 
-// CHECK-DAG: @"?sdm_char_array@S@@2QBDB" = linkonce_odr dso_local constant [5 
x i8] c"asdf\00", comdat, align 1
+// CHECK-DAG: @"?sdm_char_array@S@@2QBDB" = linkonce_odr dso_local constant [5 
x i8] c"asdf\00", comdat, align 4
 
 // CHECK-DAG: @"?sdm_char_ptr@S@@2QEBDEB" = linkonce_odr dso_local constant 
ptr @"??_C@_04JIHMPGLA@asdf?$AA@", comdat, align 8
 
diff --git a/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp 
b/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp
index 1a52169d5a938..38b53b1b23c50 100644
--- a/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp
+++ b/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp
@@ -16,8 +16,8 @@ void foo (void) {
 // CHECK-LABEL: define{{.*}} void @_Z3foov()
 // CHECK: %[[LS:.*]] = alloca %struct.s, align 4
 // CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 
%[[LS]], i64 132, i1 true)
-// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(ptr align 4 @gs, ptr align 4 
@gs, i64 132, i1 true)
-// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 
@gs, i64 132, i1 true)
+// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(ptr align 16 @gs, ptr align 16 
@gs, i64 132, i1 true)
+// CHECK-NEXT:  call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 
16 @gs, i64 132, i1 true)
 
 
 struct s1 {
@@ -31,8 +31,8 @@ void fee (void) {
   s.y = gs;
 }
 // CHECK-LABEL: define{{.*}} void @_Z3feev()
-// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @s, i64 
132, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @gs, 
i64 132, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 @s, i64 
132, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 
@gs, i64 132, i1 true)
 
 struct d : s1 {
 };
@@ -43,4 +43,4 @@ void gorf(void) {
   gd = gd;
 }
 // CHECK-LABEL: define{{.*}} void @_Z4gorfv()
-// CHECK:   call void @llvm.memcpy.{{.*}}(ptr align 4 @gd, ptr align 4 @gd, 
i64 132, i1 true)
+// CHECK:   call void @llvm.memcpy.{{.*}}(ptr align 16 @gd, ptr align 16 @gd, 
i64 132, i1 true)
diff --git a/clang/test/CodeGenCXX/pointers-to-data-members.cpp 
b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
index 2ee6c65cf167d..ecf0ee2c0c8f5 100644
--- a/clang/test/CodeGenCXX/pointers-to-data-members.cpp
+++ b/clang/test/CodeGenCXX/pointers-to-data-members.cpp
@@ -54,7 +54,7 @@ namespace ZeroInit {
   };
 
   struct C : A, B { int j; };
-  // CHECK-GLOBAL: @_ZN8ZeroInit1cE ={{.*}} global {{%.*}} <{ 
%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::B" { [10 x 
%"struct.ZeroInit::A"] [%"struct.ZeroInit::A" { i64 -1, i32 0 }, 
%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 
}, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 
0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, 
i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 
-1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }], i8 0, i64 -1 }, i32 0, 
[4 x i8] zeroinitializer }>, align 8
+  // CHECK-GLOBAL: @_ZN8ZeroInit1cE ={{.*}} global {{%.*}} <{ 
%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::B" { [10 x 
%"struct.ZeroInit::A"] [%"struct.ZeroInit::A" { i64 -1, i32 0 }, 
%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 
}, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 
0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, 
i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 
-1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }], i8 0, i64 -1 }, i32 0, 
[4 x i8] zeroinitializer }>, align 16
   C c;
 }
 
diff --git a/clang/test/CodeGenCXX/static-init.cpp 
b/clang/test/CodeGenCXX/static-init.cpp
index 51080c895ec65..23c5cde726b9d 100644
--- a/clang/test/CodeGenCXX/static-init.cpp
+++ b/clang/test/CodeGenCXX/static-init.cpp
@@ -3,8 +3,8 @@
 // RUN: %clang_cc1 %s -triple=x86_64-pc-linuxs -emit-llvm -std=c++20 -o - | 
FileCheck -check-prefix=CHECK -check-prefix=CHECK20 %s
 
 // CHECK: @_ZZ1hvE1i = internal global i32 0, align 4
-// CHECK: @base_req ={{.*}} global [4 x i8] c"foo\00", align 1
-// CHECK: @base_req_uchar ={{.*}} global [4 x i8] c"bar\00", align 1
+// CHECK: @base_req ={{.*}} global [4 x i8] c"foo\00", align 4
+// CHECK: @base_req_uchar ={{.*}} global [4 x i8] c"bar\00", align 4
 
 // CHECK: @_ZZN5test31BC1EvE1u = internal global { i8, [3 x i8] } { i8 97, [3 
x i8] undef }, align 4
 
diff --git a/clang/test/CodeGenObjCXX/encode.mm 
b/clang/test/CodeGenObjCXX/encode.mm
index cad70e379c386..e16283945180f 100644
--- a/clang/test/CodeGenObjCXX/encode.mm
+++ b/clang/test/CodeGenObjCXX/encode.mm
@@ -349,7 +349,7 @@ @implementation N
     long c;
   };
 
-  // CHECKCXX20: @_ZN7GH712501sE =  constant [7 x i8] c"{S=qq}\00", align 1
+  // CHECKCXX20: @_ZN7GH712501sE =  constant [7 x i8] c"{S=qq}\00", align 4
   extern const char s[] = @encode(S);
 }
 #endif
diff --git a/clang/test/DebugInfo/KeyInstructions/agg.c 
b/clang/test/DebugInfo/KeyInstructions/agg.c
index 58d923d1d9328..4215b70da0aab 100644
--- a/clang/test/DebugInfo/KeyInstructions/agg.c
+++ b/clang/test/DebugInfo/KeyInstructions/agg.c
@@ -20,7 +20,7 @@ void fun(Struct a) {
 // CHECK: store <1 x i8> %vecins, ptr @c{{.*}}, !dbg [[G3R1:!.*]]
   c[0] = 0;
 
-// CHECK: %3 = load <25 x float>, ptr @m, align 4
+// CHECK: %3 = load <25 x float>, ptr @m, align 8
 // CHECK: %matins = insertelement <25 x float> %3, float 0.000000e+00, i64 0, 
!dbg [[G4R2:!.*]]
 // CHECK: store <25 x float> %matins, ptr @m{{.*}}, !dbg [[G4R1:!.*]]
   m[0][0] = 0;
diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp 
b/clang/test/OpenMP/atomic_capture_codegen.cpp
index 77312c2dac708..033a9dc555fd4 100644
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -644,13 +644,12 @@ int main(void) {
 #pragma omp atomic capture
   iv = bfx.a = bfx.a - ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}}
-// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr 
inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 
0)
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr getelementptr inbounds nuw 
(i8, ptr @{{.+}}, i64 4) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[OLD:%.+]] = load i32, ptr [[LDTEMP]],
-// CHECK: store i32 [[OLD]], ptr [[TEMP1:%.+]],
-// CHECK: [[OLD:%.+]] = load i32, ptr [[LDTEMP]],
-// CHECK: store i32 [[OLD]], ptr [[TEMP:%.+]],
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP1:%.+]],
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP:%.+]],
 // CHECK: [[A_LD:%.+]] = load i32, ptr [[TEMP]],
 // CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 1
 // CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 1
@@ -662,7 +661,10 @@ int main(void) {
 // CHECK: [[BF_CLEAR:%.+]] = and i32 [[NEW_VAL]], -2147483648
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, ptr [[TEMP1]]
-// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i64 noundef 4, ptr noundef getelementptr inbounds 
nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP]], ptr noundef [[TEMP1]], 
i32 noundef 0, i32 noundef 0)
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP1]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr 
@{{.+}}, i64 4), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic 
monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 // CHECK: store i32 [[A_ASHR]], ptr @{{.+}},
@@ -696,29 +698,27 @@ int main(void) {
 #pragma omp atomic capture
   {bfx2.a -= ldv; iv = bfx2.a;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, ptr getelementptr inbounds nuw 
(i8, ptr @{{.+}}, i64 3) monotonic, align 1
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
-// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST_NEW:%.+]],
-// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]],
-// CHECK: [[A_LD:%.+]] = load i8, ptr [[BITCAST]],
-// CHECK: [[A_ASHR:%.+]] = ashr i8 [[A_LD]], 7
-// CHECK: [[CAST:%.+]] = sext i8 [[A_ASHR]] to i32
-// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80
-// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[X_RVAL]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST_NEW:%.+]],
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]],
+// CHECK: [[A_LD:%.+]] = load i32, ptr [[BITCAST]],
+// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_LD]], 31
+// CHECK: [[CAST:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80
+// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[CAST]]
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[DIV]] to i32
-// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8
-// CHECK: [[BF_LD:%.+]] = load i8, ptr [[BITCAST_NEW]],
-// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1
-// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7
-// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127
-// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i8 %{{.+}}, ptr [[BITCAST_NEW]]
-// CHECK: [[NEW_BF_VALUE:%.+]] = load i8, ptr [[BITCAST_NEW]]
-// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr 
@{{.+}}, i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, 
align 1
-// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
-// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
+// CHECK: [[BF_LD:%.+]] = load i32, ptr [[BITCAST_NEW]],
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, ptr [[BITCAST_NEW]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[BITCAST_NEW]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_BF_VALUE]], i32 
[[NEW_BF_VALUE]] monotonic monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 // CHECK: store i32 [[NEW_VAL]], ptr @{{.+}},
@@ -753,28 +753,28 @@ int main(void) {
 #pragma omp atomic capture
   {iv = bfx3.a; bfx3.a /= ldv;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}}
-// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr 
inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP:%.+]], i32 noundef 
0)
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[OLD:%.+]] = load i24, ptr [[LDTEMP]],
-// CHECK: store i24 [[OLD]], ptr [[BITCAST2:%.+]],
-// CHECK: [[OLD:%.+]] = load i24, ptr [[LDTEMP]],
-// CHECK: store i24 [[OLD]], ptr [[BITCAST1:%.+]],
-// CHECK: [[A_LD:%.+]] = load i24, ptr [[BITCAST1]],
-// CHECK: [[A_SHL:%.+]] = shl i24 [[A_LD]], 7
-// CHECK: [[A_ASHR:%.+]] = ashr i24 [[A_SHL]], 10
-// CHECK: [[CAST:%.+]] = sext i24 [[A_ASHR]] to i32
-// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD]], ptr [[BITCAST2:%.+]],
+// CHECK: store i32 [[OLD]], ptr [[BITCAST1:%.+]],
+// CHECK: [[A_LD:%.+]] = load i32, ptr [[BITCAST1]],
+// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 7
+// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 18
+// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80
 // CHECK: [[ADD:%.+]] = fadd x86_fp80 [[X_RVAL]], [[EXPR]]
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[ADD]] to i32
-// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24
-// CHECK: [[BF_LD:%.+]] = load i24, ptr [[BITCAST2]],
-// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383
-// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3
-// CHECK: [[BF_CLEAR:%.+]] = and i24 [[BF_LD]], -131065
-// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i24 %{{.+}}, ptr [[BITCAST2]]
-// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i64 noundef 3, ptr noundef getelementptr inbounds 
nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP]], ptr noundef [[BITCAST2]], 
i32 noundef 0, i32 noundef 0)
+// CHECK: [[BF_LD:%.+]] = load i32, ptr [[BITCAST2]],
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11
+// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -33552385
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, ptr [[BITCAST2]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[BITCAST2]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD]], i32 [[NEW_BF_VALUE]] 
monotonic monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 // CHECK: store i32 [[NEW_VAL]], ptr @{{.+}},
diff --git a/clang/test/OpenMP/atomic_read_codegen.c 
b/clang/test/OpenMP/atomic_read_codegen.c
index 8079d5fd557a3..d61ac4615e01b 100644
--- a/clang/test/OpenMP/atomic_read_codegen.c
+++ b/clang/test/OpenMP/atomic_read_codegen.c
@@ -246,7 +246,8 @@ int main(void) {
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx.a;
-// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr 
inbounds nuw (i8, ptr @bfx_packed, i64 4), ptr noundef [[LDTEMP:%.+]], i32 
noundef 0)
+// CHECK: [[LD:%.+]] = load atomic i32, ptr getelementptr inbounds nuw (i8, 
ptr @{{.+}}, i64 4) monotonic, align 4
+// CHECK: store i32 [[LD]], ptr [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i32, ptr [[LDTEMP]]
 // CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
 // CHECK: ashr i32 [[SHL]], 1
@@ -260,10 +261,10 @@ int main(void) {
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx2.a;
-// CHECK: [[LD:%.+]] = load atomic i8, ptr getelementptr inbounds nuw (i8, ptr 
@bfx2_packed, i64 3) monotonic, align 1
-// CHECK: store i8 [[LD]], ptr [[LDTEMP:%.+]]
-// CHECK: [[LD:%.+]] = load i8, ptr [[LDTEMP]]
-// CHECK: ashr i8 [[LD]], 7
+// CHECK: [[LD:%.+]] = load atomic i32, ptr @bfx2_packed monotonic, align 4
+// CHECK: store i32 [[LD]], ptr [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32, ptr [[LDTEMP]]
+// CHECK: ashr i32 [[LD]], 31
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx2_packed.a;
@@ -275,11 +276,11 @@ int main(void) {
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx3.a;
-// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr 
inbounds nuw (i8, ptr @bfx3_packed, i64 1), ptr noundef [[LDTEMP:%.+]], i32 
noundef 0)
-// CHECK: [[LD:%.+]] = load i24, ptr [[LDTEMP]]
-// CHECK: [[SHL:%.+]] = shl i24 [[LD]], 7
-// CHECK: [[ASHR:%.+]] = ashr i24 [[SHL]], 10
-// CHECK: sext i24 [[ASHR]] to i32
+// CHECK: [[LD:%.+]] = load atomic i32, ptr @bfx3_packed monotonic, align 4
+// CHECK: store i32 [[LD]], ptr [[LDTEMP:%.+]]
+// CHECK: [[LD:%.+]] = load i32, ptr [[LDTEMP]]
+// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 7
+// CHECK: [[ASHR:%.+]] = ashr i32 [[SHL]], 18
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx3_packed.a;
diff --git a/clang/test/OpenMP/atomic_update_codegen.cpp 
b/clang/test/OpenMP/atomic_update_codegen.cpp
index eeb7657ca90d3..b529a6748df0f 100644
--- a/clang/test/OpenMP/atomic_update_codegen.cpp
+++ b/clang/test/OpenMP/atomic_update_codegen.cpp
@@ -577,13 +577,12 @@ int main(void) {
 #pragma omp atomic
   bfx.a = bfx.a - ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}}
-// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr 
inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 
0)
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr getelementptr inbounds nuw 
(i8, ptr @{{.+}}, i64 4) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[PREV_VALUE:%.+]] = load i32, ptr [[LDTEMP]]
-// CHECK: store i32 [[PREV_VALUE]], ptr [[TEMP1:%.+]],
-// CHECK: [[PREV_VALUE:%.+]] = load i32, ptr [[LDTEMP]]
-// CHECK: store i32 [[PREV_VALUE]], ptr [[TEMP:%.+]],
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP1:%.+]],
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP:%.+]],
 // CHECK: [[A_LD:%.+]] = load i32, ptr [[TEMP]],
 // CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 1
 // CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 1
@@ -595,7 +594,10 @@ int main(void) {
 // CHECK: [[BF_CLEAR:%.+]] = and i32 [[NEW_VAL]], -2147483648
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, ptr [[TEMP1]]
-// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i64 noundef 4, ptr noundef getelementptr inbounds 
nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP]], ptr noundef [[TEMP1]], 
i32 noundef 0, i32 noundef 0)
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP1]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr 
@{{.+}}, i64 4), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic 
monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 #pragma omp atomic update
@@ -627,29 +629,27 @@ int main(void) {
 #pragma omp atomic
   bfx2.a -= ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, ptr getelementptr inbounds nuw 
(i8, ptr @{{.+}}, i64 3) monotonic, align 1
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
-// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST1:%.+]],
-// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]],
-// CHECK: [[A_LD:%.+]] = load i8, ptr [[BITCAST]],
-// CHECK: [[A_ASHR:%.+]] = ashr i8 [[A_LD]], 7
-// CHECK: [[CAST:%.+]] = sext i8 [[A_ASHR]] to i32
-// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80
-// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[X_RVAL]]
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST1:%.+]],
+// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]],
+// CHECK: [[A_LD:%.+]] = load i32, ptr [[BITCAST]],
+// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_LD]], 31
+// CHECK: [[CAST:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80
+// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[CAST]]
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[DIV]] to i32
-// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8
-// CHECK: [[BF_LD:%.+]] = load i8, ptr [[BITCAST1]],
-// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1
-// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7
-// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127
-// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i8 %{{.+}}, ptr [[BITCAST1]]
-// CHECK: [[NEW_BF_VALUE:%.+]] = load i8, ptr [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr 
@{{.+}}, i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, 
align 1
-// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
-// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
+// CHECK: [[BF_LD:%.+]] = load i32, ptr [[BITCAST1]],
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, ptr [[BITCAST1]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[BITCAST1]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_BF_VALUE]], i32 
[[NEW_BF_VALUE]] monotonic monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 #pragma omp atomic update
@@ -682,28 +682,28 @@ int main(void) {
 #pragma omp atomic
   bfx3.a /= ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}}
-// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr 
inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[BITCAST:%.+]], i32 noundef 
0)
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[PREV_VALUE:%.+]] = load i24, ptr [[LDTEMP:%.+]],
-// CHECK: store i24 [[PREV_VALUE]], ptr [[TEMP1:%.+]],
-// CHECK: [[PREV_VALUE:%.+]] = load i24, ptr [[LDTEMP]]
-// CHECK: store i24 [[PREV_VALUE]], ptr [[TEMP:%.+]],
-// CHECK: [[A_LD:%.+]] = load i24, ptr [[TEMP]],
-// CHECK: [[A_SHL:%.+]] = shl i24 [[A_LD]], 7
-// CHECK: [[A_ASHR:%.+]] = ashr i24 [[A_SHL]], 10
-// CHECK: [[CAST:%.+]] = sext i24 [[A_ASHR]] to i32
-// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4
+// CHECK: br label %[[CONT:.+]]
+// CHECK: [[CONT]]
+// CHECK: [[OLD_VAL:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD_VAL]], ptr [[TEMP1:%[^,]+]], align 4
+// CHECK: store i32 [[OLD_VAL]], ptr [[TEMP:%[^,]+]], align 4
+// CHECK: [[A_LD:%.+]] = load i32, ptr [[TEMP]],
+// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 7
+// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 18
+// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80
 // CHECK: [[ADD:%.+]] = fadd x86_fp80 [[X_RVAL]], [[EXPR]]
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[ADD]] to i32
-// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24
-// CHECK: [[BF_LD:%.+]] = load i24, ptr [[TEMP1]],
-// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383
-// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3
-// CHECK: [[BF_CLEAR:%.+]] = and i24 [[BF_LD]], -131065
-// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i24 %{{.+}}, ptr [[TEMP1]]
-// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i64 noundef 3, ptr noundef getelementptr inbounds 
nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP]], ptr noundef [[TEMP1]], 
i32 noundef 0, i32 noundef 0)
+// CHECK: [[BF_LD:%.+]] = load i32, ptr [[TEMP1]],
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11
+// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -33552385
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, ptr [[TEMP1]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP1]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_VAL]], i32 
[[NEW_BF_VALUE]] monotonic monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 #pragma omp atomic update
diff --git a/clang/test/OpenMP/atomic_write_codegen.c 
b/clang/test/OpenMP/atomic_write_codegen.c
index 7b3b38c43de75..410770b3f7132 100644
--- a/clang/test/OpenMP/atomic_write_codegen.c
+++ b/clang/test/OpenMP/atomic_write_codegen.c
@@ -300,17 +300,18 @@ int main(void) {
   bfx.a = ldv;
 // CHECK: load x86_fp80, ptr @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr 
inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 
0)
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr getelementptr inbounds nuw 
(i8, ptr @{{.+}}, i64 4) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[OLD_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP]],
-// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[LDTEMP1:%.+]],
-// CHECK: [[OLD_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP1]],
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
 // CHECK: [[BF_VALUE:%.+]] = and i32 [[NEW_VAL]], 2147483647
-// CHECK: [[BF_CLEAR:%.+]] = and i32 [[OLD_BF_VALUE]], -2147483648
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -2147483648
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i32 %{{.+}}, ptr [[LDTEMP1]]
-// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i64 noundef 4, ptr noundef getelementptr inbounds 
nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP]], ptr noundef [[LDTEMP1]], 
i32 noundef 0, i32 noundef 0)
+// CHECK: store i32 %{{.+}}, ptr [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr 
@{{.+}}, i64 4), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic 
monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 #pragma omp atomic write
@@ -336,20 +337,19 @@ int main(void) {
   bfx2.a = ldv;
 // CHECK: load x86_fp80, ptr @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, ptr getelementptr inbounds nuw 
(i8, ptr @{{.+}}, i64 3) monotonic, align 1
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
-// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8
-// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1
-// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7
-// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127
-// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i8 %{{.+}}, ptr [[LDTEMP:%.+]]
-// CHECK: [[NEW_BF_VALUE:%.+]] = load i8, ptr [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr 
@{{.+}}, i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, 
align 1
-// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
-// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
+// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, ptr [[LDTEMP:%.+]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_BF_VALUE]], i32 
[[NEW_BF_VALUE]] monotonic monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 #pragma omp atomic write
@@ -375,18 +375,20 @@ int main(void) {
   bfx3.a = ldv;
 // CHECK: load x86_fp80, ptr @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr 
inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[BITCAST:%.+]], i32 noundef 
0)
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
-// CHECK: [[OLD_VAL:%.+]] = load i24, ptr %{{.+}},
-// CHECK: store i24 [[OLD_VAL]], ptr [[TEMP:%.+]],
-// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24
-// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383
-// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3
-// CHECK: [[BF_CLEAR:%.+]] = and i24 %{{.+}}, -131065
-// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]]
-// CHECK: store i24 %{{.+}}, ptr [[TEMP]]
-// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i64 noundef 3, ptr noundef getelementptr inbounds 
nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP:%.+]], ptr noundef [[TEMP]], 
i32 noundef 0, i32 noundef 0)
+// CHECK: [[OLD_VAL:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
+// CHECK: store i32 [[OLD_VAL]], ptr [[TEMP:%.+]]
+// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383
+// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11
+// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -33552385
+// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
+// CHECK: store i32 %{{.+}}, ptr [[TEMP]]
+// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP]]
+// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_VAL]], i32 
[[NEW_BF_VALUE]] monotonic monotonic, align 4
+// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
+// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 #pragma omp atomic write
diff --git a/clang/test/SemaCXX/builtin-assume-aligned.cpp 
b/clang/test/SemaCXX/builtin-assume-aligned.cpp
index 30296c72c6be8..ebd172ce7982f 100644
--- a/clang/test/SemaCXX/builtin-assume-aligned.cpp
+++ b/clang/test/SemaCXX/builtin-assume-aligned.cpp
@@ -26,7 +26,7 @@ constexpr void *q4 = __builtin_assume_aligned(&n, 4, -4);
 
 static char ar1[6];
 // expected-error@+2 {{must be initialized by a constant expression}}
-// expected-note@+1 {{alignment of the base pointee object (1 byte) is less 
than the asserted 16 bytes}}
+// expected-note@+1 {{alignment of the base pointee object (4 bytes) is less 
than the asserted 16 bytes}}
 constexpr void *r1 = __builtin_assume_aligned(&ar1[2], 16);
 
 static char ar2[6] __attribute__((aligned(32)));

>From 5719b2b28cba1f8739a2e22bfe31eb762ea0dd12 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Mon, 9 Mar 2026 19:36:36 +0100
Subject: [PATCH 2/8] Add test case

---
 .../X86/x86_64-global-preferred-alignment.c   | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c

diff --git a/clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c 
b/clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c
new file mode 100644
index 0000000000000..1e2b339552b0b
--- /dev/null
+++ b/clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | 
FileCheck %s
+
+struct S3 {
+  char Buffer[3];
+};
+
+struct S4 {
+  char Buffer[4];
+};
+
+struct S15 {
+  char Buffer[15];
+};
+
+struct S16 {
+  char Buffer[16];
+};
+
+struct S127 {
+  char Buffer[127];
+};
+
+struct S128 {
+  char Buffer[128];
+};
+
+struct S3 g3;
+struct S4 g4;
+struct S15 g15;
+struct S16 g16;
+struct S127 g127;
+struct S128 g128;
+
+// CHECK: @g3 = global %struct.S3 zeroinitializer, align 1
+// CHECK: @g4 = global %struct.S4 zeroinitializer, align 4
+// CHECK: @g15 = global %struct.S15 zeroinitializer, align 4
+// CHECK: @g16 = global %struct.S16 zeroinitializer, align 8
+// CHECK: @g127 = global %struct.S127 zeroinitializer, align 8
+// CHECK: @g128 = global %struct.S128 zeroinitializer, align 16

>From 8bf10c140b63e5f05308bfb5efea2df0d2a061e4 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Fri, 10 Apr 2026 21:07:39 +0200
Subject: [PATCH 3/8] Only increase alignment for non external

---
 clang/lib/AST/ASTContext.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index c9798d53fa5fa..8af0da62ee2f7 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1837,7 +1837,11 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool 
ForAlignof) const {
         uint64_t TypeSize =
             !BaseT->isIncompleteType() ? getTypeSize(T.getTypePtr()) : 0;
         Align = std::max(Align, getMinGlobalAlignOfVar(TypeSize, VD));
-        Align = std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align));
+        // Do not increase alignment for externally defined variables,
+        // to avoid breaking ABI compatibility.
+        if (VD->hasDefinition())
+          Align =
+              std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align));
       }
 
     // Fields can be subject to extra alignment constraints, like if

>From 19b30de57b417b28a5969976ed0864308a545c18 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Fri, 10 Apr 2026 21:07:52 +0200
Subject: [PATCH 4/8] Format

---
 clang/include/clang/Basic/TargetInfo.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 762c4d6bc9034..fa64ee5cb2ff8 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -853,8 +853,8 @@ class TargetInfo : public TransferrableTargetInfo,
   unsigned getLargeArrayMinWidth() const { return LargeArrayMinWidth; }
   unsigned getLargeArrayAlign() const { return LargeArrayAlign; }
 
-  // getLargeGlobalAlign/getLargeGlobalMinWidth - Return the minimum global 
size that is
-  // 'large' and its alignment.
+  // getLargeGlobalAlign/getLargeGlobalMinWidth - Return the minimum global 
size
+  // that is 'large' and its alignment.
   unsigned getLargeGlobalMinWidth() const { return LargeGlobalMinWidth; }
   unsigned getLargeGlobalAlign() const { return LargeGlobalAlign; }
 

>From 2d3a757eefc13f063c751fc35416308f642a5c08 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Fri, 10 Apr 2026 21:08:31 +0200
Subject: [PATCH 5/8] Format

---
 clang/include/clang/AST/ASTContext.h |  3 ++-
 clang/lib/AST/ASTContext.cpp         | 11 ++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 4686c9de42083..175256fee8264 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2805,7 +2805,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
   }
   unsigned getPreferredTypeAlign(const Type *T) const;
 
-  unsigned getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Align) 
const;
+  unsigned getLargeGlobalPreferredAlign(uint64_t TypeSize,
+                                        unsigned Align) const;
 
   /// Return the default alignment for __attribute__((aligned)) on
   /// this target, to be used if no alignment value is specified.
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 8af0da62ee2f7..4539b4677f0ef 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2647,10 +2647,12 @@ CharUnits 
ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const {
   return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
 }
 
-/// getLargeGlobalPreferredAlign - Return the "preferred" alignment of the 
specified
-/// global variable in bits. Only variables larger than the specifed 
"LargeGlobalMinWidth" will
-/// be aligned using the "LargeGlobalAlign" alignment - typically 16 bytes
-unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned 
Align) const {
+/// getLargeGlobalPreferredAlign - Return the "preferred" alignment of the
+/// specified global variable in bits. Only variables larger than the specified
+/// "LargeGlobalMinWidth" will be aligned using the "LargeGlobalAlign" 
alignment
+/// - typically 16 bytes
+unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize,
+                                                  unsigned Align) const {
   if (TypeSize >= Target->getLargeGlobalMinWidth())
     return Target->getLargeGlobalAlign();
   else if (TypeSize >= 128)
@@ -2659,7 +2661,6 @@ unsigned 
ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Al
     return (unsigned)32;
   else
     return Align;
-
 }
 
 /// getPreferredTypeAlign - Return the "preferred" alignment of the specified

>From 4cad029e8dac070c12b2cf8481ab85d991c23031 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Fri, 10 Apr 2026 21:16:34 +0200
Subject: [PATCH 6/8] Add test

---
 .../CodeGen/x86_64-extern-global-alignment.c  | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 clang/test/CodeGen/x86_64-extern-global-alignment.c

diff --git a/clang/test/CodeGen/x86_64-extern-global-alignment.c 
b/clang/test/CodeGen/x86_64-extern-global-alignment.c
new file mode 100644
index 0000000000000..1c612832a1345
--- /dev/null
+++ b/clang/test/CodeGen/x86_64-extern-global-alignment.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | 
FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s -o - | 
FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck 
%s
+
+// Test that getLargeGlobalPreferredAlign does not bump alignment for extern
+// global variable declarations. Extern variables are defined in external
+// libraries, and the compiler cannot assume they are aligned beyond their
+// natural type alignment.
+
+// A 128-byte struct of doubles (natural alignment 8)
+struct S128Doubles {
+    double m11,m12,m13,m14;
+    double m21,m22,m23,m24;
+    double m31,m32,m33,m34;
+    double m41,m42,m43,m44;
+};
+
+// A 128-byte struct of chars (natural alignment 1)
+struct S128 {
+    char Buffer[128];
+};
+
+// A 64-byte struct of chars (natural alignment 1)
+struct S64 {
+    char Buffer[64];
+};
+
+// A 16-byte struct of chars (natural alignment 1)
+struct S16 {
+    char Buffer[16];
+};
+
+// Extern declarations: alignment must NOT be bumped beyond the type's natural
+// preferred alignment, because the compiler does not control placement of
+// extern symbols.
+extern struct S128Doubles extern_s128doubles;
+extern struct S128 extern_s128;
+extern struct S64 extern_s64;
+extern struct S16 extern_s16;
+
+// Definitions: alignment CAN be bumped because the compiler controls 
placement.
+struct S128Doubles defined_s128doubles = {0};
+struct S128 defined_s128 = {0};
+struct S64 defined_s64 = {0};
+struct S16 defined_s16 = {0};
+
+// Extern globals should use the type's natural preferred alignment:
+// CHECK-DAG: @extern_s128doubles = external {{(dso_local )?}}global 
%struct.S128Doubles, align 8
+// CHECK-DAG: @extern_s128 = external {{(dso_local )?}}global %struct.S128, 
align 1
+// CHECK-DAG: @extern_s64 = external {{(dso_local )?}}global %struct.S64, 
align 1
+// CHECK-DAG: @extern_s16 = external {{(dso_local )?}}global %struct.S16, 
align 1
+
+// Defined globals can use the bumped alignment:
+// CHECK-DAG: @defined_s128doubles = {{(dso_local )?}}global 
%struct.S128Doubles zeroinitializer, align 16
+// CHECK-DAG: @defined_s128 = {{(dso_local )?}}global %struct.S128 
zeroinitializer, align 16
+// CHECK-DAG: @defined_s64 = {{(dso_local )?}}global %struct.S64 
zeroinitializer, align 8
+// CHECK-DAG: @defined_s16 = {{(dso_local )?}}global %struct.S16 
zeroinitializer, align 8
+
+void use(void *);
+void test(void) {
+    use(&extern_s128doubles);
+    use(&extern_s128);
+    use(&extern_s64);
+    use(&extern_s16);
+    use(&defined_s128doubles);
+    use(&defined_s128);
+    use(&defined_s64);
+    use(&defined_s16);
+}

>From bfb07a7f28f851a6470d8b8a4e848eff4cdb38d9 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Fri, 10 Apr 2026 22:52:03 +0200
Subject: [PATCH 7/8] Avoid elf copiable

---
 clang/lib/AST/ASTContext.cpp                  | 16 +++-
 clang/test/CodeGen/c-strings.c                |  2 +-
 .../CodeGen/x86_64-extern-global-alignment.c  | 87 +++++++++++++++----
 3 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 4539b4677f0ef..a910cbda43d89 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1837,9 +1837,19 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool 
ForAlignof) const {
         uint64_t TypeSize =
             !BaseT->isIncompleteType() ? getTypeSize(T.getTypePtr()) : 0;
         Align = std::max(Align, getMinGlobalAlignOfVar(TypeSize, VD));
-        // Do not increase alignment for externally defined variables
-        // to not break ABI compatibility.
-        if (VD->hasDefinition())
+        // Only increase alignment if the compiler can safely control the
+        // variable's placement. This mirrors the conditions in
+        // GlobalObject::canIncreaseAlignment:
+        // - Must be a strong definition (not a declaration or weak)
+        // - Must not have a section attribute (may be densely packed)
+        // - On ELF with PIC (not PIE), default-visibility symbols may be
+        //   COPY-relocated, so the executable controls their alignment.
+        bool IsAlwaysLocal = VD->hasDefinition() && !VD->isWeak() &&
+                           !VD->hasAttr<SectionAttr>();
+        bool IsELFCopyReloc = Target->getTriple().isOSBinFormatELF() &&
+            LangOpts.PICLevel && !LangOpts.PIE &&
+            VD->getVisibility() == DefaultVisibility;
+        if (IsAlwaysLocal && !IsELFCopyReloc)
           Align =
               std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align));
       }
diff --git a/clang/test/CodeGen/c-strings.c b/clang/test/CodeGen/c-strings.c
index c31fb779a0ca6..31c438fd8ff2e 100644
--- a/clang/test/CodeGen/c-strings.c
+++ b/clang/test/CodeGen/c-strings.c
@@ -7,7 +7,7 @@
 // CHECK: @align = {{(dso_local )?}}global i8 [[ALIGN:[0-9]+]]
 // ITANIUM: @.str = private unnamed_addr constant [6 x i8] c"hello\00"
 // MSABI: @"??_C@_05CJBACGMB@hello?$AA@" = linkonce_odr dso_local unnamed_addr 
constant [6 x i8] c"hello\00", comdat, align 1
-// ITANIUM: @f1.x = internal global ptr @.str, align 8
+// ITANIUM: @f1.x = internal global ptr @.str
 // MSABI: @f1.x = internal global ptr @"??_C@_05CJBACGMB@hello?$AA@"
 // CHECK: @f2.x = internal global [6 x i8] c"hello\00", align [[ALIGN]]
 // CHECK: @f3.x = internal global [8 x i8] c"hello\00\00\00", align [[ALIGN]]
diff --git a/clang/test/CodeGen/x86_64-extern-global-alignment.c 
b/clang/test/CodeGen/x86_64-extern-global-alignment.c
index 1c612832a1345..8b92497f816ee 100644
--- a/clang/test/CodeGen/x86_64-extern-global-alignment.c
+++ b/clang/test/CodeGen/x86_64-extern-global-alignment.c
@@ -1,11 +1,17 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | 
FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s -o - | 
FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck 
%s
+//
+// ELF with -fPIC (shared library): default-visibility symbols may be
+// COPY-relocated, so alignment must NOT be bumped for them.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -pic-level 2 -emit-llvm %s 
-o - | FileCheck %s --check-prefix=ELF-PIC
+//
+// ELF with -fPIE (executable): definitions are dso_local, alignment CAN be 
bumped.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -pic-level 2 -pic-is-pie 
-emit-llvm %s -o - | FileCheck %s --check-prefix=ELF-PIE
 
-// Test that getLargeGlobalPreferredAlign does not bump alignment for extern
-// global variable declarations. Extern variables are defined in external
-// libraries, and the compiler cannot assume they are aligned beyond their
-// natural type alignment.
+// Test that getLargeGlobalPreferredAlign does not bump alignment for variables
+// where the compiler cannot safely control placement. This mirrors the
+// conditions in GlobalObject::canIncreaseAlignment.
 
 // A 128-byte struct of doubles (natural alignment 8)
 struct S128Doubles {
@@ -30,32 +36,75 @@ struct S16 {
     char Buffer[16];
 };
 
-// Extern declarations: alignment must NOT be bumped beyond the type's natural
-// preferred alignment, because the compiler does not control placement of
-// extern symbols.
+// --- Extern declarations: no definition, alignment must NOT be bumped ---
 extern struct S128Doubles extern_s128doubles;
 extern struct S128 extern_s128;
 extern struct S64 extern_s64;
 extern struct S16 extern_s16;
 
-// Definitions: alignment CAN be bumped because the compiler controls 
placement.
-struct S128Doubles defined_s128doubles = {0};
-struct S128 defined_s128 = {0};
-struct S64 defined_s64 = {0};
-struct S16 defined_s16 = {0};
-
-// Extern globals should use the type's natural preferred alignment:
 // CHECK-DAG: @extern_s128doubles = external {{(dso_local )?}}global 
%struct.S128Doubles, align 8
 // CHECK-DAG: @extern_s128 = external {{(dso_local )?}}global %struct.S128, 
align 1
 // CHECK-DAG: @extern_s64 = external {{(dso_local )?}}global %struct.S64, 
align 1
 // CHECK-DAG: @extern_s16 = external {{(dso_local )?}}global %struct.S16, 
align 1
 
-// Defined globals can use the bumped alignment:
+// --- Strong definitions: alignment CAN be bumped ---
+struct S128Doubles defined_s128doubles = {0};
+struct S128 defined_s128 = {0};
+struct S64 defined_s64 = {0};
+struct S16 defined_s16 = {0};
+
 // CHECK-DAG: @defined_s128doubles = {{(dso_local )?}}global 
%struct.S128Doubles zeroinitializer, align 16
 // CHECK-DAG: @defined_s128 = {{(dso_local )?}}global %struct.S128 
zeroinitializer, align 16
 // CHECK-DAG: @defined_s64 = {{(dso_local )?}}global %struct.S64 
zeroinitializer, align 8
 // CHECK-DAG: @defined_s16 = {{(dso_local )?}}global %struct.S16 
zeroinitializer, align 8
 
+// --- Weak definitions: alignment must NOT be bumped ---
+__attribute__((weak)) struct S128 weak_s128 = {0};
+__attribute__((weak)) struct S64 weak_s64 = {0};
+
+// CHECK-DAG: @weak_s128 = weak {{(dso_local )?}}global %struct.S128 
zeroinitializer, align 1
+// CHECK-DAG: @weak_s64 = weak {{(dso_local )?}}global %struct.S64 
zeroinitializer, align 1
+
+#ifdef __APPLE__
+__attribute__((section("__DATA,.mysect"))) struct S128 section_s128 = {0};
+__attribute__((section("__DATA,.mysect"))) struct S64 section_s64 = {0};
+#else
+__attribute__((section(".mysect"))) struct S128 section_s128 = {0};
+__attribute__((section(".mysect"))) struct S64 section_s64 = {0};
+#endif
+
+// CHECK-DAG: @section_s128 = {{(dso_local )?}}global %struct.S128 
zeroinitializer, section "{{[^"]+}}", align 1
+// CHECK-DAG: @section_s64 = {{(dso_local )?}}global %struct.S64 
zeroinitializer, section "{{[^"]+}}", align 1
+
+// --- Tentative definitions: alignment CAN be bumped (these are strong defs) 
---
+struct S128 tentative_s128;
+struct S64 tentative_s64;
+
+// CHECK-DAG: @tentative_s128 = {{(dso_local )?}}global %struct.S128 
zeroinitializer, align 16
+// CHECK-DAG: @tentative_s64 = {{(dso_local )?}}global %struct.S64 
zeroinitializer, align 8
+
+// --- ELF with -fPIC: default-visibility defs must NOT be bumped (COPY 
relocation risk) ---
+// ELF-PIC-DAG: @defined_s128 = global %struct.S128 zeroinitializer, align 1
+// ELF-PIC-DAG: @defined_s64 = global %struct.S64 zeroinitializer, align 1
+// ELF-PIC-DAG: @tentative_s128 = global %struct.S128 zeroinitializer, align 1
+// ELF-PIC-DAG: @tentative_s64 = global %struct.S64 zeroinitializer, align 1
+// Hidden visibility is dso_local even with -fPIC, so alignment CAN be bumped.
+// ELF-PIC-DAG: @hidden_s128 = hidden global %struct.S128 zeroinitializer, 
align 16
+// ELF-PIC-DAG: @hidden_s64 = hidden global %struct.S64 zeroinitializer, align 
8
+
+// --- ELF with -fPIE: definitions are dso_local, alignment CAN be bumped ---
+// ELF-PIE-DAG: @defined_s128 = dso_local global %struct.S128 zeroinitializer, 
align 16
+// ELF-PIE-DAG: @defined_s64 = dso_local global %struct.S64 zeroinitializer, 
align 8
+// ELF-PIE-DAG: @tentative_s128 = dso_local global %struct.S128 
zeroinitializer, align 16
+// ELF-PIE-DAG: @tentative_s64 = dso_local global %struct.S64 zeroinitializer, 
align 8
+
+// Hidden visibility: safe to bump on all configurations.
+__attribute__((visibility("hidden"))) struct S128 hidden_s128 = {0};
+__attribute__((visibility("hidden"))) struct S64 hidden_s64 = {0};
+
+// CHECK-DAG: @hidden_s128 = {{(dso_local )?}}hidden global %struct.S128 
zeroinitializer, align 16
+// CHECK-DAG: @hidden_s64 = {{(dso_local )?}}hidden global %struct.S64 
zeroinitializer, align 8
+
 void use(void *);
 void test(void) {
     use(&extern_s128doubles);
@@ -66,4 +115,12 @@ void test(void) {
     use(&defined_s128);
     use(&defined_s64);
     use(&defined_s16);
+    use(&weak_s128);
+    use(&weak_s64);
+    use(&section_s128);
+    use(&section_s64);
+    use(&tentative_s128);
+    use(&tentative_s64);
+    use(&hidden_s128);
+    use(&hidden_s64);
 }

>From 490a735e9e7e34641d1a3d9440854416bd6f9d18 Mon Sep 17 00:00:00 2001
From: Marco Bartoli <[email protected]>
Date: Fri, 10 Apr 2026 22:53:26 +0200
Subject: [PATCH 8/8] Format

---
 clang/lib/AST/ASTContext.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a910cbda43d89..8d0f32ab68f72 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1844,11 +1844,11 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool 
ForAlignof) const {
         // - Must not have a section attribute (may be densely packed)
         // - On ELF with PIC (not PIE), default-visibility symbols may be
         //   COPY-relocated, so the executable controls their alignment.
-        bool IsAlwaysLocal = VD->hasDefinition() && !VD->isWeak() &&
-                           !VD->hasAttr<SectionAttr>();
+        bool IsAlwaysLocal =
+            VD->hasDefinition() && !VD->isWeak() && 
!VD->hasAttr<SectionAttr>();
         bool IsELFCopyReloc = Target->getTriple().isOSBinFormatELF() &&
-            LangOpts.PICLevel && !LangOpts.PIE &&
-            VD->getVisibility() == DefaultVisibility;
+                              LangOpts.PICLevel && !LangOpts.PIE &&
+                              VD->getVisibility() == DefaultVisibility;
         if (IsAlwaysLocal && !IsELFCopyReloc)
           Align =
               std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align));

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to