https://github.com/wsxarcher updated https://github.com/llvm/llvm-project/pull/185487
>From 8a14bd08f085d961c7700fa63342b5846cb419f7 Mon Sep 17 00:00:00 2001 From: Marco Bartoli <[email protected]> Date: Mon, 9 Mar 2026 19:11:55 +0100 Subject: [PATCH 1/6] [clang] Refine preferred alignment for x86 globals --- clang/include/clang/AST/ASTContext.h | 2 + clang/include/clang/Basic/TargetInfo.h | 7 ++ clang/lib/AST/ASTContext.cpp | 16 ++++ clang/lib/Basic/TargetInfo.cpp | 2 + clang/lib/Basic/Targets/X86.h | 2 + clang/test/AST/ByteCode/codegen.cpp | 2 +- clang/test/CodeGen/Nontemporal.cpp | 4 +- clang/test/CodeGen/attr-counted-by.c | 8 +- clang/test/CodeGen/c-strings.c | 2 +- clang/test/CodeGen/complex.c | 8 +- clang/test/CodeGen/keep-static-consts.cpp | 4 +- .../linux-kernel-struct-union-initializer.c | 26 +++--- clang/test/CodeGen/matrix-type-builtins.c | 2 +- clang/test/CodeGen/matrix-type-indexing.c | 4 +- clang/test/CodeGen/no-opt-volatile-memcpy.c | 8 +- .../test/CodeGen/partial-reinitialization2.c | 8 +- clang/test/CodeGen/unaligned-decl.c | 12 +-- clang/test/CodeGen/unaligned-expr.c | 16 ++-- clang/test/CodeGen/vector-alignment.c | 4 +- clang/test/CodeGenCXX/const-init-cxx11.cpp | 2 +- .../ms-constexpr-static-data-member.cpp | 2 +- .../CodeGenCXX/no-opt-volatile-memcpy.cpp | 10 +- .../CodeGenCXX/pointers-to-data-members.cpp | 2 +- clang/test/CodeGenCXX/static-init.cpp | 4 +- clang/test/CodeGenObjCXX/encode.mm | 2 +- clang/test/DebugInfo/KeyInstructions/agg.c | 2 +- clang/test/OpenMP/atomic_capture_codegen.cpp | 92 +++++++++---------- clang/test/OpenMP/atomic_read_codegen.c | 21 +++-- clang/test/OpenMP/atomic_update_codegen.cpp | 92 +++++++++---------- clang/test/OpenMP/atomic_write_codegen.c | 60 ++++++------ clang/test/SemaCXX/builtin-assume-aligned.cpp | 2 +- 31 files changed, 230 insertions(+), 198 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index ba1b58489c327..4686c9de42083 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -2805,6 +2805,8 
@@ class ASTContext : public RefCountedBase<ASTContext> { } unsigned getPreferredTypeAlign(const Type *T) const; + unsigned getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Align) const; + /// Return the default alignment for __attribute__((aligned)) on /// this target, to be used if no alignment value is specified. unsigned getTargetDefaultAlignForAttributeAligned() const; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 9f7d2a17a0f8a..762c4d6bc9034 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -97,6 +97,8 @@ struct TransferrableTargetInfo { unsigned char DoubleWidth, DoubleAlign; unsigned char LongDoubleWidth, LongDoubleAlign, Float128Align, Ibm128Align; unsigned char LargeArrayMinWidth, LargeArrayAlign; + unsigned short LargeGlobalMinWidth; + unsigned char LargeGlobalAlign; unsigned char LongWidth, LongAlign; unsigned char LongLongWidth, LongLongAlign; unsigned char Int128Align; @@ -851,6 +853,11 @@ class TargetInfo : public TransferrableTargetInfo, unsigned getLargeArrayMinWidth() const { return LargeArrayMinWidth; } unsigned getLargeArrayAlign() const { return LargeArrayAlign; } + // getLargeGlobalAlign/getLargeGlobalMinWidth - Return the minimum global size that is + // 'large' and its alignment. + unsigned getLargeGlobalMinWidth() const { return LargeGlobalMinWidth; } + unsigned getLargeGlobalAlign() const { return LargeGlobalAlign; } + /// Return the maximum width lock-free atomic operation which will /// ever be supported for the given target unsigned getMaxAtomicPromoteWidth() const { return MaxAtomicPromoteWidth; } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index ee7f823b014b2..c9798d53fa5fa 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1837,6 +1837,7 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const { uint64_t TypeSize = !BaseT->isIncompleteType() ? 
getTypeSize(T.getTypePtr()) : 0; Align = std::max(Align, getMinGlobalAlignOfVar(TypeSize, VD)); + Align = std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align)); } // Fields can be subject to extra alignment constraints, like if @@ -2642,6 +2643,21 @@ CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const { return toCharUnitsFromBits(getTypeUnadjustedAlign(T)); } +/// getLargeGlobalPreferredAlign - Return the "preferred" alignment of the specified +/// global variable in bits. Only variables larger than the specified "LargeGlobalMinWidth" will +/// be aligned using the "LargeGlobalAlign" alignment - typically 16 bytes +unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Align) const { + if (TypeSize >= Target->getLargeGlobalMinWidth()) + return Target->getLargeGlobalAlign(); + else if (TypeSize >= 128) + return (unsigned)64; + else if (TypeSize >= 32) + return (unsigned)32; + else + return Align; + +} + /// getPreferredTypeAlign - Return the "preferred" alignment of the specified /// type for the current target in bits.
This can be different than the ABI /// alignment in cases where it is beneficial for performance or backwards diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index e6ae89e0948c5..a3bc603360959 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -128,6 +128,8 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { Ibm128Align = 128; LargeArrayMinWidth = 0; LargeArrayAlign = 0; + LargeGlobalMinWidth = 0; + LargeGlobalAlign = 0; MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0; MaxVectorAlign = 0; MaxTLSAlign = 0; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index c7afcc7c86053..686c2bdc17593 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -742,6 +742,8 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { LongDoubleAlign = 128; LargeArrayMinWidth = 128; LargeArrayAlign = 128; + LargeGlobalMinWidth = 1024; + LargeGlobalAlign = 128; SuitableAlign = 128; SizeType = IsX32 ? UnsignedInt : UnsignedLong; PtrDiffType = IsX32 ? 
SignedInt : SignedLong; diff --git a/clang/test/AST/ByteCode/codegen.cpp b/clang/test/AST/ByteCode/codegen.cpp index cbb0504c89f13..7a4150d0970cb 100644 --- a/clang/test/AST/ByteCode/codegen.cpp +++ b/clang/test/AST/ByteCode/codegen.cpp @@ -22,7 +22,7 @@ struct S { float c[3]; }; -// CHECK: @s = global %struct.S zeroinitializer, align 4 +// CHECK: @s = global %struct.S zeroinitializer, align 8 S s; // CHECK: @sp = constant ptr getelementptr (i8, ptr @s, i64 16), align 8 float &sp = s.c[3]; diff --git a/clang/test/CodeGen/Nontemporal.cpp b/clang/test/CodeGen/Nontemporal.cpp index 5052cb225d411..e6f9f2fceb9fa 100644 --- a/clang/test/CodeGen/Nontemporal.cpp +++ b/clang/test/CodeGen/Nontemporal.cpp @@ -53,8 +53,8 @@ S x; typedef int v4si __attribute__ ((vector_size(16))); // CHECK-LABEL: define void @_Z14test_alignmentv() -// CHECK: load <4 x i32>, ptr @x, align 1, !nontemporal -// CHECK: store <4 x i32> %1, ptr @x, align 1, !nontemporal +// CHECK: load <4 x i32>, ptr @x, align 8, !nontemporal +// CHECK: store <4 x i32> %1, ptr @x, align 8, !nontemporal void test_alignment() { auto t = __builtin_nontemporal_load((v4si*)x.c); diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index 58d06f411b2c5..1d05b96ed2df5 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -1199,7 +1199,7 @@ int test12_a, test12_b; // SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITH-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR9:[0-9]+]] -// SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] +// SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull 
align 8 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64, !nosanitize [[META6]] // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i32 [[INDEX]], 6 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF7]], !nosanitize [[META6]] @@ -1225,7 +1225,7 @@ int test12_a, test12_b; // NO-SANITIZE-WITH-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITH-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR12:[0-9]+]] -// NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] +// NO-SANITIZE-WITH-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 8 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] // NO-SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i8], ptr [[BAZ]], i64 [[IDXPROM]] // NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] @@ -1241,7 +1241,7 @@ int test12_a, test12_b; // SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // SANITIZE-WITHOUT-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR7:[0-9]+]] -// SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] +// SANITIZE-WITHOUT-ATTR-NEXT: call 
void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 8 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64, !nosanitize [[META8:![0-9]+]] // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i32 [[INDEX]], 6 // SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP1]], label %[[CONT:.*]], label %[[HANDLER_OUT_OF_BOUNDS:.*]], !prof [[PROF9:![0-9]+]], !nosanitize [[META8]] @@ -1267,7 +1267,7 @@ int test12_a, test12_b; // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ENTRY:.*:]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4 // NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAZ]]) #[[ATTR11:[0-9]+]] -// NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] +// NO-SANITIZE-WITHOUT-ATTR-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 8 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64 // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i8], ptr [[BAZ]], i64 [[IDXPROM]] // NO-SANITIZE-WITHOUT-ATTR-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA2]] diff --git a/clang/test/CodeGen/c-strings.c b/clang/test/CodeGen/c-strings.c index 31c438fd8ff2e..c31fb779a0ca6 100644 --- a/clang/test/CodeGen/c-strings.c +++ b/clang/test/CodeGen/c-strings.c @@ -7,7 +7,7 @@ // CHECK: @align = {{(dso_local )?}}global i8 [[ALIGN:[0-9]+]] // ITANIUM: @.str = private unnamed_addr constant [6 x i8] c"hello\00" // MSABI: @"??_C@_05CJBACGMB@hello?$AA@" = linkonce_odr 
dso_local unnamed_addr constant [6 x i8] c"hello\00", comdat, align 1 -// ITANIUM: @f1.x = internal global ptr @.str +// ITANIUM: @f1.x = internal global ptr @.str, align 8 // MSABI: @f1.x = internal global ptr @"??_C@_05CJBACGMB@hello?$AA@" // CHECK: @f2.x = internal global [6 x i8] c"hello\00", align [[ALIGN]] // CHECK: @f3.x = internal global [8 x i8] c"hello\00\00\00", align [[ALIGN]] diff --git a/clang/test/CodeGen/complex.c b/clang/test/CodeGen/complex.c index ffa23badba09e..28f7c5380a9b6 100644 --- a/clang/test/CodeGen/complex.c +++ b/clang/test/CodeGen/complex.c @@ -286,7 +286,7 @@ int i; // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ci1, align 4 // CHECK-NEXT: store i32 [[TMP0]], ptr @i, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @i, align 4 -// CHECK-NEXT: [[CS_REAL:%.*]] = load i16, ptr @cs, align 2 +// CHECK-NEXT: [[CS_REAL:%.*]] = load i16, ptr @cs, align 4 // CHECK-NEXT: [[CS_IMAG:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr @cs, i64 2), align 2 // CHECK-NEXT: [[CONV:%.*]] = sext i16 [[CS_REAL]] to i32 // CHECK-NEXT: [[CONV11:%.*]] = sext i16 [[CS_IMAG]] to i32 @@ -294,7 +294,7 @@ int i; // CHECK-NEXT: [[ADD_I13:%.*]] = add i32 [[CONV11]], 0 // CHECK-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD_R12]] to i16 // CHECK-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD_I13]] to i16 -// CHECK-NEXT: store i16 [[CONV14]], ptr @cs, align 2 +// CHECK-NEXT: store i16 [[CONV14]], ptr @cs, align 4 // CHECK-NEXT: store i16 [[CONV15]], ptr getelementptr inbounds nuw (i8, ptr @cs, i64 2), align 2 // CHECK-NEXT: [[CF_REAL:%.*]] = load float, ptr @cf, align 4 // CHECK-NEXT: [[CF_IMAG:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @cf, i64 4), align 4 @@ -305,7 +305,7 @@ int i; // CHECK-NEXT: store double [[ADD_R18]], ptr @D, align 8 // CHECK-NEXT: [[CI1_REAL19:%.*]] = load i32, ptr @ci1, align 4 // CHECK-NEXT: [[CI1_IMAG20:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @ci1, i64 4), align 4 -// CHECK-NEXT: [[CS_REAL21:%.*]] = load i16, ptr 
@cs, align 2 +// CHECK-NEXT: [[CS_REAL21:%.*]] = load i16, ptr @cs, align 4 // CHECK-NEXT: [[CS_IMAG22:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr @cs, i64 2), align 2 // CHECK-NEXT: [[CONV23:%.*]] = sext i16 [[CS_REAL21]] to i32 // CHECK-NEXT: [[CONV24:%.*]] = sext i16 [[CS_IMAG22]] to i32 @@ -322,7 +322,7 @@ int i; // CHECK-NEXT: [[TMP13:%.*]] = sdiv i32 [[TMP11]], [[TMP8]] // CHECK-NEXT: [[CONV25:%.*]] = trunc i32 [[TMP12]] to i16 // CHECK-NEXT: [[CONV26:%.*]] = trunc i32 [[TMP13]] to i16 -// CHECK-NEXT: store i16 [[CONV25]], ptr @cs, align 2 +// CHECK-NEXT: store i16 [[CONV25]], ptr @cs, align 4 // CHECK-NEXT: store i16 [[CONV26]], ptr getelementptr inbounds nuw (i8, ptr @cs, i64 2), align 2 // CHECK-NEXT: [[CI1_REAL27:%.*]] = load i32, ptr @ci1, align 4 // CHECK-NEXT: [[CI1_IMAG28:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @ci1, i64 4), align 4 diff --git a/clang/test/CodeGen/keep-static-consts.cpp b/clang/test/CodeGen/keep-static-consts.cpp index 9f4f95be287ae..1d867b3f0e3b6 100644 --- a/clang/test/CodeGen/keep-static-consts.cpp +++ b/clang/test/CodeGen/keep-static-consts.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fkeep-static-consts -emit-llvm %s -o - -triple=x86_64-unknown-linux-gnu | FileCheck %s -// CHECK: @_ZL7srcvers = internal constant [4 x i8] c"xyz\00", align 1 -// CHECK: @_ZL8srcvers2 = internal constant [4 x i8] c"abc\00", align 1 +// CHECK: @_ZL7srcvers = internal constant [4 x i8] c"xyz\00", align 4 +// CHECK: @_ZL8srcvers2 = internal constant [4 x i8] c"abc\00", align 4 // CHECK: @_ZL1N = internal constant i32 2, align 4 // CHECK: @llvm.compiler.used = appending global [4 x ptr] [ptr @_ZL7srcvers, ptr @b, ptr @_ZL8srcvers2, ptr @_ZL1N], section "llvm.metadata" diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c index ff7e5d3599048..119b2e77bba36 100644 --- a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c +++ 
b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c @@ -56,26 +56,26 @@ struct S3 global_s6 = {101, 15, 123}; // Test empty initializer for union. //. -// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 4 -// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 4 -// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { i32 4, [12 x i8] zeroinitializer } }, align 4 +// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 8 +// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 8 +// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] zeroinitializer }, align 8 +// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 8 +// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 8 +// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 8 +// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] zeroinitializer }, align 8 +// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { i32 4, [12 x i8] zeroinitializer } }, align 8 // CHECK: @global_u3 = global %union.U2 zeroinitializer, align 32 // CHECK: @global_s4 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 // CHECK: @global_s5 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 // CHECK: 
@global_s6 = global { i8, i8, i8 } { i8 101, i8 -65, i8 7 }, align 1 -// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 4 +// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 8 // CHECK: @__const.test3.a = private unnamed_addr constant %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 4 +// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 8 +// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 8 // CHECK: @__const.test7.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 +// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 8 // CHECK: @__const.test9.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 +// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 8 // CHECK: @test12.a = internal global %union.U2 zeroinitializer, align 32 // CHECK: @test14.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 // CHECK: @__const.test15.s = private unnamed_addr constant { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 diff --git a/clang/test/CodeGen/matrix-type-builtins.c b/clang/test/CodeGen/matrix-type-builtins.c index f6e67d5933ee4..f042cc777c4af 100644 --- a/clang/test/CodeGen/matrix-type-builtins.c +++ b/clang/test/CodeGen/matrix-type-builtins.c @@ -97,7 +97,7 @@ void 
transpose_global(void) { // CHECK32-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 4 // CHECK32-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr @global_matrix, align 4 // CHECK64-NEXT: [[M_T_ADDR:%.*]] = alloca [25 x double], align 8 - // CHECK64-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr @global_matrix, align 8 + // CHECK64-NEXT: [[GLOBAL_MATRIX:%.*]] = load <25 x double>, ptr @global_matrix, align 16 // COMMON-NEXT: [[M_T:%.*]] = call <25 x double> @llvm.matrix.transpose.v25f64(<25 x double> [[GLOBAL_MATRIX]], i32 5, i32 5) // CHECK32-NEXT: store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 4 // CHECK64-NEXT: store <25 x double> [[M_T]], ptr [[M_T_ADDR]], align 8 diff --git a/clang/test/CodeGen/matrix-type-indexing.c b/clang/test/CodeGen/matrix-type-indexing.c index d76d14c3f67ef..610a27924db94 100644 --- a/clang/test/CodeGen/matrix-type-indexing.c +++ b/clang/test/CodeGen/matrix-type-indexing.c @@ -52,9 +52,9 @@ void storeAtMatrixSubscriptExpr(int row, int col, float value) { // ROW-CHECK-NEXT: [[row_major_index:%.*]] = add i64 [[row_offset]], [[col_load:%.*]] // COL-CHECK: [[col_offset:%.*]] = mul i64 [[col_load:%.*]], 2 // COL-CHECK-NEXT: [[col_major_index:%.*]] = add i64 [[col_offset]], [[row_load:%.*]] - // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr @gM, align 4 + // CHECK-NEXT: [[matrix_as_vec:%.*]] = load <6 x float>, ptr @gM, align 8 // ROW-CHECK-NEXT: [[matrix_after_insert:%.*]] = insertelement <6 x float> [[matrix_as_vec]], float [[value_load]], i64 [[row_major_index]] // COL-CHECK-NEXT: [[matrix_after_insert:%.*]] = insertelement <6 x float> [[matrix_as_vec]], float [[value_load]], i64 [[col_major_index]] - // CHECK-NEXT: store <6 x float> [[matrix_after_insert]], ptr @gM, align 4 + // CHECK-NEXT: store <6 x float> [[matrix_after_insert]], ptr @gM, align 8 gM[row][col] = value; } diff --git a/clang/test/CodeGen/no-opt-volatile-memcpy.c b/clang/test/CodeGen/no-opt-volatile-memcpy.c index 
572b1faa11d5b..0d3bd3243a9e7 100644 --- a/clang/test/CodeGen/no-opt-volatile-memcpy.c +++ b/clang/test/CodeGen/no-opt-volatile-memcpy.c @@ -16,8 +16,8 @@ void foo (void) { // CHECK-LABEL: define{{.*}} void @foo() // CHECK: %[[LS:.*]] = alloca %struct.s, align 4 // CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 %[[LS]], i64 132, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @gs, ptr align 4 @gs, i64 132, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 @gs, i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @gs, ptr align 16 @gs, i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 16 @gs, i64 132, i1 true) struct s1 { @@ -31,6 +31,6 @@ void fee (void) { s.y = gs; } // CHECK-LABEL: define{{.*}} void @fee() -// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @s, i64 132, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @gs, i64 132, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 @s, i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 @gs, i64 132, i1 true) diff --git a/clang/test/CodeGen/partial-reinitialization2.c b/clang/test/CodeGen/partial-reinitialization2.c index 8d8e04f24541a..715df0a562850 100644 --- a/clang/test/CodeGen/partial-reinitialization2.c +++ b/clang/test/CodeGen/partial-reinitialization2.c @@ -15,7 +15,7 @@ union ULP3 { struct LP3 l3; }; // CHECK-LABEL: test1 void test1(void) { - // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g1, i64 6, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g1, i64 6, i1 false) // CHECK: store i8 120, ptr % struct LP1 l = { .p1 = g1, .p1.x[2] = 'x' }; @@ -24,7 +24,7 @@ void test1(void) // CHECK-LABEL: test2 void test2(void) { - // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g1, i64 6, i1 false) + // CHECK: call void 
@llvm.memcpy{{.*}}ptr align 4 @g1, i64 6, i1 false) // CHECK: store i8 114, ptr % struct LP1 l = { .p1 = g1, .p1.x[1] = 'r' }; @@ -75,10 +75,10 @@ void test4(void) void test5(void) { // .l3 = g3 - // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g3, i64 12, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g3, i64 12, i1 false) // .l3.p1 = { [0] = g1 } implicitly sets [1] to zero - // CHECK: call void @llvm.memcpy{{.*}}ptr align 1 @g1, i64 6, i1 false) + // CHECK: call void @llvm.memcpy{{.*}}ptr align 4 @g1, i64 6, i1 false) // CHECK: getelementptr{{.*}}%struct.P1, ptr{{.*}}i64 1 // CHECK: call void @llvm.memset{{.*}}i8 0, i64 6, i1 false) diff --git a/clang/test/CodeGen/unaligned-decl.c b/clang/test/CodeGen/unaligned-decl.c index 6e35827658f5f..17887ed56bd1b 100644 --- a/clang/test/CodeGen/unaligned-decl.c +++ b/clang/test/CodeGen/unaligned-decl.c @@ -1,22 +1,22 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fms-extensions -emit-llvm < %s | FileCheck %s -// CHECK: @a1 ={{.*}} global i32 1, align 1 +// CHECK: @a1 ={{.*}} global i32 1, align 4 __unaligned int a1 = 1; -// CHECK: @a2 ={{.*}} global i32 1, align 1 +// CHECK: @a2 ={{.*}} global i32 1, align 4 int __unaligned a2 = 1; -// CHECK: @a3 = {{.*}} align 1 +// CHECK: @a3 = {{.*}} align 8 __unaligned int a3[10]; -// CHECK: @a4 = {{.*}} align 1 +// CHECK: @a4 = {{.*}} align 8 int __unaligned a4[10]; -// CHECK: @p1 = {{.*}} align 1 +// CHECK: @p1 = {{.*}} align 4 int *__unaligned p1; // CHECK: @p2 = {{.*}} align 8 int __unaligned *p2; -// CHECK: @p3 = {{.*}} align 1 +// CHECK: @p3 = {{.*}} align 4 int __unaligned *__unaligned p3; diff --git a/clang/test/CodeGen/unaligned-expr.c b/clang/test/CodeGen/unaligned-expr.c index b9c706d3e94be..127ceeced72f4 100644 --- a/clang/test/CodeGen/unaligned-expr.c +++ b/clang/test/CodeGen/unaligned-expr.c @@ -5,8 +5,8 @@ // ------------- __unaligned int x; void test1(void) { - // CHECK: {{%.*}} = load i32, ptr @x, align 1 - // CHECK: store i32 {{%.*}}, ptr 
@x, align 1 + // CHECK: {{%.*}} = load i32, ptr @x, align 4 + // CHECK: store i32 {{%.*}}, ptr @x, align 4 x++; } @@ -30,7 +30,7 @@ void test2_1(void) { int *__unaligned p1; void test3(void) { - // CHECK: {{%.*}} = load ptr, ptr @p1, align 1 + // CHECK: {{%.*}} = load ptr, ptr @p1, align 4 // CHECK: {{%.*}} = load i32, ptr {{%.*}}, align 4 // CHECK: store i32 {{%.*}}, ptr {{%.*}}, align 4 (*p1)++; @@ -46,7 +46,7 @@ void test4(void) { int __unaligned *__unaligned p3; void test5(void) { - // CHECK: {{%.*}} = load ptr, ptr @p3, align 1 + // CHECK: {{%.*}} = load ptr, ptr @p3, align 4 // CHECK: {{%.*}} = load i32, ptr {{%.*}}, align 1 // CHECK: store i32 {{%.*}}, ptr {{%.*}}, align 1 (*p3)++; @@ -87,8 +87,8 @@ void test8(void) { // ------------- __unaligned int a[10]; void test9(void) { - // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 12), align 1 - // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @a, i64 12), align 1 + // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 12), align 4 + // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @a, i64 12), align 4 (a[3])++; } @@ -180,8 +180,8 @@ struct S1 { __unaligned S1 s1; void test20(void) { - // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @s1, i64 4), align 1 - // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @s1, i64 4), align 1 + // CHECK: {{%.*}} = load i32, ptr getelementptr inbounds nuw (i8, ptr @s1, i64 4), align 4 + // CHECK: store i32 {{%.*}}, ptr getelementptr inbounds nuw (i8, ptr @s1, i64 4), align 4 s1.x++; } diff --git a/clang/test/CodeGen/vector-alignment.c b/clang/test/CodeGen/vector-alignment.c index c0b607e96c618..29e2f236e62e9 100644 --- a/clang/test/CodeGen/vector-alignment.c +++ b/clang/test/CodeGen/vector-alignment.c @@ -74,6 +74,6 @@ double __attribute__((vector_size(24), aligned(64))) v11; // ALL: @v11 {{.*}}, align 64 double __attribute__((vector_size(80), 
aligned(16))) v12; // ALL: @v12 {{.*}}, align 16 -typedef __attribute__((ext_vector_type(248), aligned(4))) _Bool v12b_type; +typedef __attribute__((ext_vector_type(248), aligned(8))) _Bool v12b_type; v12b_type v12b; -// ALL: @v12b {{.*}}, align 4 +// ALL: @v12b {{.*}}, align 8 diff --git a/clang/test/CodeGenCXX/const-init-cxx11.cpp b/clang/test/CodeGenCXX/const-init-cxx11.cpp index 5dfe3488ca7bb..aa811b245768c 100644 --- a/clang/test/CodeGenCXX/const-init-cxx11.cpp +++ b/clang/test/CodeGenCXX/const-init-cxx11.cpp @@ -88,7 +88,7 @@ namespace BaseClass { struct E {}; struct Test2 : X<E,0>, X<E,1>, X<E,2>, X<E,3> {}; - // CHECK: @_ZN9BaseClass2t2E ={{.*}} constant {{.*}} zeroinitializer, align 1 + // CHECK: @_ZN9BaseClass2t2E ={{.*}} constant {{.*}} zeroinitializer, align 4 extern constexpr Test2 t2 = Test2(); struct __attribute((packed)) PackedD { double y = 2; }; diff --git a/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp b/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp index 604a49fefbacb..ddee0f4879be9 100644 --- a/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp +++ b/clang/test/CodeGenCXX/ms-constexpr-static-data-member.cpp @@ -19,7 +19,7 @@ void usethem() { useptr(&S::sdm_udt); } -// CHECK-DAG: @"?sdm_char_array@S@@2QBDB" = linkonce_odr dso_local constant [5 x i8] c"asdf\00", comdat, align 1 +// CHECK-DAG: @"?sdm_char_array@S@@2QBDB" = linkonce_odr dso_local constant [5 x i8] c"asdf\00", comdat, align 4 // CHECK-DAG: @"?sdm_char_ptr@S@@2QEBDEB" = linkonce_odr dso_local constant ptr @"??_C@_04JIHMPGLA@asdf?$AA@", comdat, align 8 diff --git a/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp b/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp index 1a52169d5a938..38b53b1b23c50 100644 --- a/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp +++ b/clang/test/CodeGenCXX/no-opt-volatile-memcpy.cpp @@ -16,8 +16,8 @@ void foo (void) { // CHECK-LABEL: define{{.*}} void @_Z3foov() // CHECK: %[[LS:.*]] = alloca %struct.s, align 4 // CHECK-NEXT: 
call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 %[[LS]], i64 132, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @gs, ptr align 4 @gs, i64 132, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 4 @gs, i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @gs, ptr align 16 @gs, i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 %[[LS]], ptr align 16 @gs, i64 132, i1 true) struct s1 { @@ -31,8 +31,8 @@ void fee (void) { s.y = gs; } // CHECK-LABEL: define{{.*}} void @_Z3feev() -// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @s, i64 132, i1 true) -// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 4 @s, ptr align 4 @gs, i64 132, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 @s, i64 132, i1 true) +// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(ptr align 16 @s, ptr align 16 @gs, i64 132, i1 true) struct d : s1 { }; @@ -43,4 +43,4 @@ void gorf(void) { gd = gd; } // CHECK-LABEL: define{{.*}} void @_Z4gorfv() -// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 4 @gd, ptr align 4 @gd, i64 132, i1 true) +// CHECK: call void @llvm.memcpy.{{.*}}(ptr align 16 @gd, ptr align 16 @gd, i64 132, i1 true) diff --git a/clang/test/CodeGenCXX/pointers-to-data-members.cpp b/clang/test/CodeGenCXX/pointers-to-data-members.cpp index 2ee6c65cf167d..ecf0ee2c0c8f5 100644 --- a/clang/test/CodeGenCXX/pointers-to-data-members.cpp +++ b/clang/test/CodeGenCXX/pointers-to-data-members.cpp @@ -54,7 +54,7 @@ namespace ZeroInit { }; struct C : A, B { int j; }; - // CHECK-GLOBAL: @_ZN8ZeroInit1cE ={{.*}} global {{%.*}} <{ %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::B" { [10 x %"struct.ZeroInit::A"] [%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, 
%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }], i8 0, i64 -1 }, i32 0, [4 x i8] zeroinitializer }>, align 8 + // CHECK-GLOBAL: @_ZN8ZeroInit1cE ={{.*}} global {{%.*}} <{ %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::B" { [10 x %"struct.ZeroInit::A"] [%"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }, %"struct.ZeroInit::A" { i64 -1, i32 0 }], i8 0, i64 -1 }, i32 0, [4 x i8] zeroinitializer }>, align 16 C c; } diff --git a/clang/test/CodeGenCXX/static-init.cpp b/clang/test/CodeGenCXX/static-init.cpp index 51080c895ec65..23c5cde726b9d 100644 --- a/clang/test/CodeGenCXX/static-init.cpp +++ b/clang/test/CodeGenCXX/static-init.cpp @@ -3,8 +3,8 @@ // RUN: %clang_cc1 %s -triple=x86_64-pc-linuxs -emit-llvm -std=c++20 -o - | FileCheck -check-prefix=CHECK -check-prefix=CHECK20 %s // CHECK: @_ZZ1hvE1i = internal global i32 0, align 4 -// CHECK: @base_req ={{.*}} global [4 x i8] c"foo\00", align 1 -// CHECK: @base_req_uchar ={{.*}} global [4 x i8] c"bar\00", align 1 +// CHECK: @base_req ={{.*}} global [4 x i8] c"foo\00", align 4 +// CHECK: @base_req_uchar ={{.*}} global [4 x i8] c"bar\00", align 4 // CHECK: @_ZZN5test31BC1EvE1u = internal global { i8, [3 x i8] } { i8 97, [3 x i8] undef }, align 4 diff --git a/clang/test/CodeGenObjCXX/encode.mm b/clang/test/CodeGenObjCXX/encode.mm index cad70e379c386..e16283945180f 100644 --- a/clang/test/CodeGenObjCXX/encode.mm +++ b/clang/test/CodeGenObjCXX/encode.mm @@ -349,7 +349,7 @@ @implementation N long c; }; - // CHECKCXX20: @_ZN7GH712501sE = constant [7 x 
i8] c"{S=qq}\00", align 1 + // CHECKCXX20: @_ZN7GH712501sE = constant [7 x i8] c"{S=qq}\00", align 4 extern const char s[] = @encode(S); } #endif diff --git a/clang/test/DebugInfo/KeyInstructions/agg.c b/clang/test/DebugInfo/KeyInstructions/agg.c index 58d923d1d9328..4215b70da0aab 100644 --- a/clang/test/DebugInfo/KeyInstructions/agg.c +++ b/clang/test/DebugInfo/KeyInstructions/agg.c @@ -20,7 +20,7 @@ void fun(Struct a) { // CHECK: store <1 x i8> %vecins, ptr @c{{.*}}, !dbg [[G3R1:!.*]] c[0] = 0; -// CHECK: %3 = load <25 x float>, ptr @m, align 4 +// CHECK: %3 = load <25 x float>, ptr @m, align 8 // CHECK: %matins = insertelement <25 x float> %3, float 0.000000e+00, i64 0, !dbg [[G4R2:!.*]] // CHECK: store <25 x float> %matins, ptr @m{{.*}}, !dbg [[G4R1:!.*]] m[0][0] = 0; diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp index 77312c2dac708..033a9dc555fd4 100644 --- a/clang/test/OpenMP/atomic_capture_codegen.cpp +++ b/clang/test/OpenMP/atomic_capture_codegen.cpp @@ -644,13 +644,12 @@ int main(void) { #pragma omp atomic capture iv = bfx.a = bfx.a - ldv; // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}} -// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 0) +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4) monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[OLD:%.+]] = load i32, ptr [[LDTEMP]], -// CHECK: store i32 [[OLD]], ptr [[TEMP1:%.+]], -// CHECK: [[OLD:%.+]] = load i32, ptr [[LDTEMP]], -// CHECK: store i32 [[OLD]], ptr [[TEMP:%.+]], +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP1:%.+]], +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP:%.+]], // CHECK: [[A_LD:%.+]] = load i32, ptr [[TEMP]], // CHECK: 
[[A_SHL:%.+]] = shl i32 [[A_LD]], 1 // CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 1 @@ -662,7 +661,10 @@ int main(void) { // CHECK: [[BF_CLEAR:%.+]] = and i32 [[NEW_VAL]], -2147483648 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] // CHECK: store i32 %{{.+}}, ptr [[TEMP1]] -// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP]], ptr noundef [[TEMP1]], i32 noundef 0, i32 noundef 0) +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP1]] +// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] // CHECK: store i32 [[A_ASHR]], ptr @{{.+}}, @@ -696,29 +698,27 @@ int main(void) { #pragma omp atomic capture {bfx2.a -= ldv; iv = bfx2.a;} // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}} -// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 3) monotonic, align 1 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] -// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST_NEW:%.+]], -// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]], -// CHECK: [[A_LD:%.+]] = load i8, ptr [[BITCAST]], -// CHECK: [[A_ASHR:%.+]] = ashr i8 [[A_LD]], 7 -// CHECK: [[CAST:%.+]] = sext i8 [[A_ASHR]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80 -// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[X_RVAL]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST_NEW:%.+]], +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]], +// CHECK: [[A_LD:%.+]] = load i32, ptr [[BITCAST]], +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_LD]], 31 +// CHECK: [[CAST:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80 +// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[CAST]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[DIV]] to i32 -// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8 -// CHECK: [[BF_LD:%.+]] = load i8, ptr [[BITCAST_NEW]], -// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7 -// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127 -// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i8 %{{.+}}, ptr [[BITCAST_NEW]] -// CHECK: [[NEW_BF_VALUE:%.+]] = load i8, ptr [[BITCAST_NEW]] -// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1 -// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0 -// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1 +// CHECK: [[BF_LD:%.+]] = load i32, ptr [[BITCAST_NEW]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, ptr [[BITCAST_NEW]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[BITCAST_NEW]] +// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] // CHECK: store i32 [[NEW_VAL]], ptr @{{.+}}, @@ -753,28 +753,28 @@ int main(void) { #pragma omp atomic capture {iv = 
bfx3.a; bfx3.a /= ldv;} // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}} -// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP:%.+]], i32 noundef 0) -// CHECK: br label %[[CONT:.+]] -// CHECK: [[CONT]] -// CHECK: [[OLD:%.+]] = load i24, ptr [[LDTEMP]], -// CHECK: store i24 [[OLD]], ptr [[BITCAST2:%.+]], -// CHECK: [[OLD:%.+]] = load i24, ptr [[LDTEMP]], -// CHECK: store i24 [[OLD]], ptr [[BITCAST1:%.+]], -// CHECK: [[A_LD:%.+]] = load i24, ptr [[BITCAST1]], -// CHECK: [[A_SHL:%.+]] = shl i24 [[A_LD]], 7 -// CHECK: [[A_ASHR:%.+]] = ashr i24 [[A_SHL]], 10 -// CHECK: [[CAST:%.+]] = sext i24 [[A_ASHR]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4 +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD]], ptr [[BITCAST2:%.+]], +// CHECK: store i32 [[OLD]], ptr [[BITCAST1:%.+]], +// CHECK: [[A_LD:%.+]] = load i32, ptr [[BITCAST1]], +// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 7 +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 18 +// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80 // CHECK: [[ADD:%.+]] = fadd x86_fp80 [[X_RVAL]], [[EXPR]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[ADD]] to i32 -// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24 -// CHECK: [[BF_LD:%.+]] = load i24, ptr [[BITCAST2]], -// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383 -// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3 -// CHECK: [[BF_CLEAR:%.+]] = and i24 [[BF_LD]], -131065 -// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i24 %{{.+}}, ptr [[BITCAST2]] -// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 3, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP]], ptr noundef 
[[BITCAST2]], i32 noundef 0, i32 noundef 0) +// CHECK: [[BF_LD:%.+]] = load i32, ptr [[BITCAST2]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11 +// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -33552385 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, ptr [[BITCAST2]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[BITCAST2]] +// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] // CHECK: store i32 [[NEW_VAL]], ptr @{{.+}}, diff --git a/clang/test/OpenMP/atomic_read_codegen.c b/clang/test/OpenMP/atomic_read_codegen.c index 8079d5fd557a3..d61ac4615e01b 100644 --- a/clang/test/OpenMP/atomic_read_codegen.c +++ b/clang/test/OpenMP/atomic_read_codegen.c @@ -246,7 +246,8 @@ int main(void) { // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx.a; -// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @bfx_packed, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 0) +// CHECK: [[LD:%.+]] = load atomic i32, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4) monotonic, align 4 +// CHECK: store i32 [[LD]], ptr [[LDTEMP:%.+]] // CHECK: [[LD:%.+]] = load i32, ptr [[LDTEMP]] // CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1 // CHECK: ashr i32 [[SHL]], 1 @@ -260,10 +261,10 @@ int main(void) { // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx2.a; -// CHECK: [[LD:%.+]] = load atomic i8, ptr getelementptr inbounds nuw (i8, ptr @bfx2_packed, i64 3) monotonic, align 1 -// CHECK: store i8 [[LD]], ptr [[LDTEMP:%.+]] -// CHECK: [[LD:%.+]] = load i8, ptr [[LDTEMP]] -// CHECK: ashr i8 [[LD]], 7 +// CHECK: [[LD:%.+]] = load atomic i32, ptr @bfx2_packed monotonic, align 4 
+// CHECK: store i32 [[LD]], ptr [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i32, ptr [[LDTEMP]] +// CHECK: ashr i32 [[LD]], 31 // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx2_packed.a; @@ -275,11 +276,11 @@ int main(void) { // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx3.a; -// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr inbounds nuw (i8, ptr @bfx3_packed, i64 1), ptr noundef [[LDTEMP:%.+]], i32 noundef 0) -// CHECK: [[LD:%.+]] = load i24, ptr [[LDTEMP]] -// CHECK: [[SHL:%.+]] = shl i24 [[LD]], 7 -// CHECK: [[ASHR:%.+]] = ashr i24 [[SHL]], 10 -// CHECK: sext i24 [[ASHR]] to i32 +// CHECK: [[LD:%.+]] = load atomic i32, ptr @bfx3_packed monotonic, align 4 +// CHECK: store i32 [[LD]], ptr [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i32, ptr [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 7 +// CHECK: [[ASHR:%.+]] = ashr i32 [[SHL]], 18 // CHECK: store x86_fp80 #pragma omp atomic read ldv = bfx3_packed.a; diff --git a/clang/test/OpenMP/atomic_update_codegen.cpp b/clang/test/OpenMP/atomic_update_codegen.cpp index eeb7657ca90d3..b529a6748df0f 100644 --- a/clang/test/OpenMP/atomic_update_codegen.cpp +++ b/clang/test/OpenMP/atomic_update_codegen.cpp @@ -577,13 +577,12 @@ int main(void) { #pragma omp atomic bfx.a = bfx.a - ldv; // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}} -// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 0) +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4) monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[PREV_VALUE:%.+]] = load i32, ptr [[LDTEMP]] -// CHECK: store i32 [[PREV_VALUE]], ptr [[TEMP1:%.+]], -// CHECK: [[PREV_VALUE:%.+]] = load i32, ptr [[LDTEMP]] -// CHECK: store i32 [[PREV_VALUE]], ptr [[TEMP:%.+]], +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ 
[[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP1:%.+]], +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[TEMP:%.+]], // CHECK: [[A_LD:%.+]] = load i32, ptr [[TEMP]], // CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 1 // CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 1 @@ -595,7 +594,10 @@ int main(void) { // CHECK: [[BF_CLEAR:%.+]] = and i32 [[NEW_VAL]], -2147483648 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] // CHECK: store i32 %{{.+}}, ptr [[TEMP1]] -// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP]], ptr noundef [[TEMP1]], i32 noundef 0, i32 noundef 0) +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP1]] +// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] #pragma omp atomic update @@ -627,29 +629,27 @@ int main(void) { #pragma omp atomic bfx2.a -= ldv; // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}} -// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 3) monotonic, align 1 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] -// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST1:%.+]], -// CHECK: store i8 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]], -// CHECK: [[A_LD:%.+]] = load i8, ptr [[BITCAST]], -// CHECK: [[A_ASHR:%.+]] = ashr i8 [[A_LD]], 7 -// CHECK: [[CAST:%.+]] = sext i8 [[A_ASHR]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to 
x86_fp80 -// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[X_RVAL]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST1:%.+]], +// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[BITCAST:%.+]], +// CHECK: [[A_LD:%.+]] = load i32, ptr [[BITCAST]], +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_LD]], 31 +// CHECK: [[CAST:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80 +// CHECK: [[DIV:%.+]] = fdiv x86_fp80 [[EXPR]], [[CAST]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[DIV]] to i32 -// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8 -// CHECK: [[BF_LD:%.+]] = load i8, ptr [[BITCAST1]], -// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7 -// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127 -// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i8 %{{.+}}, ptr [[BITCAST1]] -// CHECK: [[NEW_BF_VALUE:%.+]] = load i8, ptr [[BITCAST1]] -// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1 -// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0 -// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1 +// CHECK: [[BF_LD:%.+]] = load i32, ptr [[BITCAST1]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, ptr [[BITCAST1]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[BITCAST1]] +// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] 
#pragma omp atomic update @@ -682,28 +682,28 @@ int main(void) { #pragma omp atomic bfx3.a /= ldv; // CHECK: [[EXPR:%.+]] = load x86_fp80, ptr @{{.+}} -// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[BITCAST:%.+]], i32 noundef 0) -// CHECK: br label %[[CONT:.+]] -// CHECK: [[CONT]] -// CHECK: [[PREV_VALUE:%.+]] = load i24, ptr [[LDTEMP:%.+]], -// CHECK: store i24 [[PREV_VALUE]], ptr [[TEMP1:%.+]], -// CHECK: [[PREV_VALUE:%.+]] = load i24, ptr [[LDTEMP]] -// CHECK: store i24 [[PREV_VALUE]], ptr [[TEMP:%.+]], -// CHECK: [[A_LD:%.+]] = load i24, ptr [[TEMP]], -// CHECK: [[A_SHL:%.+]] = shl i24 [[A_LD]], 7 -// CHECK: [[A_ASHR:%.+]] = ashr i24 [[A_SHL]], 10 -// CHECK: [[CAST:%.+]] = sext i24 [[A_ASHR]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CAST]] to x86_fp80 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4 +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_VAL:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD_VAL]], ptr [[TEMP1:%[^,]+]], align 4 +// CHECK: store i32 [[OLD_VAL]], ptr [[TEMP:%[^,]+]], align 4 +// CHECK: [[A_LD:%.+]] = load i32, ptr [[TEMP]], +// CHECK: [[A_SHL:%.+]] = shl i32 [[A_LD]], 7 +// CHECK: [[A_ASHR:%.+]] = ashr i32 [[A_SHL]], 18 +// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[A_ASHR]] to x86_fp80 // CHECK: [[ADD:%.+]] = fadd x86_fp80 [[X_RVAL]], [[EXPR]] // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 [[ADD]] to i32 -// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24 -// CHECK: [[BF_LD:%.+]] = load i24, ptr [[TEMP1]], -// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383 -// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3 -// CHECK: [[BF_CLEAR:%.+]] = and i24 [[BF_LD]], -131065 -// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i24 %{{.+}}, ptr [[TEMP1]] -// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 3, 
ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP]], ptr noundef [[TEMP1]], i32 noundef 0, i32 noundef 0) +// CHECK: [[BF_LD:%.+]] = load i32, ptr [[TEMP1]], +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11 +// CHECK: [[BF_CLEAR:%.+]] = and i32 [[BF_LD]], -33552385 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, ptr [[TEMP1]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP1]] +// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_VAL]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] #pragma omp atomic update diff --git a/clang/test/OpenMP/atomic_write_codegen.c b/clang/test/OpenMP/atomic_write_codegen.c index 7b3b38c43de75..410770b3f7132 100644 --- a/clang/test/OpenMP/atomic_write_codegen.c +++ b/clang/test/OpenMP/atomic_write_codegen.c @@ -300,17 +300,18 @@ int main(void) { bfx.a = ldv; // CHECK: load x86_fp80, ptr @{{.+}} // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 -// CHECK: call void @__atomic_load(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP:%.+]], i32 noundef 0) +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4) monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[OLD_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP]], -// CHECK: store i32 [[OLD_BF_VALUE]], ptr [[LDTEMP1:%.+]], -// CHECK: [[OLD_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP1]], +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] // CHECK: [[BF_VALUE:%.+]] = and i32 [[NEW_VAL]], 2147483647 -// CHECK: [[BF_CLEAR:%.+]] = and i32 [[OLD_BF_VALUE]], 
-2147483648 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -2147483648 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i32 %{{.+}}, ptr [[LDTEMP1]] -// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), ptr noundef [[LDTEMP]], ptr noundef [[LDTEMP1]], i32 noundef 0, i32 noundef 0) +// CHECK: store i32 %{{.+}}, ptr [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 4), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] #pragma omp atomic write @@ -336,20 +337,19 @@ int main(void) { bfx2.a = ldv; // CHECK: load x86_fp80, ptr @{{.+}} // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 -// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 3) monotonic, align 1 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] -// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8 -// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1 -// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7 -// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127 -// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i8 %{{.+}}, ptr [[LDTEMP:%.+]] -// CHECK: [[NEW_BF_VALUE:%.+]] = load i8, ptr [[LDTEMP]] -// CHECK: [[RES:%.+]] = cmpxchg ptr getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1 -// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 
} [[RES]], 0 -// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1 +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, ptr [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] #pragma omp atomic write @@ -375,18 +375,20 @@ int main(void) { bfx3.a = ldv; // CHECK: load x86_fp80, ptr @{{.+}} // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 -// CHECK: call void @__atomic_load(i64 noundef 3, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[BITCAST:%.+]], i32 noundef 0) +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, ptr @{{.+}} monotonic, align 4 // CHECK: br label %[[CONT:.+]] // CHECK: [[CONT]] -// CHECK: [[OLD_VAL:%.+]] = load i24, ptr %{{.+}}, -// CHECK: store i24 [[OLD_VAL]], ptr [[TEMP:%.+]], -// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24 -// CHECK: [[BF_AND:%.+]] = and i24 [[TRUNC]], 16383 -// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3 -// CHECK: [[BF_CLEAR:%.+]] = and i24 %{{.+}}, -131065 -// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK: store i24 %{{.+}}, ptr [[TEMP]] -// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 3, ptr noundef getelementptr inbounds nuw (i8, ptr @{{.+}}, i64 1), ptr noundef [[LDTEMP:%.+]], ptr noundef [[TEMP]], i32 noundef 0, i32 noundef 0) +// CHECK: [[OLD_VAL:%.+]] = phi i32 
[ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store i32 [[OLD_VAL]], ptr [[TEMP:%.+]] +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -33552385 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, ptr [[TEMP]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32, ptr [[TEMP]] +// CHECK: [[RES:%.+]] = cmpxchg ptr @{{.+}}, i32 [[OLD_VAL]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4 +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]] // CHECK: [[EXIT]] #pragma omp atomic write diff --git a/clang/test/SemaCXX/builtin-assume-aligned.cpp b/clang/test/SemaCXX/builtin-assume-aligned.cpp index 30296c72c6be8..ebd172ce7982f 100644 --- a/clang/test/SemaCXX/builtin-assume-aligned.cpp +++ b/clang/test/SemaCXX/builtin-assume-aligned.cpp @@ -26,7 +26,7 @@ constexpr void *q4 = __builtin_assume_aligned(&n, 4, -4); static char ar1[6]; // expected-error@+2 {{must be initialized by a constant expression}} -// expected-note@+1 {{alignment of the base pointee object (1 byte) is less than the asserted 16 bytes}} +// expected-note@+1 {{alignment of the base pointee object (4 bytes) is less than the asserted 16 bytes}} constexpr void *r1 = __builtin_assume_aligned(&ar1[2], 16); static char ar2[6] __attribute__((aligned(32))); >From 5719b2b28cba1f8739a2e22bfe31eb762ea0dd12 Mon Sep 17 00:00:00 2001 From: Marco Bartoli <[email protected]> Date: Mon, 9 Mar 2026 19:36:36 +0100 Subject: [PATCH 2/6] Add test case --- .../X86/x86_64-global-preferred-alignment.c | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c diff --git a/clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c 
b/clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c new file mode 100644 index 0000000000000..1e2b339552b0b --- /dev/null +++ b/clang/test/CodeGen/X86/x86_64-global-preferred-alignment.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s + +struct S3 { + char Buffer[3]; +}; + +struct S4 { + char Buffer[4]; +}; + +struct S15 { + char Buffer[15]; +}; + +struct S16 { + char Buffer[16]; +}; + +struct S127 { + char Buffer[127]; +}; + +struct S128 { + char Buffer[128]; +}; + +struct S3 g3; +struct S4 g4; +struct S15 g15; +struct S16 g16; +struct S127 g127; +struct S128 g128; + +// CHECK: @g3 = global %struct.S3 zeroinitializer, align 1 +// CHECK: @g4 = global %struct.S4 zeroinitializer, align 4 +// CHECK: @g15 = global %struct.S15 zeroinitializer, align 4 +// CHECK: @g16 = global %struct.S16 zeroinitializer, align 8 +// CHECK: @g127 = global %struct.S127 zeroinitializer, align 8 +// CHECK: @g128 = global %struct.S128 zeroinitializer, align 16 >From 8bf10c140b63e5f05308bfb5efea2df0d2a061e4 Mon Sep 17 00:00:00 2001 From: Marco Bartoli <[email protected]> Date: Fri, 10 Apr 2026 21:07:39 +0200 Subject: [PATCH 3/6] Only increase alignment for non external --- clang/lib/AST/ASTContext.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index c9798d53fa5fa..8af0da62ee2f7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1837,7 +1837,11 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const { uint64_t TypeSize = !BaseT->isIncompleteType() ? getTypeSize(T.getTypePtr()) : 0; Align = std::max(Align, getMinGlobalAlignOfVar(TypeSize, VD)); - Align = std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align)); + // Do not increase alignment for externally defined variables + // to not break ABI compatibility. 
+ if (VD->hasDefinition()) + Align = + std::max(Align, getLargeGlobalPreferredAlign(TypeSize, Align)); } // Fields can be subject to extra alignment constraints, like if >From 19b30de57b417b28a5969976ed0864308a545c18 Mon Sep 17 00:00:00 2001 From: Marco Bartoli <[email protected]> Date: Fri, 10 Apr 2026 21:07:52 +0200 Subject: [PATCH 4/6] Format --- clang/include/clang/Basic/TargetInfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 762c4d6bc9034..fa64ee5cb2ff8 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -853,8 +853,8 @@ class TargetInfo : public TransferrableTargetInfo, unsigned getLargeArrayMinWidth() const { return LargeArrayMinWidth; } unsigned getLargeArrayAlign() const { return LargeArrayAlign; } - // getLargeGlobalAlign/getLargeGlobalMinWidth - Return the minimum global size that is - // 'large' and its alignment. + // getLargeGlobalAlign/getLargeGlobalMinWidth - Return the minimum global size + // that is 'large' and its alignment. 
unsigned getLargeGlobalMinWidth() const { return LargeGlobalMinWidth; } unsigned getLargeGlobalAlign() const { return LargeGlobalAlign; } >From 2d3a757eefc13f063c751fc35416308f642a5c08 Mon Sep 17 00:00:00 2001 From: Marco Bartoli <[email protected]> Date: Fri, 10 Apr 2026 21:08:31 +0200 Subject: [PATCH 5/6] Format --- clang/include/clang/AST/ASTContext.h | 3 ++- clang/lib/AST/ASTContext.cpp | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 4686c9de42083..175256fee8264 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -2805,7 +2805,8 @@ class ASTContext : public RefCountedBase<ASTContext> { } unsigned getPreferredTypeAlign(const Type *T) const; - unsigned getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Align) const; + unsigned getLargeGlobalPreferredAlign(uint64_t TypeSize, + unsigned Align) const; /// Return the default alignment for __attribute__((aligned)) on /// this target, to be used if no alignment value is specified. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 8af0da62ee2f7..4539b4677f0ef 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2647,10 +2647,12 @@ CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const { return toCharUnitsFromBits(getTypeUnadjustedAlign(T)); } -/// getLargeGlobalPreferredAlign - Return the "preferred" alignment of the specified -/// global variable in bits. Only variables larger than the specifed "LargeGlobalMinWidth" will -/// be aligned using the "LargeGlobalAlign" alignment - typically 16 bytes -unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Align) const { +/// getLargeGlobalPreferredAlign - Return the "preferred" alignment of the +/// specified global variable in bits. 
Only variables larger than the specified +/// "LargeGlobalMinWidth" will be aligned using the "LargeGlobalAlign" alignment +/// - typically 16 bytes +unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize, + unsigned Align) const { if (TypeSize >= Target->getLargeGlobalMinWidth()) return Target->getLargeGlobalAlign(); else if (TypeSize >= 128) @@ -2659,7 +2661,6 @@ unsigned ASTContext::getLargeGlobalPreferredAlign(uint64_t TypeSize, unsigned Al return (unsigned)32; else return Align; - } /// getPreferredTypeAlign - Return the "preferred" alignment of the specified >From 4cad029e8dac070c12b2cf8481ab85d991c23031 Mon Sep 17 00:00:00 2001 From: Marco Bartoli <[email protected]> Date: Fri, 10 Apr 2026 21:16:34 +0200 Subject: [PATCH 6/6] Add test --- .../CodeGen/x86_64-extern-global-alignment.c | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 clang/test/CodeGen/x86_64-extern-global-alignment.c diff --git a/clang/test/CodeGen/x86_64-extern-global-alignment.c b/clang/test/CodeGen/x86_64-extern-global-alignment.c new file mode 100644 index 0000000000000..1c612832a1345 --- /dev/null +++ b/clang/test/CodeGen/x86_64-extern-global-alignment.c @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.15.0 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s + +// Test that getLargeGlobalPreferredAlign does not bump alignment for extern +// global variable declarations. Extern variables are defined in external +// libraries, and the compiler cannot assume they are aligned beyond their +// natural type alignment.
+ +// A 128-byte struct of doubles (natural alignment 8) +struct S128Doubles { + double m11,m12,m13,m14; + double m21,m22,m23,m24; + double m31,m32,m33,m34; + double m41,m42,m43,m44; +}; + +// A 128-byte struct of chars (natural alignment 1) +struct S128 { + char Buffer[128]; +}; + +// A 64-byte struct of chars (natural alignment 1) +struct S64 { + char Buffer[64]; +}; + +// A 16-byte struct of chars (natural alignment 1) +struct S16 { + char Buffer[16]; +}; + +// Extern declarations: alignment must NOT be bumped beyond the type's natural +// preferred alignment, because the compiler does not control placement of +// extern symbols. +extern struct S128Doubles extern_s128doubles; +extern struct S128 extern_s128; +extern struct S64 extern_s64; +extern struct S16 extern_s16; + +// Definitions: alignment CAN be bumped because the compiler controls placement. +struct S128Doubles defined_s128doubles = {0}; +struct S128 defined_s128 = {0}; +struct S64 defined_s64 = {0}; +struct S16 defined_s16 = {0}; + +// Extern globals should use the type's natural preferred alignment: +// CHECK-DAG: @extern_s128doubles = external {{(dso_local )?}}global %struct.S128Doubles, align 8 +// CHECK-DAG: @extern_s128 = external {{(dso_local )?}}global %struct.S128, align 1 +// CHECK-DAG: @extern_s64 = external {{(dso_local )?}}global %struct.S64, align 1 +// CHECK-DAG: @extern_s16 = external {{(dso_local )?}}global %struct.S16, align 1 + +// Defined globals can use the bumped alignment: +// CHECK-DAG: @defined_s128doubles = {{(dso_local )?}}global %struct.S128Doubles zeroinitializer, align 16 +// CHECK-DAG: @defined_s128 = {{(dso_local )?}}global %struct.S128 zeroinitializer, align 16 +// CHECK-DAG: @defined_s64 = {{(dso_local )?}}global %struct.S64 zeroinitializer, align 8 +// CHECK-DAG: @defined_s16 = {{(dso_local )?}}global %struct.S16 zeroinitializer, align 8 + +void use(void *); +void test(void) { + use(&extern_s128doubles); + use(&extern_s128); + use(&extern_s64); + use(&extern_s16); + 
use(&defined_s128doubles); + use(&defined_s128); + use(&defined_s64); + use(&defined_s16); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
