https://github.com/Amichaxx updated https://github.com/llvm/llvm-project/pull/162679
>From e654aa7c7d051e355c391f16c5ff3e152fa5784f Mon Sep 17 00:00:00 2001 From: Amichaxx <[email protected]> Date: Mon, 6 Oct 2025 15:42:18 +0000 Subject: [PATCH 1/4] [LLVM] Fix clang to emit llvm-ir for fadd/fsub atomics Currently, Clang emits CAS loops for atomic fp compound assignments, instead of atomicrmw instructions. The code in CGExprScalar.cpp now checks for both integer and floating-point atomic types and emits atomicrmw fadd/fsub instructions in the LLVM IR. --- clang/lib/CodeGen/CGExprScalar.cpp | 24 +++++++-- clang/test/CodeGen/aarch64-lsfe-atomics.c | 61 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-lsfe-atomics.c diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index f319b176513f8..0be5f230406ec 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3847,7 +3847,17 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( llvm::PHINode *atomicPHI = nullptr; if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) { QualType type = atomicTy->getValueType(); - if (!type->isBooleanType() && type->isIntegerType() && + const bool isFloat = type->isFloatingType(); + const bool isInteger = type->isIntegerType(); + + bool isPowerOfTwo = false; + if (isFloat || isInteger) { + llvm::Type *IRTy = CGF.ConvertType(type); + uint64_t StoreBits = + CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy); + isPowerOfTwo = llvm::isPowerOf2_64(StoreBits); + } + if (!type->isBooleanType() && (isInteger || isFloat) && isPowerOfTwo && !(type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) && CGF.getLangOpts().getSignedOverflowBehavior() != @@ -3862,12 +3872,16 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( case BO_ShrAssign: break; case BO_AddAssign: - AtomicOp = llvm::AtomicRMWInst::Add; - Op = llvm::Instruction::Add; + AtomicOp = isFloat ? 
llvm::AtomicRMWInst::FAdd + : llvm::AtomicRMWInst::Add; + Op = isFloat ? llvm::Instruction::FAdd + : llvm::Instruction::Add; break; case BO_SubAssign: - AtomicOp = llvm::AtomicRMWInst::Sub; - Op = llvm::Instruction::Sub; + AtomicOp = isFloat ? llvm::AtomicRMWInst::FSub + : llvm::AtomicRMWInst::Sub; + Op = isFloat ? llvm::Instruction::FSub + : llvm::Instruction::Sub; break; case BO_AndAssign: AtomicOp = llvm::AtomicRMWInst::And; diff --git a/clang/test/CodeGen/aarch64-lsfe-atomics.c b/clang/test/CodeGen/aarch64-lsfe-atomics.c new file mode 100644 index 0000000000000..957e960b3fe3a --- /dev/null +++ b/clang/test/CodeGen/aarch64-lsfe-atomics.c @@ -0,0 +1,61 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECK-LLVM + +_Atomic(float) f; +_Atomic(double) d; + +// CHECK-LLVM-LABEL: define dso_local void @test_float_add( +// CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4 +// CHECK-LLVM-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load float, ptr [[VAL_ADDR]], align 4 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @f, float [[TMP0]] seq_cst, align 4 +// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], [[TMP0]] +// CHECK-LLVM-NEXT: ret void +// +void test_float_add(float val) { + f += val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_double_add( +// CHECK-LLVM-SAME: double noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca double, align 8 +// CHECK-LLVM-NEXT: store double [[VAL]], ptr [[VAL_ADDR]], align 8 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load double, ptr [[VAL_ADDR]], align 8 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @d, double [[TMP0]] seq_cst, align 8 +// 
CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd double [[TMP1]], [[TMP0]] +// CHECK-LLVM-NEXT: ret void +// +void test_double_add(double val) { + d += val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_float_sub( +// CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4 +// CHECK-LLVM-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load float, ptr [[VAL_ADDR]], align 4 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @f, float [[TMP0]] seq_cst, align 4 +// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub float [[TMP1]], [[TMP0]] +// CHECK-LLVM-NEXT: ret void +// +void test_float_sub(float val) { + f -= val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_double_sub( +// CHECK-LLVM-SAME: double noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca double, align 8 +// CHECK-LLVM-NEXT: store double [[VAL]], ptr [[VAL_ADDR]], align 8 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load double, ptr [[VAL_ADDR]], align 8 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @d, double [[TMP0]] seq_cst, align 8 +// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub double [[TMP1]], [[TMP0]] +// CHECK-LLVM-NEXT: ret void +// +void test_double_sub(double val){ + d -= val; +} >From 653b974e21051fa663ba9196f23baa68c30d7561 Mon Sep 17 00:00:00 2001 From: Amichaxx <[email protected]> Date: Thu, 9 Oct 2025 15:32:00 +0000 Subject: [PATCH 2/4] clang format --- clang/lib/CodeGen/CGExprScalar.cpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 0be5f230406ec..4028a91ad7639 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3847,14 +3847,13 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( llvm::PHINode *atomicPHI = nullptr; if (const AtomicType 
*atomicTy = LHSTy->getAs<AtomicType>()) { QualType type = atomicTy->getValueType(); - const bool isFloat = type->isFloatingType(); + const bool isFloat = type->isFloatingType(); const bool isInteger = type->isIntegerType(); bool isPowerOfTwo = false; if (isFloat || isInteger) { llvm::Type *IRTy = CGF.ConvertType(type); - uint64_t StoreBits = - CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy); + uint64_t StoreBits = CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy); isPowerOfTwo = llvm::isPowerOf2_64(StoreBits); } if (!type->isBooleanType() && (isInteger || isFloat) && isPowerOfTwo && @@ -3872,16 +3871,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( case BO_ShrAssign: break; case BO_AddAssign: - AtomicOp = isFloat ? llvm::AtomicRMWInst::FAdd - : llvm::AtomicRMWInst::Add; - Op = isFloat ? llvm::Instruction::FAdd - : llvm::Instruction::Add; + AtomicOp = + isFloat ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::Add; + Op = isFloat ? llvm::Instruction::FAdd : llvm::Instruction::Add; break; case BO_SubAssign: - AtomicOp = isFloat ? llvm::AtomicRMWInst::FSub - : llvm::AtomicRMWInst::Sub; - Op = isFloat ? llvm::Instruction::FSub - : llvm::Instruction::Sub; + AtomicOp = + isFloat ? llvm::AtomicRMWInst::FSub : llvm::AtomicRMWInst::Sub; + Op = isFloat ? 
llvm::Instruction::FSub : llvm::Instruction::Sub; break; case BO_AndAssign: AtomicOp = llvm::AtomicRMWInst::And; >From 52d55964c53ee8d531f3b3e032fe9f135e81bde6 Mon Sep 17 00:00:00 2001 From: Amichaxx <[email protected]> Date: Wed, 22 Oct 2025 10:07:38 +0000 Subject: [PATCH 3/4] Comments addressing - Modified conditional - Added bf16 and fp16 tests --- clang/lib/CodeGen/CGExprScalar.cpp | 2 +- clang/test/CodeGen/aarch64-lsfe-atomics.c | 64 ++++++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 4028a91ad7639..1bdab1563dda2 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3856,7 +3856,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( uint64_t StoreBits = CGF.CGM.getDataLayout().getTypeStoreSizeInBits(IRTy); isPowerOfTwo = llvm::isPowerOf2_64(StoreBits); } - if (!type->isBooleanType() && (isInteger || isFloat) && isPowerOfTwo && + if (!type->isBooleanType() && (isInteger || (isPowerOfTwo && isFloat)) && !(type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) && CGF.getLangOpts().getSignedOverflowBehavior() != diff --git a/clang/test/CodeGen/aarch64-lsfe-atomics.c b/clang/test/CodeGen/aarch64-lsfe-atomics.c index 957e960b3fe3a..428d038df198b 100644 --- a/clang/test/CodeGen/aarch64-lsfe-atomics.c +++ b/clang/test/CodeGen/aarch64-lsfe-atomics.c @@ -3,6 +3,8 @@ _Atomic(float) f; _Atomic(double) d; +_Atomic(__bf16) bf; +_Atomic(_Float16) h; // CHECK-LLVM-LABEL: define dso_local void @test_float_add( // CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { @@ -32,6 +34,36 @@ void test_double_add(double val) { d += val; } +// CHECK-LLVM-LABEL: define dso_local void @test_bf16_add( +// CHECK-LLVM-SAME: bfloat noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LLVM-NEXT: store bfloat 
[[VAL]], ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float +// CHECK-LLVM-NEXT: [[CONV:%.*]] = fptrunc float [[EXT]] to bfloat +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @bf, bfloat [[CONV]] seq_cst, align 2 +// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd bfloat [[TMP1]], [[CONV]] +// CHECK-LLVM-NEXT: ret void +// +void test_bf16_add(__bf16 val){ + bf += val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_f16_add( +// CHECK-LLVM-SAME: half noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca half, align 2 +// CHECK-LLVM-NEXT: store half [[VAL]], ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load half, ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr @h, half [[TMP0]] seq_cst, align 2 +// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fadd half [[TMP1]], [[TMP0]] +// CHECK-LLVM-NEXT: ret void +// +void test_f16_add(_Float16 val){ + h += val; +} + // CHECK-LLVM-LABEL: define dso_local void @test_float_sub( // CHECK-LLVM-SAME: float noundef [[VAL:%.*]]) #[[ATTR0]] { // CHECK-LLVM-NEXT: [[ENTRY:.*:]] @@ -57,5 +89,35 @@ void test_float_sub(float val) { // CHECK-LLVM-NEXT: ret void // void test_double_sub(double val){ - d -= val; + d -= val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_bf16_sub( +// CHECK-LLVM-SAME: bfloat noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LLVM-NEXT: store bfloat [[VAL]], ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float +// CHECK-LLVM-NEXT: [[CONV:%.*]] = fptrunc float [[EXT]] to bfloat +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @bf, bfloat [[CONV]] seq_cst, align 2 +// CHECK-LLVM-NEXT: 
[[TMP2:%.*]] = fsub bfloat [[TMP1]], [[CONV]] +// CHECK-LLVM-NEXT: ret void +// +void test_bf16_sub(__bf16 val){ + bf -= val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_f16_sub( +// CHECK-LLVM-SAME: half noundef [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca half, align 2 +// CHECK-LLVM-NEXT: store half [[VAL]], ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = load half, ptr [[VAL_ADDR]], align 2 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr @h, half [[TMP0]] seq_cst, align 2 +// CHECK-LLVM-NEXT: [[TMP2:%.*]] = fsub half [[TMP1]], [[TMP0]] +// CHECK-LLVM-NEXT: ret void +// +void test_f16_sub(_Float16 val){ + h -= val; } >From 97148205d2857f9bbbd99d6d33feede222d2401b Mon Sep 17 00:00:00 2001 From: Amichaxx <[email protected]> Date: Thu, 6 Nov 2025 14:48:01 +0000 Subject: [PATCH 4/4] Added tests test_float_compound_add and test_float_read_add to aarch64-lsfe-atomics.c --- clang/test/CodeGen/aarch64-lsfe-atomics.c | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/clang/test/CodeGen/aarch64-lsfe-atomics.c b/clang/test/CodeGen/aarch64-lsfe-atomics.c index 428d038df198b..d51850591b038 100644 --- a/clang/test/CodeGen/aarch64-lsfe-atomics.c +++ b/clang/test/CodeGen/aarch64-lsfe-atomics.c @@ -20,6 +20,35 @@ void test_float_add(float val) { f += val; } +// CHECK-LLVM-LABEL: define dso_local void @test_float_compound_add( +// CHECK-LLVM-SAME: float [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4 +// CHECK-LLVM-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4 +// CHECK-LLVM-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic float, ptr [[VAL_ADDR]] seq_cst, align 4 +// CHECK-LLVM-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @f, float [[ATOMIC_LOAD]] seq_cst, align 4 +// CHECK-LLVM-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], [[ATOMIC_LOAD]] +// CHECK-LLVM-NEXT: ret void +// +void 
test_float_compound_add(_Atomic(float) val){ + f += val; +} + +// CHECK-LLVM-LABEL: define dso_local void @test_float_read_add( +// CHECK-LLVM-SAME: float [[VAL:%.*]]) #[[ATTR0]] { +// CHECK-LLVM-NEXT: [[ENTRY:.*:]] +// CHECK-LLVM-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4 +// CHECK-LLVM-NEXT: store float [[VAL]], ptr [[VAL_ADDR]], align 4 +// CHECK-LLVM-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic float, ptr @f seq_cst, align 4 +// CHECK-LLVM-NEXT: [[ATOMIC_LOAD1:%.*]] = load atomic float, ptr [[VAL_ADDR]] seq_cst, align 4 +// CHECK-LLVM-NEXT: [[ADD:%.*]] = fadd float [[ATOMIC_LOAD]], [[ATOMIC_LOAD1]] +// CHECK-LLVM-NEXT: store atomic float [[ADD]], ptr @f seq_cst, align 4 +// CHECK-LLVM-NEXT: ret void +// +void test_float_read_add(_Atomic(float) val){ + f = f + val; +} + // CHECK-LLVM-LABEL: define dso_local void @test_double_add( // CHECK-LLVM-SAME: double noundef [[VAL:%.*]]) #[[ATTR0]] { // CHECK-LLVM-NEXT: [[ENTRY:.*:]] _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
