https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/145474
>From 3d51cf4796fe5f1e1577ce23f2970975bd216157 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Tue, 24 Jun 2025 10:08:40 +0200 Subject: [PATCH 1/4] [EarlyCSE] Add tests for writeonly --- llvm/test/Transforms/EarlyCSE/writeonly.ll | 153 ++++++++++++++++++++- 1 file changed, 152 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll index 0bfffa3c825a3..354638e9f3fe1 100644 --- a/llvm/test/Transforms/EarlyCSE/writeonly.ll +++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s +; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO-MSSA +; RUN: opt -S -passes='early-cse<memssa>' < %s | FileCheck %s --check-prefixes=CHECK,MSSA @var = global i32 undef declare void @foo() nounwind @@ -15,3 +16,153 @@ define void @test() { store i32 2, ptr @var ret void } + +declare void @writeonly_void() memory(write) + +; Can CSE writeonly calls, including non-nounwind/willreturn. +define void @writeonly_cse() { +; CHECK-LABEL: @writeonly_cse( +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: ret void +; + call void @writeonly_void() + call void @writeonly_void() + ret void +} + +; Can CSE, loads do not matter. +define i32 @writeonly_cse_intervening_load(ptr %p) { +; CHECK-LABEL: @writeonly_cse_intervening_load( +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: ret i32 [[V]] +; + call void @writeonly_void() + %v = load i32, ptr %p + call void @writeonly_void() + ret i32 %v +} + +; Cannot CSE, the store may be to the same memory. +define void @writeonly_cse_intervening_store(ptr %p) { +; CHECK-LABEL: @writeonly_cse_intervening_store( +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: ret void +; + call void @writeonly_void() + store i32 0, ptr %p + call void @writeonly_void() + ret void +} + +; Can CSE, the store does not alias the writeonly call. +define void @writeonly_cse_intervening_noalias_store(ptr noalias %p) { +; CHECK-LABEL: @writeonly_cse_intervening_noalias_store( +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: ret void +; + call void @writeonly_void() + store i32 0, ptr %p + call void @writeonly_void() + ret void +} + +; Cannot CSE loads across writeonly call. +define i32 @load_cse_across_writeonly(ptr %p) { +; CHECK-LABEL: @load_cse_across_writeonly( +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v1 = load i32, ptr %p + call void @writeonly_void() + %v2 = load i32, ptr %p + %res = sub i32 %v1, %v2 + ret i32 %res +} + +; Can CSE loads across eliminated writeonly call. +define i32 @load_cse_across_csed_writeonly(ptr %p) { +; CHECK-LABEL: @load_cse_across_csed_writeonly( +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @writeonly_void() +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RES]] +; + call void @writeonly_void() + %v1 = load i32, ptr %p + call void @writeonly_void() + %v2 = load i32, ptr %p + %res = sub i32 %v1, %v2 + ret i32 %res +} + +declare i32 @writeonly(ptr %p) memory(write) + +; Can CSE writeonly calls with arg and return. +define i32 @writeonly_ret_cse(ptr %p) { +; CHECK-LABEL: @writeonly_ret_cse( +; CHECK-NEXT: [[V1:%.*]] = call i32 @writeonly(ptr [[P:%.*]]) +; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P]]) +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v1 = call i32 @writeonly(ptr %p) + %v2 = call i32 @writeonly(ptr %p) + %res = sub i32 %v1, %v2 + ret i32 %res +} + +; Cannot CSE writeonly calls with different arguments. +define i32 @writeonly_different_args(ptr %p1, ptr %p2) { +; CHECK-LABEL: @writeonly_different_args( +; CHECK-NEXT: [[V1:%.*]] = call i32 @writeonly(ptr [[P1:%.*]]) +; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P2:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RES]] +; + %v1 = call i32 @writeonly(ptr %p1) + %v2 = call i32 @writeonly(ptr %p2) + %res = sub i32 %v1, %v2 + ret i32 %res +} + +declare void @callee() + +; These are weird cases where the same call is both readonly and writeonly +; based on call-site attributes. I believe this implies that both calls are +; actually readnone and safe to CSE, but leave them alone to be conservative. +define void @readonly_and_writeonly() { +; CHECK-LABEL: @readonly_and_writeonly( +; CHECK-NEXT: call void @callee() #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @callee() #[[ATTR1]] +; CHECK-NEXT: ret void +; + call void @callee() memory(read) + call void @callee() memory(write) + ret void +} + +define void @writeonly_and_readonly() { +; CHECK-LABEL: @writeonly_and_readonly( +; CHECK-NEXT: call void @callee() #[[ATTR1]] +; CHECK-NEXT: call void @callee() #[[ATTR2]] +; CHECK-NEXT: ret void +; + call void @callee() memory(write) + call void @callee() memory(read) + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; MSSA: {{.*}} +; NO-MSSA: {{.*}} >From 13503e2fde2c9e503bf6a0f952b871fc8b242614 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Tue, 24 Jun 2025 09:37:15 +0200 Subject: [PATCH 2/4] [EarlyCSE] Add support for writeonly call CSE --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 16 ++++++++--- llvm/test/Transforms/EarlyCSE/writeonly.ll | 33 +++++++++------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 381de60fcface..caa9c4cd0401f 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -493,7 +493,7 @@ struct CallValue { static bool canHandle(Instruction *Inst) { CallInst *CI = dyn_cast<CallInst>(Inst); - if (!CI || !CI->onlyReadsMemory() || + if (!CI || (!CI->onlyReadsMemory() && !CI->onlyWritesMemory()) || // FIXME: Currently the calls which may access the thread id may // be considered as not accessing the memory. But this is // problematic for coroutines, since coroutines may resume in a @@ -1626,14 +1626,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { !(MemInst.isValid() && !MemInst.mayReadFromMemory())) LastStore = nullptr; - // If this is a read-only call, process it. - if (CallValue::canHandle(&Inst)) { + // If this is a read-only or write-only call, process it. Skip store + // MemInsts, as they will be more precisely handled lateron. + if (CallValue::canHandle(&Inst) && + (!MemInst.isValid() || !MemInst.isStore())) { // If we have an available version of this call, and if it is the right // generation, replace this instruction. std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst); if (InVal.first != nullptr && isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first, - &Inst)) { + &Inst) && + InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) { LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst << " to: " << *InVal.first << '\n'); if (!DebugCounter::shouldExecute(CSECounter)) { @@ -1651,6 +1654,11 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + // Increase memory generation for writes. Do this before inserting + // the call, so it has the generation after the write occurred. + if (Inst.mayWriteToMemory()) + ++CurrentGeneration; + // Otherwise, remember that we have this instruction. AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration)); continue; diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll index 354638e9f3fe1..c09b913f9ff2b 100644 --- a/llvm/test/Transforms/EarlyCSE/writeonly.ll +++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll @@ -23,7 +23,6 @@ declare void @writeonly_void() memory(write) define void @writeonly_cse() { ; CHECK-LABEL: @writeonly_cse( ; CHECK-NEXT: call void @writeonly_void() -; CHECK-NEXT: call void @writeonly_void() ; CHECK-NEXT: ret void ; call void @writeonly_void() @@ -36,7 +35,6 @@ define i32 @writeonly_cse_intervening_load(ptr %p) { ; CHECK-LABEL: @writeonly_cse_intervening_load( ; CHECK-NEXT: call void @writeonly_void() ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4 -; CHECK-NEXT: call void @writeonly_void() ; CHECK-NEXT: ret i32 [[V]] ; call void @writeonly_void() @@ -61,11 +59,16 @@ define void @writeonly_cse_intervening_store(ptr %p) { ; Can CSE, the store does not alias the writeonly call. define void @writeonly_cse_intervening_noalias_store(ptr noalias %p) { -; CHECK-LABEL: @writeonly_cse_intervening_noalias_store( -; CHECK-NEXT: call void @writeonly_void() -; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4 -; CHECK-NEXT: call void @writeonly_void() -; CHECK-NEXT: ret void +; NO-MSSA-LABEL: @writeonly_cse_intervening_noalias_store( +; NO-MSSA-NEXT: call void @writeonly_void() +; NO-MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4 +; NO-MSSA-NEXT: call void @writeonly_void() +; NO-MSSA-NEXT: ret void +; +; MSSA-LABEL: @writeonly_cse_intervening_noalias_store( +; MSSA-NEXT: call void @writeonly_void() +; MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4 +; MSSA-NEXT: ret void ; call void @writeonly_void() store i32 0, ptr %p @@ -93,11 +96,8 @@ define i32 @load_cse_across_writeonly(ptr %p) { define i32 @load_cse_across_csed_writeonly(ptr %p) { ; CHECK-LABEL: @load_cse_across_csed_writeonly( ; CHECK-NEXT: call void @writeonly_void() -; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4 -; CHECK-NEXT: call void @writeonly_void() -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P]], align 4 -; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 0 ; call void @writeonly_void() %v1 = load i32, ptr %p @@ -112,10 +112,8 @@ declare i32 @writeonly(ptr %p) memory(write) ; Can CSE writeonly calls with arg and return. define i32 @writeonly_ret_cse(ptr %p) { ; CHECK-LABEL: @writeonly_ret_cse( -; CHECK-NEXT: [[V1:%.*]] = call i32 @writeonly(ptr [[P:%.*]]) -; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P]]) -; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P:%.*]]) +; CHECK-NEXT: ret i32 0 ; %v1 = call i32 @writeonly(ptr %p) %v2 = call i32 @writeonly(ptr %p) @@ -163,6 +161,3 @@ define void @writeonly_and_readonly() { call void @callee() memory(read) ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; MSSA: {{.*}} -; NO-MSSA: {{.*}} >From a8d3da9620a623629f6792723ab8c570d820af9f Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Tue, 24 Jun 2025 11:20:57 +0200 Subject: [PATCH 3/4] Fix clang test --- clang/test/CodeGenCXX/auto-var-init.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index 94386e44573b5..67bc5d417bce9 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -1342,6 +1342,7 @@ TEST_UNINIT(base, base); // ZERO-O1-NOT: !annotation TEST_BRACES(base, base); +// ZERO-LABEL: @test_base_braces() // CHECK-LABEL: @test_base_braces() // CHECK: %braces = alloca %struct.base, align [[ALIGN:[0-9]*]] // CHECK-NEXT: call void @llvm.memset{{.*}}(ptr align [[ALIGN]] %{{.*}}, i8 0, i64 8, i1 false) >From 2b59ce10919a78eae5b3b9d0a127a4e2a72a0335 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Fri, 27 Jun 2025 17:21:07 +0200 Subject: [PATCH 4/4] skip memset --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index caa9c4cd0401f..b6cb987c0423f 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -1627,9 +1627,11 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore = nullptr; // If this is a read-only or write-only call, process it. Skip store - // MemInsts, as they will be more precisely handled lateron. + // MemInsts, as they will be more precisely handled later on. Also skip + // memsets, as DSE may be able to optimize them better by removing the + // earlier rather than later store. if (CallValue::canHandle(&Inst) && - (!MemInst.isValid() || !MemInst.isStore())) { + (!MemInst.isValid() || !MemInst.isStore()) && !isa<MemSetInst>(&Inst)) { // If we have an available version of this call, and if it is the right // generation, replace this instruction. std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits