https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/130964
Backport 5da9044c40840187330526ca888290a95927a629

Requested by: @nikic

From d630c925b0ef0f3a60b6f2173f1859e56bc6928d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npo...@redhat.com>
Date: Wed, 12 Mar 2025 14:52:01 +0100
Subject: [PATCH] [MemCpyOpt] Fix clobber check in fca2memcpy optimization

This effectively reverts #108535.

The old AA code was looking for the *first* clobber between the load and
store and then trying to move all the way up there. The new MSSA based
code instead found the *last* clobber. There might still be an earlier
clobber that has not been accounted for.

Fixes #130632.

(cherry picked from commit 5da9044c40840187330526ca888290a95927a629)
---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 24 ++++++-----
 llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll  | 40 +++++++++++++++----
 2 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a80a85f38e74d..971d6012f6129 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -638,17 +638,19 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
       (EnableMemCpyOptWithoutLibcalls ||
        (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
     MemoryLocation LoadLoc = MemoryLocation::get(LI);
-    MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
-                   *StoreAccess = MSSA->getMemoryAccess(SI);
-
-    // We use MSSA to check if an instruction may store to the memory we load
-    // from in between the load and the store. If such an instruction is found,
-    // we try to promote there instead of at the store position.
-    auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-        StoreAccess->getDefiningAccess(), LoadLoc, BAA);
-    Instruction *P = MSSA->dominates(LoadAccess, Clobber)
-                         ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
-                         : SI;
+
+    // We use alias analysis to check if an instruction may store to
+    // the memory we load from in between the load and the store. If
+    // such an instruction is found, we try to promote there instead
+    // of at the store position.
+    // TODO: Can use MSSA for this.
+    Instruction *P = SI;
+    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+      if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
+        P = &I;
+        break;
+      }
+    }
 
     // If we found an instruction that may write to the loaded memory,
     // we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 61e349e01ed91..7d4557aa331c4 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -51,8 +51,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
 
 define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -79,9 +79,9 @@ define void @copyalias(ptr %src, ptr %dst) {
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @addrproducer(
-; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
+; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i64 1
 ; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -113,8 +113,8 @@ define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidp
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
 ; CHECK-NEXT:    [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]]
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -130,7 +130,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @throwing_call(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT:    call void @call() [[ATTR2:#.*]]
+; CHECK-NEXT:    call void @call() #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -156,4 +156,30 @@ loop:
   br label %loop
 }
 
+; There are multiple instructions that can clobber the source memory here.
+; We can move the dest write past the store to %ptr.24, but not the memcpy.
+; Make sure we don't perform fca2memcpy conversion in this case.
+define void @multiple_clobbering(ptr %ptr, ptr %ptr.copy) {
+; CHECK-LABEL: @multiple_clobbering(
+; CHECK-NEXT:    [[PTR_8:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT:    [[PTR_24:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 24
+; CHECK-NEXT:    [[PTR_32:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 32
+; CHECK-NEXT:    [[PTR_COPY_8:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_COPY:%.*]], i64 8
+; CHECK-NEXT:    [[STRUCT:%.*]] = load { i32, i64 }, ptr [[PTR_COPY_8]], align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[PTR_8]], ptr [[PTR_32]], i64 12, i1 false)
+; CHECK-NEXT:    store i64 1, ptr [[PTR_24]], align 8
+; CHECK-NEXT:    store { i32, i64 } [[STRUCT]], ptr [[PTR_32]], align 8
+; CHECK-NEXT:    ret void
+;
+  %ptr.8 = getelementptr inbounds nuw i8, ptr %ptr, i64 8
+  %ptr.24 = getelementptr inbounds nuw i8, ptr %ptr, i64 24
+  %ptr.32 = getelementptr inbounds nuw i8, ptr %ptr, i64 32
+  %ptr.copy.8 = getelementptr inbounds nuw i8, ptr %ptr.copy, i64 8
+  %struct = load { i32, i64 }, ptr %ptr.copy.8, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr %ptr.8, ptr %ptr.32, i64 12, i1 false)
+  store i64 1, ptr %ptr.24, align 8
+  store { i32, i64 } %struct, ptr %ptr.32, align 8
+  ret void
+}
+
 declare void @call()

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
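For readers skimming the diff: the restored logic is simply a forward scan from the load to the store that stops at the *first* instruction which may write to the loaded memory, and promotes there. Below is a minimal standalone sketch of that loop, simplified from the hunk above; the helper name findPromotionPoint is made up for illustration, and it assumes the load and store are in the same basic block, which processStoreOfLoad already guarantees at this point.

// Illustrative sketch only, not part of the patch. Mirrors the loop
// restored in MemCpyOptimizer.cpp; the helper name is hypothetical.
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static Instruction *findPromotionPoint(LoadInst *LI, StoreInst *SI,
                                       BatchAAResults &BAA) {
  MemoryLocation LoadLoc = MemoryLocation::get(LI);
  // By default we promote at the store position.
  Instruction *P = SI;
  // Walk the instructions strictly between the load and the store and stop
  // at the first one that may modify the loaded memory; promoting any later
  // would let that clobber overwrite the source before the copy happens.
  for (Instruction &I : make_range(++LI->getIterator(), SI->getIterator())) {
    if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
      P = &I;
      break;
    }
  }
  return P;
}

Scanning for the first clobber (rather than asking MemorySSA for the clobber nearest to the store) is what avoids the miscompile in the @multiple_clobbering test above, where an earlier clobber would otherwise be skipped.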