https://github.com/nikic created https://github.com/llvm/llvm-project/pull/135615
Backport of 716b02d8c575afde7af1af13df145019659abca2, with conflicts in the test resolved. From e385f5c5b9bd32f89754e8088c29f42a761f2880 Mon Sep 17 00:00:00 2001 From: Dominik Adamski <dominik.adam...@amd.com> Date: Thu, 10 Apr 2025 12:23:53 +0200 Subject: [PATCH] [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas (#129537) Optimization of alloca instructions may lead to invalid alias tags. Incorrect alias tags can result in incorrect optimization outcomes for Fortran source code compiled by Flang with flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit removes alias tags when memcpy optimization replaces two arrays with one array, thus ensuring correct compilation of Fortran source code using flags: `-O3 -mmlir -local-alloc-tbaa -flto`. This commit is also a proposal to fix the reported issue: https://github.com/llvm/llvm-project/issues/133984 --------- Co-authored-by: Shilei Tian <i...@tianshilei.me> (cherry picked from commit 716b02d8c575afde7af1af13df145019659abca2) --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19 +++-- llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 77 +++++++++++++++++++ llvm/test/Transforms/MemCpyOpt/stack-move.ll | 10 +-- 3 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 971d6012f6129..9202c341da92e 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1518,7 +1518,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, // to remove them. SmallVector<Instruction *, 4> LifetimeMarkers; - SmallSet<Instruction *, 4> NoAliasInstrs; + SmallSet<Instruction *, 4> AAMetadataInstrs; bool SrcNotDom = false; // Recursively track the user and check whether modified alias exist. 
@@ -1573,8 +1573,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, continue; } } - if (UI->hasMetadata(LLVMContext::MD_noalias)) - NoAliasInstrs.insert(UI); + AAMetadataInstrs.insert(UI); + if (!ModRefCallback(UI)) return false; } @@ -1679,11 +1679,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, } // As this transformation can cause memory accesses that didn't previously - // alias to begin to alias one another, we remove !noalias metadata from any - // uses of either alloca. This is conservative, but more precision doesn't - // seem worthwhile right now. - for (Instruction *I : NoAliasInstrs) + // alias to begin to alias one another, we remove !alias.scope, !noalias, + // !tbaa and !tbaa_struct metadata from any uses of either alloca. + // This is conservative, but more precision doesn't seem worthwhile + // right now. + for (Instruction *I : AAMetadataInstrs) { + I->setMetadata(LLVMContext::MD_alias_scope, nullptr); I->setMetadata(LLVMContext::MD_noalias, nullptr); + I->setMetadata(LLVMContext::MD_tbaa, nullptr); + I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr); + } LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n"); NumStackMove++; diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll new file mode 100644 index 0000000000000..6e446e5ff267c --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s + +define void @test() local_unnamed_addr { +; CHECK-LABEL: define void @test() local_unnamed_addr { +; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1 +; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4 +; 
CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret void +; + %test_array_a = alloca [31 x float], align 4 + %test_array_b = alloca [31 x float], align 4 + %1 = getelementptr float, ptr %test_array_b, i64 1 + store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4 + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false) + %2 = getelementptr float, ptr %test_array_a, i64 1 + %3 = load float, ptr %2, align 4, !tbaa !7 + ret void +} + +%struct.Outer = type { float, double, %struct.Inner } +%struct.Inner = type { i32, float } + +; Function Attrs: nounwind uwtable +define dso_local float @f() { +; CHECK-LABEL: define dso_local float @f() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8 +; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0 +; CHECK-NEXT: store float 0.000000e+00, ptr [[F]], align 8 +; CHECK-NEXT: [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F1]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00 +; CHECK-NEXT: store float [[ADD]], ptr [[F1]], align 8 +; CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F2]], align 8 +; CHECK-NEXT: ret float [[TMP1]] +; +entry: + %test = alloca %struct.Outer, align 8 + %test1 = alloca %struct.Outer, align 8 + %f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0 + store float 0.000000e+00, ptr %f, align 8, !tbaa !9 + %inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2 + %i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0 + store 
i32 0, ptr %i, align 8, !tbaa !17 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false) + %f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0 + %0 = load float, ptr %f1, align 8, !tbaa !9 + %add = fadd float %0, 2.000000e+00 + store float %add, ptr %f1, align 8, !tbaa !9 + %f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0 + %1 = load float, ptr %f2, align 8, !tbaa !9 + ret float %1 +} + +!1 = !{!"any data access", !2, i64 0} +!2 = !{!"any access", !3, i64 0} +!3 = !{!"Flang function root test"} +!4 = !{!5, !5, i64 0} +!5 = !{!"allocated data/test_array_a", !6, i64 0} +!6 = !{!"allocated data", !1, i64 0} +!7 = !{!8, !8, i64 0} +!8 = !{!"allocated data/test_array_b", !6, i64 0} +!9 = !{!10, !11, i64 0} +!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16} +!11 = !{!"float", !12, i64 0} +!12 = !{!"omnipotent char", !13, i64 0} +!13 = !{!"Simple C/C++ TBAA"} +!14 = !{!"double", !12, i64 0} +!15 = !{!"Inner", !16, i64 0, !11, i64 4} +!16 = !{!"int", !12, i64 0} +!17 = !{!10, !16, i64 16} + + diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll index 6089c0a4d7cf5..5ff6f01021208 100644 --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -259,7 +259,7 @@ define void @remove_scoped_noalias() { ; CHECK-LABEL: define void @remove_scoped_noalias() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; @@ -283,7 +283,7 @@ define void @remove_alloca_metadata() { ; CHECK-LABEL: define void @remove_alloca_metadata() { ; CHECK-NEXT: 
[[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; @@ -308,7 +308,7 @@ define void @noalias_on_lifetime() { ; CHECK-LABEL: define void @noalias_on_lifetime() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; @@ -399,10 +399,10 @@ define void @terminator_lastuse() personality i32 0 { ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]]) -; CHECK-NEXT: to label [[SUC:%.*]] unwind label [[UNW:%.*]] +; CHECK-NEXT: to label [[SUC:%.*]] unwind label [[UNW:%.*]] ; CHECK: unw: ; CHECK-NEXT: [[LP:%.*]] = landingpad i32 -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup ; CHECK-NEXT: resume i32 0 ; CHECK: suc: ; CHECK-NEXT: ret void _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits