https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/80274
>From aa6980841e587eba9c98bf54c51f5414f8a15871 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Wed, 24 Jan 2024 12:33:57 +0100 Subject: [PATCH 1/3] [Loads] Use BatchAAResults for available value APIs (NFCI) This allows caching AA queries both within and across the calls, and enables us to use a custom AAQI configuration. (cherry picked from commit 89dae798cc77789a43e9a60173f647dae03a65fe) --- llvm/include/llvm/Analysis/Loads.h | 12 ++++++------ llvm/lib/Analysis/Lint.cpp | 3 ++- llvm/lib/Analysis/Loads.cpp | 9 ++++----- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 3 ++- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 11 ++++++----- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 2880ed33a34cbc..0926093bba99de 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -18,7 +18,7 @@ namespace llvm { -class AAResults; +class BatchAAResults; class AssumptionCache; class DataLayout; class DominatorTree; @@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan; /// location in memory, as opposed to the value operand of a store. /// /// \returns The found value, or nullptr if no value is found. -Value *FindAvailableLoadedValue(LoadInst *Load, - BasicBlock *ScanBB, +Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = DefMaxInstsToScan, - AAResults *AA = nullptr, + BatchAAResults *AA = nullptr, bool *IsLoadCSE = nullptr, unsigned *NumScanedInst = nullptr); @@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// FindAvailableLoadedValue() for the case where we are not interested in /// finding the closest clobbering instruction if no available load is found. /// This overload cannot be used to scan across multiple blocks. -Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, +Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA, + bool *IsLoadCSE, unsigned MaxInstsToScan = DefMaxInstsToScan); /// Scan backwards to see if we have the value of the given pointer available @@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, AAResults *AA, + unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); /// Returns true if a pointer value \p A can be replace with another pointer diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index 1ebc593016bc0d..16635097d20afe 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -657,11 +657,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, BasicBlock::iterator BBI = L->getIterator(); BasicBlock *BB = L->getParent(); SmallPtrSet<BasicBlock *, 4> VisitedBlocks; + BatchAAResults BatchAA(*AA); for (;;) { if (!VisitedBlocks.insert(BB).second) break; if (Value *U = - FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) + FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA)) return findValueImpl(U, OffsetOk, Visited); if (BBI != BB->begin()) break; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 97d21db86abf28..6bf0d2f56eb4eb 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -450,11 +450,10 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden, "to scan backward from a given instruction, when searching for " "available loaded value")); -Value *llvm::FindAvailableLoadedValue(LoadInst *Load, - BasicBlock *ScanBB, +Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AAResults *AA, bool *IsLoad, + BatchAAResults *AA, bool *IsLoad, unsigned *NumScanedInst) { // Don't CSE load that is volatile or anything stronger than unordered. if (!Load->isUnordered()) @@ -583,7 +582,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, Value *llvm::findAvailablePtrLoadStore( const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { + BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; @@ -664,7 +663,7 @@ Value *llvm::findAvailablePtrLoadStore( return nullptr; } -Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, +Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA, bool *IsLoadCSE, unsigned MaxInstsToScan) { const DataLayout &DL = Load->getModule()->getDataLayout(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index bb2a77daa60a76..1254a050027a45 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1032,7 +1032,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) { // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. bool IsLoadCSE = false; - if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) { + BatchAAResults BatchAA(*AA); + if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) { if (IsLoadCSE) combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 8603c5cf9c022c..d7d689f58a0708 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1260,8 +1260,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // the entry to its block. BasicBlock::iterator BBIt(LoadI); bool IsLoadCSE; + BatchAAResults BatchAA(*AA); if (Value *AvailableVal = FindAvailableLoadedValue( - LoadI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) { + LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) { // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. @@ -1322,9 +1323,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB), LocationSize::precise(DL.getTypeStoreSize(AccessTy)), AATags); - PredAvailable = findAvailablePtrLoadStore(Loc, AccessTy, LoadI->isAtomic(), - PredBB, BBIt, DefMaxInstsToScan, - AA, &IsLoadCSE, &NumScanedInst); + PredAvailable = findAvailablePtrLoadStore( + Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan, + &BatchAA, &IsLoadCSE, &NumScanedInst); // If PredBB has a single predecessor, continue scanning through the // single predecessor. @@ -1336,7 +1337,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { BBIt = SinglePredBB->end(); PredAvailable = findAvailablePtrLoadStore( Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt, - (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE, + (DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE, &NumScanedInst); } } >From a581690c57d153f329ded71004a8616b93cb88ca Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Wed, 24 Jan 2024 15:03:24 +0100 Subject: [PATCH 2/3] [JumpThreading] Add test for #79175 (NFC) (cherry picked from commit 7143b451d71fe314730f7610d7908e3b9611815c) --- llvm/test/Transforms/JumpThreading/pr79175.ll | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 llvm/test/Transforms/JumpThreading/pr79175.ll diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll new file mode 100644 index 00000000000000..6815aabb26dfc6 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/pr79175.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=jump-threading < %s | FileCheck %s + +@f = external global i32 + +; Make sure the value of @f is reloaded prior to the final comparison. +; FIXME: This is a miscompile. +define i32 @test(i64 %idx, i32 %val) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[RETURN:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[F:%.*]] = load i32, ptr @f, align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[F]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END_THREAD:%.*]], label [[COND_END:%.*]] +; CHECK: cond.end: +; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]] +; CHECK: cond.end.thread: +; CHECK-NEXT: [[F_RELOAD_PR:%.*]] = load i32, ptr @f, align 4 +; CHECK-NEXT: br label [[TMP0]] +; CHECK: 0: +; CHECK-NEXT: [[F_RELOAD:%.*]] = phi i32 [ [[F]], [[COND_END]] ], [ [[F_RELOAD_PR]], [[COND_END_THREAD]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ] +; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]] +; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1 +; CHECK-NEXT: br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: ret i32 0 +; CHECK: return2: +; CHECK-NEXT: ret i32 1 +; +entry: + %cmp = icmp slt i64 %idx, 1 + br i1 %cmp, label %for.body, label %return + +for.body: + %f = load i32, ptr @f, align 4 + %cmp1 = icmp eq i32 %f, 0 + br i1 %cmp1, label %cond.end, label %cond.false + +cond.false: + br label %cond.end + +cond.end: + %phi = phi i32 [ %val, %cond.false ], [ 1, %for.body ] + %cmp.i = icmp sgt i32 %phi, 0 + %sel = select i1 %cmp.i, i32 0, i32 %phi + %f.idx = getelementptr inbounds i32, ptr @f, i64 %idx + store i32 %sel, ptr %f.idx, align 4 + %f.reload = load i32, ptr @f, align 4 + %cmp3 = icmp slt i32 %f.reload, 1 + br i1 %cmp3, label %return2, label %return + +return: + ret i32 0 + +return2: + ret i32 1 +} >From 28879ab8276e7237bfc86f4c7d7890fd4311d334 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Wed, 31 Jan 2024 15:23:53 +0100 Subject: [PATCH 3/3] [AA][JumpThreading] Don't use DomTree for AA in JumpThreading (#79294) JumpThreading may perform AA queries while the dominator tree is not up to date, which may result in miscompilations. Fix this by adding a new AAQI option to disable the use of the dominator tree in BasicAA. Fixes https://github.com/llvm/llvm-project/issues/79175. (cherry picked from commit 4f32f5d5720fbef06672714a62376f236a36aef5) --- llvm/include/llvm/Analysis/AliasAnalysis.h | 7 +++++++ llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 14 ++++++++++---- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 6 ++++-- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 2 ++ llvm/test/Transforms/JumpThreading/pr79175.ll | 4 +--- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index d6f732d35fd4cd..e8e4f491be5a3d 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -287,6 +287,10 @@ class AAQueryInfo { /// store %l, ... bool MayBeCrossIteration = false; + /// Whether alias analysis is allowed to use the dominator tree, for use by + /// passes that lazily update the DT while performing AA queries. + bool UseDominatorTree = true; + AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {} }; @@ -668,6 +672,9 @@ class BatchAAResults { void enableCrossIterationMode() { AAQI.MayBeCrossIteration = true; } + + /// Disable the use of the dominator tree during alias analysis queries. + void disableDominatorTree() { AAQI.UseDominatorTree = false; } }; /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index afc1811239f283..7eca82729430dd 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase { const Function &F; const TargetLibraryInfo &TLI; AssumptionCache &AC; - DominatorTree *DT; + /// Use getDT() instead of accessing this member directly, in order to + /// respect the AAQI.UseDominatorTree option. + DominatorTree *DT_; + + DominatorTree *getDT(const AAQueryInfo &AAQI) const { + return AAQI.UseDominatorTree ? DT_ : nullptr; + } public: BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree *DT = nullptr) - : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {} + : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT) {} + DT_(Arg.DT_) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), - AC(Arg.AC), DT(Arg.DT) {} + AC(Arg.AC), DT_(Arg.DT_) {} /// Handle invalidation events in the new pass manager. bool invalidate(Function &Fn, const PreservedAnalyses &PA, diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3178e2d2781674..1028b52a79123f 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -89,7 +89,7 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, // may be created without handles to some analyses and in that case don't // depend on them. if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) || - (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA))) + (DT_ && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA))) return true; // Otherwise this analysis result remains valid. @@ -1063,6 +1063,7 @@ AliasResult BasicAAResult::aliasGEP( : AliasResult::MayAlias; } + DominatorTree *DT = getDT(AAQI); DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT); DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT); @@ -1556,6 +1557,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, const Value *HintO1 = getUnderlyingObject(Hint1); const Value *HintO2 = getUnderlyingObject(Hint2); + DominatorTree *DT = getDT(AAQI); auto ValidAssumeForPtrContext = [&](const Value *Ptr) { if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) { return isValidAssumeForContext(Assume, PtrI, DT, @@ -1735,7 +1737,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, if (!Inst || Inst->getParent()->isEntryBlock()) return true; - return isNotInCycle(Inst, DT, /*LI*/ nullptr); + return isNotInCycle(Inst, getDT(AAQI), /*LI*/ nullptr); } /// Computes the symbolic difference between two de-composed GEPs. diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index d7d689f58a0708..87c01ead634ff8 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1261,6 +1261,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { BasicBlock::iterator BBIt(LoadI); bool IsLoadCSE; BatchAAResults BatchAA(*AA); + // The dominator tree is updated lazily and may not be valid at this point. + BatchAA.disableDominatorTree(); if (Value *AvailableVal = FindAvailableLoadedValue( LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) { // If the value of the load is locally available within the block, just use diff --git a/llvm/test/Transforms/JumpThreading/pr79175.ll b/llvm/test/Transforms/JumpThreading/pr79175.ll index 6815aabb26dfc6..2c7ee0770cdc73 100644 --- a/llvm/test/Transforms/JumpThreading/pr79175.ll +++ b/llvm/test/Transforms/JumpThreading/pr79175.ll @@ -4,7 +4,6 @@ @f = external global i32 ; Make sure the value of @f is reloaded prior to the final comparison. -; FIXME: This is a miscompile. define i32 @test(i64 %idx, i32 %val) { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: i64 [[IDX:%.*]], i32 [[VAL:%.*]]) { @@ -20,13 +19,12 @@ define i32 @test(i64 %idx, i32 %val) { ; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]] ; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]] ; CHECK: cond.end.thread: -; CHECK-NEXT: [[F_RELOAD_PR:%.*]] = load i32, ptr @f, align 4 ; CHECK-NEXT: br label [[TMP0]] ; CHECK: 0: -; CHECK-NEXT: [[F_RELOAD:%.*]] = phi i32 [ [[F]], [[COND_END]] ], [ [[F_RELOAD_PR]], [[COND_END_THREAD]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ] ; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]] ; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4 +; CHECK-NEXT: [[F_RELOAD:%.*]] = load i32, ptr @f, align 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[F_RELOAD]], 1 ; CHECK-NEXT: br i1 [[CMP3]], label [[RETURN2:%.*]], label [[RETURN]] ; CHECK: return: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits