llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Peter Collingbourne (pcc) <details> <summary>Changes</summary> isProfitableToSinkOperands() may now return a phi node. If it does, check that all incoming values are identical and if so, replace the phi use with a clone of the incoming value. Use this mechanism to sink phi node operands of shifts whose incoming values are identical casts of a constant. This could be improved a bit by changing the isProfitableToSinkOperands() API so that we don't need to call hasIdenticalValue() again in the caller but hopefully the case of a PHI operand to a shift is rare enough that it doesn't matter that we need to do it multiple times. --- Full diff: https://github.com/llvm/llvm-project/pull/141716.diff 5 Files Affected: - (modified) llvm/include/llvm/IR/Instructions.h (+5) - (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+13-4) - (modified) llvm/lib/IR/Instructions.cpp (+35) - (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+10-3) - (added) llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll (+92) ``````````diff diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index c164f76eb335b..e8229036b51db 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2822,6 +2822,11 @@ class PHINode : public Instruction { /// non-undef value. bool hasConstantOrUndefValue() const; + /// If the specified PHI node (possibly via other PHI nodes) merges together + /// the same or identical (i.e. Instruction::isIdenticalTo() returns true) + /// values, return one of the values, otherwise return null. + Value *hasIdenticalValue(); + /// If the PHI node is complete which means all of its parent's predecessors /// have incoming value in this PHI, return true, otherwise return false. bool isComplete() const { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 52263026d6cea..422916a28669f 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { for (Use *U : reverse(OpsToSink)) { auto *UI = cast<Instruction>(U->get()); - if (isa<PHINode>(UI)) - continue; - if (UI->getParent() == TargetBB) { + if (auto *PN = dyn_cast<PHINode>(UI)) { + auto *I0 = dyn_cast<Instruction>(PN->hasIdenticalValue()); + if (!I0) + continue; + if (I0->getParent() == TargetBB && + InstOrdering[I0] < InstOrdering[InsertPoint]) + InsertPoint = I0; + } else if (UI->getParent() == TargetBB) { if (InstOrdering[UI] < InstOrdering[InsertPoint]) InsertPoint = UI; continue; @@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { DenseMap<Instruction *, Instruction *> NewInstructions; for (Use *U : ToReplace) { auto *UI = cast<Instruction>(U->get()); - Instruction *NI = UI->clone(); + Instruction *NI; + if (auto *PN = dyn_cast<PHINode>(UI)) + NI = cast<Instruction>(PN->hasIdenticalValue())->clone(); + else + NI = UI->clone(); if (IsHugeFunc) { // Now we clone an instruction, its operands' defs may sink to this BB diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index f404e11b9c0f0..9c59378213991 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -48,6 +48,7 @@ #include <cassert> #include <cstdint> #include <optional> +#include <set> #include <vector> using namespace llvm; @@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const { return true; } +/// If the specified PHI node (possibly via other PHI nodes) merges together the +/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values, +/// return one of the values, otherwise return null. +Value *PHINode::hasIdenticalValue() { + std::vector<PHINode *> Worklist; + std::set<PHINode *> Seen; + Value *Result = nullptr; + Worklist.push_back(this); + while (!Worklist.empty()) { + PHINode *PN = Worklist.back(); + Worklist.pop_back(); + if (!Seen.insert(PN).second) + continue; + for (Value *V : PN->incoming_values()) { + if (auto *PN = dyn_cast<PHINode>(V)) { + Worklist.push_back(PN); + continue; + } + if (!Result) { + Result = V; + continue; + } + if (V == Result) + continue; + if (auto *I = dyn_cast<Instruction>(V)) + if (auto *ResultI = dyn_cast<Instruction>(Result)) + if (I->isIdenticalTo(ResultI)) + continue; + return nullptr; + } + } + return Result; +} + //===----------------------------------------------------------------------===// // LandingPadInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 072705bd31b1a..5baf5e3001baa 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -7167,7 +7167,14 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I, } if (ShiftAmountOpNum == -1) return false; - auto *ShiftAmount = &I->getOperandUse(ShiftAmountOpNum); + auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum); + + Value *ShiftAmount = ShiftAmountUse->get(); + if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) { + ShiftAmount = PN->hasIdenticalValue(); + if (!ShiftAmount) + return false; + } // A uniform shift amount in a vector shift or funnel shift may be much // cheaper than a generic variable vector shift, so make that pattern visible @@ -7175,7 +7182,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I, auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount); if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 && isVectorShiftByScalarCheap(I->getType())) { - Ops.push_back(ShiftAmount); + Ops.push_back(ShiftAmountUse); return true; } @@ -7186,7 +7193,7 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I, // instruction's immediate operand. if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) { if (isa<ConstantExpr>(CI->getOperand(0))) { - Ops.push_back(ShiftAmount); + Ops.push_back(ShiftAmountUse); return true; } } diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll new file mode 100644 index 0000000000000..98f71cdce913d --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/inhibit-zext-constant-hoist-phi.ll @@ -0,0 +1,92 @@ +; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare. + +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small" +@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0 +@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1 + +; Check that we recover the third pair of zexts from the phi. + +; CHECK: define void @f4 +define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 { + br i1 %0, label %5, label %18 + +5: + %6 = load ptr, ptr %1, align 8 + %7 = ptrtoint ptr %6 to i64 + %8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64) + ; CHECK: zext {{.*}} @__typeid__ZTS1S_align + %9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64 + %10 = lshr i64 %8, %9 + ; CHECK: zext {{.*}} @__typeid__ZTS1S_align + %11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64 + %12 = shl i64 %8, %11 + %13 = or i64 %10, %12 + %14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64) + br i1 %14, label %15, label %16 + +15: + tail call void @llvm.ubsantrap(i8 2) #5 + unreachable + +16: + %17 = load ptr, ptr %6, align 8 + tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1) + br label %31 + +18: + %19 = load ptr, ptr %2, align 8 + %20 = ptrtoint ptr %19 to i64 + %21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64) + ; CHECK: zext {{.*}} @__typeid__ZTS1S_align + %22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64 + %23 = lshr i64 %21, %22 + ; CHECK: zext {{.*}} @__typeid__ZTS1S_align + %24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64 + %25 = shl i64 %21, %24 + %26 = or i64 %23, %25 + %27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64) + br i1 %27, label %28, label %29 + +28: + tail call void @llvm.ubsantrap(i8 2) #5 + unreachable + +29: + %30 = load ptr, ptr %19, align 8 + tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2) + br label %31 + +31: + %32 = phi i64 [ %24, %29 ], [ %11, %16 ] + %33 = phi i64 [ %22, %29 ], [ %9, %16 ] + %34 = load ptr, ptr %3, align 8 + %35 = ptrtoint ptr %34 to i64 + %36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64) + ; CHECK: zext {{.*}} @__typeid__ZTS1S_align + %37 = lshr i64 %36, %33 + ; CHECK: zext {{.*}} @__typeid__ZTS1S_align + %38 = shl i64 %36, %32 + %39 = or i64 %37, %38 + %40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64) + br i1 %40, label %41, label %42 + +41: + tail call void @llvm.ubsantrap(i8 2) #5 + unreachable + +42: + %43 = load ptr, ptr %34, align 8 + tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3) + ret void +} + +declare i1 @llvm.type.test(ptr, metadata) +declare void @llvm.ubsantrap(i8 immarg) + +!0 = !{i64 0, i64 256} +!1 = !{i64 0, i64 128} `````````` </details> https://github.com/llvm/llvm-project/pull/141716 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits