================
@@ -0,0 +1,157 @@
+//===- AMDGPUExpandFeaturePredicates.cpp - Feature Predicate Expander Pass ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements a pass that deals with expanding AMDGCN generic feature
+// predicates into target specific quantities / sequences. In this context, a
+// generic feature predicate is an implementation detail global variable that
+// is inserted by the FE as a consequence of using either the __builtin_cpu_is
+// or the __builtin_amdgcn_is_invocable special builtins on an abstract target
+// (AMDGCNSPIRV). These placeholder globals are used to guide target specific
+// lowering, once the concrete target is known, by way of constant folding their
+// value all the way into a terminator (i.e. a controlled block) or into a no
+// live use scenario. The pass makes a best effort attempt to look through
+// calls, i.e. a constant evaluatable passthrough of a predicate value will
+// generally work, however we hard fail if the folding fails, to avoid obtuse
+// BE errors or opaque run time errors. This pass should run as early as
+// possible / immediately after Clang CodeGen, so that the optimisation pipeline
+// and the BE operate with concrete target data.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+/// Inserts into \p Container, at Container.end(), every user of \p V that is
+/// an Instruction. Users of any other kind are skipped.
+///
+/// \param V          value whose users are inspected; must not be null.
+/// \param Container  any container providing insert(iterator, Instruction *).
+template <typename C> void collectUsers(Value *V, C &Container) {
+  assert(V && "Must pass an existing Value!");
+
+  for (auto *Usr : V->users()) {
+    auto *Inst = dyn_cast<Instruction>(Usr);
+    if (!Inst)
+      continue;
+
+    Container.insert(Container.end(), Inst);
+  }
+}
+
+/// Gives the predicate global \p P a concrete boolean initializer computed
+/// from the subtarget \p ST, after switching it to private linkage and
+/// clearing its externally-initialized flag.
+///
+/// A global whose name starts with "llvm.amdgcn.has" is handled as a feature
+/// predicate: the rest of its name is a comma separated list, each entry of
+/// which gets a leading '+' before the list is passed to ST.checkFeatures().
+/// Any other global is handled as a processor predicate: the rest of its name
+/// is compared for equality against ST.getCPU().
+inline void setPredicate(const GCNSubtarget &ST, GlobalVariable *P) {
+  const StringRef Name = P->getName();
+  const bool IsFeature = Name.starts_with("llvm.amdgcn.has");
+
+  // sizeof includes the literal's terminating NUL, so the offset lands one
+  // character past the prefix itself (presumably skipping a separator that
+  // follows the prefix in the name; confirm against the FE naming scheme).
+  const size_t PrefixLen =
+      IsFeature ? sizeof("llvm.amdgcn.has") : sizeof("llvm.amdgcn.is");
+  std::string PV = Name.substr(PrefixLen).str();
+
+  Type *PTy = P->getValueType();
+  P->setLinkage(GlobalValue::PrivateLinkage);
+  P->setExternallyInitialized(false);
+
+  if (!IsFeature) {
+    P->setInitializer(ConstantInt::getBool(PTy, PV == ST.getCPU()));
+    return;
+  }
+
+  // Put a '+' right after every ',' and then one at the very front, so that
+  // each entry of the list ends up with a leading '+'.
+  for (size_t Comma = PV.find(','); Comma != std::string::npos;
+       Comma = PV.find(',', Comma))
+    PV.insert(++Comma, 1, '+');
+  PV.insert(PV.cbegin(), '+');
+
+  P->setInitializer(ConstantInt::getBool(PTy, ST.checkFeatures(PV)));
+}
+
+/// Emits, through the LLVMContext of \p Caller, a DS_Error
+/// DiagnosticInfoUnsupported saying that predicate \p P could not be constant
+/// folded through its user \p NoFold. The diagnostic is attached to \p Caller
+/// and carries the debug location of \p NoFold.
+///
+/// \returns the pair {PreservedAnalyses::none(), false}.
+std::pair<PreservedAnalyses, bool>
+unfoldableFound(Function *Caller, GlobalVariable *P, Instruction *NoFold) {
+  std::string Msg;
+  raw_string_ostream Stream(Msg);
+
+  Stream << "Impossible to constant fold feature predicate: ";
+  Stream << *P;
+  Stream << " used by ";
+  Stream << *NoFold;
+  Stream << ", please simplify.\n";
+
+  // The diagnostic is built inside the call expression on purpose, so that
+  // everything it refers to stays alive for the duration of diagnose().
+  LLVMContext &Ctx = Caller->getContext();
+  Ctx.diagnose(DiagnosticInfoUnsupported(*Caller, Msg, NoFold->getDebugLoc(),
+                                         DS_Error));
+
+  return std::make_pair(PreservedAnalyses::none(), false);
+}
+
+std::pair<PreservedAnalyses, bool> handlePredicate(const GCNSubtarget &ST,
+                                                   GlobalVariable *P) {
+  setPredicate(ST, P);
+
+  SmallPtrSet<Instruction *, 32> ToFold;
+  collectUsers(P, ToFold);
+
+  if (ToFold.empty())
+    return {PreservedAnalyses::all(), true};
+
+  do {
+    auto *I = *ToFold.begin();
+    ToFold.erase(I);
+
+    if (auto *C = ConstantFoldInstruction(I, P->getDataLayout())) {
----------------
AlexVlx wrote:

Regarding unreachable BBs, the pass currently leaves them in place because I 
hadn't fully considered the implications, and because my understanding is that 
we (LLVM, not AMDGPU) unconditionally run 
['UnreachableBlockElimPass'](https://github.com/llvm/llvm-project/blob/e175ecff936287823b5443d7b2d443fc6569f31f/llvm/include/llvm/Passes/CodeGenPassBuilder.h#L717),
 irrespective of optimisation level. I *think* that understanding is correct, 
and that there is at least one other transform ('LowerInvokePass') that creates 
unreachable BBs and leaves them around. Having said that, it's not very 
hygienic and I will add cleanup for unreachable BBs.

With functions it's a bit trickier, and can actually get into somewhat 
convoluted use cases, which these predicates, as low-level target specific 
things, are not meant for. To be more specific, with normal use one would 
expect that for any and all functions the user would've applied predicates 
locally at the finest possible granularity i.e.

```cpp
// THIS
void foo() {
    if (__builtin_processor_is("gfx900"))
        do_something();
    else if (__builtin_is_invocable(__builtin_amdgcn_some_builtin))
        __builtin_amdgcn_some_builtin();
}

void bar() {
    foo();
}

// NOT THIS
void foo() {
    do_something_that_only_works_on_gfx900_no_guard();
    __builtin_amdgcn_only_gfx900();
}

void bar() {
    if (__builtin_processor_is("gfx900"))
        foo();
}
```

If the guards are granular at expression / block scope at most, then there's no 
need to remove unused functions as they'd have been "cleaned up", for lack of a 
better word. I do appreciate that that is not an entirely satisfactory answer. 
I would lightly argue that since the second case is an anti-pattern (imagine 
these are proper large functions), it failing at compile time during ISEL is 
not that bad / an opportunity to not write it in the first place. Having said 
that, here's how we could handle functions:

- We could remove functions with internal linkage, iff they end up unused after 
predicate expansion, as that implies that their only uses were predicate 
guarded;
- We cannot do this for functions with external linkage (using internal and 
external loosely here), as they might have other valid uses in other TUs;
- What we can do for the latter is:
  - Tag (metadata / attribute) when running the predicate expansion makes a 
previously used function unused;
  - Add an `UnreachableFuncElimPass` which unconditionally runs right before 
ISEL, and removes functions iff they are unused and carry the tag;
    - We can only do this for AMDGPU since at the moment we do not do dynamic 
linking.

Dealing with the first category is straightforward; I could add it now or in a 
follow-up patch (I am not entirely sure that we do not already remove these 
unconditionally before ISEL anyway, as the AMDGPU `opt` pipeline is fairly 
voluminous).

https://github.com/llvm/llvm-project/pull/134016
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to