mibintc updated this revision to Diff 337879.
mibintc edited the summary of this revision.
mibintc added a comment.
This is a minor update from @pengfei that allows simple test cases to run
end-to-end with clang.
I also changed the "summary" to reflect the review discussion around the FMA
optimization; the option chosen is "FMA is not allowed across a fence".
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D99675/new/
https://reviews.llvm.org/D99675
Files:
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/CodeGen/SelectionDAGISel.h
llvm/include/llvm/IR/IRBuilder.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/Support/TargetOpcodes.def
llvm/include/llvm/Target/Target.td
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2321,6 +2321,11 @@
N->getOperand(0));
}
+void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { // Selects N as the generic ARITH_FENCE pseudo.
+ CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0), // target opcode, then N's result type 0
+ N->getOperand(0)); // N's operand 0 is the only operand passed along
+}
+
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2872,6 +2877,9 @@
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
+ case ISD::ARITH_FENCE:
+ Select_ARITH_FENCE(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7210,6 +7210,13 @@
}
break;
}
+ case Intrinsic::arithmetic_fence: {
+ auto DL = getCurSDLoc();
+ SDValue Val = getValue(FPI.getArgOperand(0));
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
+ setValue(&FPI, DAG.getNode(ISD::ARITH_FENCE, DL, ResultVT, Val));
+ return;
+ }
}
// A few strict DAG nodes carry additional operands that are not
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1275,6 +1275,9 @@
case TargetOpcode::PSEUDO_PROBE:
emitPseudoProbe(MI);
break;
+ case TargetOpcode::ARITH_FENCE:
+ OutStreamer->emitRawComment("ARITH_FENCE");
+ break;
default:
emitInstruction(&MI);
if (CanDoExtraAnalysis) {
Index: llvm/include/llvm/Target/Target.td
===================================================================
--- llvm/include/llvm/Target/Target.td
+++ llvm/include/llvm/Target/Target.td
@@ -1172,6 +1172,12 @@
let AsmString = "PSEUDO_PROBE";
let hasSideEffects = 1;
}
+def ARITH_FENCE : StandardPseudoInstruction { // Standard pseudo with one output ($dst) and one input ($src).
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins unknown:$src);
+ let AsmString = ""; // no assembly text of its own
+ let hasSideEffects = false;
+}
def STACKMAP : StandardPseudoInstruction {
let OutOperandList = (outs);
Index: llvm/include/llvm/Support/TargetOpcodes.def
===================================================================
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -117,6 +117,9 @@
/// Pseudo probe
HANDLE_TARGET_OPCODE(PSEUDO_PROBE)
+/// Arithmetic fence.
+HANDLE_TARGET_OPCODE(ARITH_FENCE)
+
/// A Stackmap instruction captures the location of live variables at its
/// position in the instruction stream. It is followed by a shadow of bytes
/// that must lie within the function and not contain another stackmap.
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1311,6 +1311,9 @@
def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOnly, IntrWillReturn]>;
+// Arithmetic fence intrinsic: overloaded on its floating-point result type (llvm_anyfloat_ty); the single argument has the same type as the result (LLVMMatchType<0>). Marked IntrNoMem.
+def int_arithmetic_fence : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
// Intrinsics to support half precision floating point format
let IntrProperties = [IntrNoMem, IntrWillReturn] in {
def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -897,6 +897,13 @@
return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
}
+ /// Create a call to the arithmetic_fence intrinsic, passing \p DstType as the intrinsic's type list and \p Val as its single argument; \p Name is forwarded to CreateIntrinsic.
+ CallInst *CreateArithmeticFence(Value *Val, Type *DstType,
+ const Twine &Name = "") {
+ return CreateIntrinsic(Intrinsic::arithmetic_fence, {DstType}, {Val}, nullptr,
+ Name);
+ }
+
/// Create a call to the experimental.vector.extract intrinsic.
CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx,
const Twine &Name = "") {
Index: llvm/include/llvm/CodeGen/SelectionDAGISel.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -317,6 +317,7 @@
void CannotYetSelect(SDNode *N);
void Select_FREEZE(SDNode *N);
+ void Select_ARITH_FENCE(SDNode *N);
private:
void DoInstructionSelection();
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1085,6 +1085,10 @@
/// specifier.
PREFETCH,
+ /// ARITH_FENCE - This corresponds to an arithmetic fence intrinsic. Both its
+ /// operand and output are the same floating-point type.
+ ARITH_FENCE,
+
/// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope)
/// This corresponds to the fence instruction. It takes an input chain, and
/// two integer constants: an AtomicOrdering and a SynchronizationScope.
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1545,6 +1545,7 @@
case Intrinsic::lifetime_end:
case Intrinsic::sideeffect:
case Intrinsic::pseudoprobe:
+ case Intrinsic::arithmetic_fence:
return 0;
case Intrinsic::masked_store: {
Type *Ty = Tys[0];
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -572,6 +572,7 @@
case Intrinsic::assume:
case Intrinsic::sideeffect:
case Intrinsic::pseudoprobe:
+ case Intrinsic::arithmetic_fence:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits