[clang] [llvm] [clang-tools-extra] [PowerPC] Implement llvm.set.rounding intrinsic (PR #67302)

2024-01-10 Thread Qiu Chaofan via cfe-commits


@@ -8900,6 +8900,82 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
   return FP;
 }
 
+SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
+ SelectionDAG &DAG) const {
+  SDLoc Dl(Op);
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT PtrVT = getPointerTy(MF.getDataLayout());
+  SDValue Chain = Op.getOperand(0);
+
+  // If requested mode is constant, just use simpler mtfsb.
+  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {

ecnelises wrote:

Here we want to make sure the higher bits are all zeroes. Do KnownBits and a 
constant make a difference here?

https://github.com/llvm/llvm-project/pull/67302
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang-tools-extra] [PowerPC] Implement llvm.set.rounding intrinsic (PR #67302)

2023-11-28 Thread Serge Pavlov via cfe-commits

spavloff wrote:

LGTM.

https://github.com/llvm/llvm-project/pull/67302
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang-tools-extra] [PowerPC] Implement llvm.set.rounding intrinsic (PR #67302)

2023-11-27 Thread Qiu Chaofan via cfe-commits

https://github.com/ecnelises updated 
https://github.com/llvm/llvm-project/pull/67302

>From a1567f579531c3abbd1f4e9b7c7edd2f95ead42c Mon Sep 17 00:00:00 2001
From: Qiu Chaofan 
Date: Mon, 25 Sep 2023 17:10:51 +0800
Subject: [PATCH 1/3] [PowerPC] Implement llvm.set.rounding intrinsic

According to LangRef, llvm.set.rounding sets rounding mode by integer argument:

0 - toward zero
1 - to nearest, ties to even
2 - toward positive infinity
3 - toward negative infinity
4 - to nearest, ties away from zero

While PowerPC ISA says:

0 - to nearest
1 - toward zero
2 - toward positive infinity
3 - toward negative infinity

This patch maps the argument and writes it into the last two bits of FPSCR 
(rounding mode).
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  80 
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |   1 +
 llvm/test/CodeGen/PowerPC/frounds.ll| 194 +++-
 3 files changed, 274 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f4e3531980d165f..4e5ff0cb7169662 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -426,6 +426,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine 
,
 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
 
   setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+  setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
 
   // If we're enabling GP optimizations, use hardware square root
   if (!Subtarget.hasFSQRT() &&
@@ -8898,6 +8899,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
   return FP;
 }
 
+SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
+ SelectionDAG &DAG) const {
+  SDLoc Dl(Op);
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT PtrVT = getPointerTy(MF.getDataLayout());
+  SDValue Chain = Op.getOperand(0);
+
+  // If requested mode is constant, just use simpler mtfsb.
+  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+uint64_t Mode = CVal->getZExtValue();
+if (Mode >= 4)
+  llvm_unreachable("Unsupported rounding mode!");
+unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
+SDNode *SetHi = DAG.getMachineNode(
+(InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+{DAG.getConstant(30, Dl, MVT::i32, true), Chain});
+SDNode *SetLo = DAG.getMachineNode(
+(InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+{DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
+return SDValue(SetLo, 0);
+  }
+
+  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
+  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
+  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
+DAG.getConstant(3, Dl, MVT::i32));
+  SDValue DstFlag = DAG.getNode(
+  ISD::XOR, Dl, MVT::i32, SrcFlag,
+  DAG.getNode(ISD::AND, Dl, MVT::i32,
+  DAG.getNOT(Dl,
+ DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
+ MVT::i32),
+  One));
+  SDValue MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
+  Chain = MFFS.getValue(1);
+  SDValue NewFPSCR;
+  if (isTypeLegal(MVT::i64)) {
+// Set the last two bits (rounding mode) of bitcasted FPSCR.
+NewFPSCR = DAG.getNode(
+ISD::OR, Dl, MVT::i64,
+DAG.getNode(ISD::AND, Dl, MVT::i64,
+DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
+DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), 
MVT::i64)),
+DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag));
+NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
+  } else {
+// In 32-bit mode, store f64, load and update the lower half.
+int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
+SDValue Addr;
+if (Subtarget.isLittleEndian())
+  Addr = StackSlot;
+else
+  Addr = DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
+ DAG.getConstant(4, Dl, PtrVT));
+SDValue Tmp = DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
+Chain = Tmp.getValue(1);
+
+Tmp = DAG.getNode(
+ISD::OR, Dl, MVT::i32,
+DAG.getNode(ISD::AND, Dl, MVT::i32, Tmp,
+DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), 
MVT::i32)),
+DstFlag);
+
+Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
+NewFPSCR =
+DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
+Chain = NewFPSCR.getValue(1);
+  }
+  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
+  SDNode *MTFSF = DAG.getMachineNode(
+  PPC::MTFSF, Dl,