From: Tom Stellard <thomas.stell...@amd.com> v2: - Fold the immediates using the SelectionDAG --- lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 94 +++++++++++++++++++++++++ lib/Target/AMDGPU/R600InstrInfo.cpp | 16 ++++- lib/Target/AMDGPU/R600InstrInfo.h | 7 ++ test/CodeGen/R600/fcmp-cnd.ll | 4 +- test/CodeGen/R600/fcmp-cnde-int-args.ll | 2 +- test/CodeGen/R600/literals.ll | 30 ++++++++ test/CodeGen/R600/selectcc-icmp-select-float.ll | 2 +- test/CodeGen/R600/selectcc_cnde.ll | 2 +- test/CodeGen/R600/selectcc_cnde_int.ll | 2 +- 9 files changed, 153 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/R600/literals.ll
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp index 10ce6ad..2a80f1b 100644 --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp @@ -14,6 +14,7 @@ #include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPURegisterInfo.h" #include "AMDILDevices.h" +#include "R600InstrInfo.h" #include "llvm/ADT/ValueMap.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -167,6 +168,99 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } } break; + case ISD::ConstantFP: + case ISD::Constant: + { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + // XXX: Custom immediate lowering not implemented yet. Instead we use + // pseudo instructions defined in SIInstructions.td + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + break; + } + const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); + + uint64_t ImmValue = 0; + unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + + if (N->getOpcode() == ISD::ConstantFP) { + // XXX: 64-bit Immediates not supported yet + assert(N->getValueType(0) != MVT::f64); + + ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N); + APFloat Value = C->getValueAPF(); + float FloatValue = Value.convertToFloat(); + if (FloatValue == 0.0) { + ImmReg = AMDGPU::ZERO; + } else if (FloatValue == 0.5) { + ImmReg = AMDGPU::HALF; + } else if (FloatValue == 1.0) { + ImmReg = AMDGPU::ONE; + } else { + ImmValue = Value.bitcastToAPInt().getZExtValue(); + } + } else { + // XXX: 64-bit Immediates not supported yet + assert(N->getValueType(0) != MVT::i64); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + if (C->getZExtValue() == 0) { + ImmReg = AMDGPU::ZERO; + } else if (C->getZExtValue() == 1) { + ImmReg = AMDGPU::ONE_INT; + } else { + ImmValue = C->getZExtValue(); + } + } + + for (SDNode::use_iterator Use = N->use_begin(), E = SDNode::use_end(); + Use != E; ++Use) { + std::vector<SDValue> Ops; + for (unsigned i = 0; i < Use->getNumOperands(); ++i) { + Ops.push_back(Use->getOperand(i)); + } + + if (!Use->isMachineOpcode()) { + if (ImmReg == AMDGPU::ALU_LITERAL_X) { + // We can only use literal constants (e.g. AMDGPU::ZERO, + // AMDGPU::ONE, etc) in machine opcodes. + continue; + } + } else { + if (!TII->isALUInstr(Use->getMachineOpcode())) { + continue; + } + + int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM); + assert(ImmIdx != -1); + + // subtract one from ImmIdx, because the DST operand is usually index + // 0 for MachineInstrs, but we have no DST in the Ops vector. + ImmIdx--; + + // Check that we aren't already using an immediate. + // XXX: It's possible for an instruction to have more than one + // immediate operand, but this is not supported yet. + if (ImmReg == AMDGPU::ALU_LITERAL_X) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx)); + assert(C); + + if (C->getZExtValue() != 0) { + // This instruction is already using an immediate. + continue; + } + + // Set the immediate value + Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32); + } + } + // Set the immediate register + Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32); + + CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands()); + } + break; + } + } return SelectCode(N); } diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 20b1aa3..814e0a2 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -127,6 +127,15 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const { } } +bool R600InstrInfo::isALUInstr(unsigned Opcode) const +{ + unsigned TargetFlags = get(Opcode).TSFlags; + + return ((TargetFlags & R600_InstFlag::OP1) | + (TargetFlags & R600_InstFlag::OP2) | + (TargetFlags & R600_InstFlag::OP3)); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); @@ -505,6 +514,11 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, int R600InstrInfo::getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const { + return getOperandIdx(MI.getOpcode(), Op); +} + +int R600InstrInfo::getOperandIdx(unsigned Opcode, + R600Operands::Ops Op) const { const static int OpTable[3][R600Operands::COUNT] = { // W C S S S S S S S S // R O D L S R R R S R R R S R R L P @@ -515,7 +529,7 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} }; - unsigned TargetFlags = get(MI.getOpcode()).TSFlags; + unsigned TargetFlags = get(Opcode).TSFlags; unsigned OpTableIdx; if (!HAS_NATIVE_OPERANDS(TargetFlags)) { diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h index cec1c3b..81e1828 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.h +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -50,6 +50,9 @@ namespace llvm { bool isReductionOp(unsigned opcode) const; bool isCubeOp(unsigned opcode) const; + /// isALUInstr - Returns true if this Opcode represents an ALU instruction. + bool isALUInstr(unsigned Opcode) const; + /// isVector - Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; @@ -130,6 +133,10 @@ namespace llvm { /// if the Instruction does not contain the specified Op. int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const; + /// getOperandIdx - Get the index of Op for the given Opcode. Returns -1 + /// if the Instruction does not contain the specified Op. + int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const; + /// setImmOperand - Helper function for setting instruction flag values. void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const; diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll index c6b6236..a94cfb5 100644 --- a/test/CodeGen/R600/fcmp-cnd.ll +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -1,6 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;Not checking arguments 2 and 3 to CNDE, because they may change between +;registers and literal.x depending on what the optimizer does. +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll index 92f3b5f..5c981ef 100644 --- a/test/CodeGen/R600/fcmp-cnde-int-args.ll +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll @@ -4,7 +4,7 @@ ; chance to optimize the fcmp + select instructions to CNDE was missed ; due to the fact that the operands to fcmp and select had different types -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll new file mode 100644 index 0000000..4c731b2 --- /dev/null +++ b/test/CodeGen/R600/literals.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Test using an integer literal constant. +; Generated ASM should be: +; ADD_INT REG literal.x, 5 +; or +; ADD_INT literal.x REG, 5 + +; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5 +define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = add i32 5, %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Test using a float literal constant. +; Generated ASM should be: +; ADD REG literal.x, 5.0 +; or +; ADD literal.x REG, 5.0 + +; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0 +define void @float_literal(float addrspace(1)* %out, float %in) { +entry: + %0 = fadd float 5.0, %in + store float %0, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll index f1f8ab1..f65a300 100644 --- a/test/CodeGen/R600/selectcc-icmp-select-float.ll +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -2,7 +2,7 @@ ; Note additional optimizations may cause this SGT to be replaced with a ; CND* instruction. -; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} ; Test a selectcc with i32 LHS/RHS and float True/False define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll index e06a170..f0a0f51 100644 --- a/test/CodeGen/R600/selectcc_cnde.ll +++ b/test/CodeGen/R600/selectcc_cnde.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { %1 = load float addrspace(1)* %in %2 = fcmp oeq float %1, 0.0 diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll index 03d000f..b38078e 100644 --- a/test/CodeGen/R600/selectcc_cnde_int.ll +++ b/test/CodeGen/R600/selectcc_cnde_int.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE_INT -;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %1 = load i32 addrspace(1)* %in %2 = icmp eq i32 %1, 0 -- 1.7.11.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev