I've attached the latest version of my patch to support the PPC 440/450
cores in the PowerPC backend.
I've also included a small patch to clang so that it will recognize
-target-cpu 440 and pass that through to the code generator (and
assembler).
Thanks in advance,
Hal
On Wed, 2011-10-05 at 14:14 -0700, Jakob Stoklund Olesen wrote:
> On Oct 5, 2011, at 8:22 AM, Hal Finkel wrote:
>
> > I've been working on adding support for the PPC 440/450 "embedded" cores
> > to the PowerPC backend. These are used on IBM's Blue Gene L and P
> > supercomputers, but are also used in other environments (like on the
> > Xilinx Virtex-5). Here is my first patch. I'm new to LLVM, and so I
> > apologize if this is the wrong way to do this [the online docs seem to
> > imply that a patch should be sent to a mailing list, although does not
> > specify which one, should it go to the bug tracker instead?].
>
> llvm-commits is the place to send your patches.
>
> > I've tried to touch as little of the existing code as possible. There
> > are some other changes which should probably be made, but would require
> > touching the existing PowerPC code.
>
> This patch looks fine to me, but I am missing some test cases.
>
> There should at least be a test case just sending some code through this
> scheduler.
>
> > For example, the general load/store
> > itinerary should really be split into a general load and general store.
> > Also, on the so-called "Book E" embedded PPC cores, the sync instruction
> > is called msync (same opcode, different name). I'm not sure what the
> > best way of doing a predicate-base asm name is.
>
> I am not sure how to do that. X86 has alternative syntaxes, but that is way
> overkill.
>
> > I'm just about done with a patch to add support for the FP2 (aka Double
> > Hummer) v2f64 vector instruction set, but I figured that it would be
> > good to make that a separate patch.
>
> Definitely.
>
> /jakob
>
--
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index aabf494..aebb85f 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -23,6 +23,7 @@ include "llvm/Target/Target.td"
// CPU Directives //
//===----------------------------------------------------------------------===//
+def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
@@ -45,7 +46,9 @@ def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
- "Enable the stfiwx instruction">;
+ "Enable the stfiwx instruction">;
+def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
+ "Enable Book E instructions">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -60,6 +63,8 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9528459..b8aad8f 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -374,6 +374,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(TmpInst);
return;
+ case PPC::SYNC:
+ // In Book E sync is called msync, handle this special case here...
+ if (Subtarget.isBookE()) {
+ OutStreamer.EmitRawText(StringRef("\tmsync"));
+ return;
+ }
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
@@ -421,6 +427,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
"ppc",
+ "ppc440",
"ppc601",
"ppc602",
"ppc603",
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index cddc9d8..3197fc8 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -22,6 +22,19 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// PowerPC 440 Hazard Recognizer
+void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID) {
+ // This is a PPC pseudo-instruction.
+ // FIXME: Should something else be done?
+ return;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+//===----------------------------------------------------------------------===//
// PowerPC 970 Hazard Recognizer
//
// This models the dispatch group formation of the PPC970 processor. Dispatch
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 2f81f0f..32fac91 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -15,11 +15,24 @@
#define PPCHAZRECS_H
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "PPCInstrInfo.h"
namespace llvm {
+/// PPCHazardRecognizer440 - This class implements a scoreboard-based
+/// hazard recognizer for the PPC 440 and friends.
+class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+public:
+ PPCHazardRecognizer440(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
+
+ virtual void EmitInstruction(SUnit *SU);
+};
+
/// PPCHazardRecognizer970 - This class defines a finite state automata that
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
/// promotes good dispatch group formation and implements noop insertion to
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2bc109c..649a45a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -53,7 +53,15 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
// now, always return a PPC970 recognizer.
const TargetInstrInfo *TII = TM->getInstrInfo();
assert(TII && "No InstrInfo?");
- return new PPCHazardRecognizer970(*TII);
+
+ unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ if (Directive == PPC::DIR_440) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new PPCHazardRecognizer440(II, DAG);
+ }
+ else {
+ return new PPCHazardRecognizer970(*TII);
+ }
}
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f248b5b..17f63e0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -352,7 +352,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
def FPContractions : Predicate<"!NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
-
+def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 9664f14..69e435b 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -103,6 +103,7 @@ def VecVSR : InstrItinClass;
// Processor instruction itineraries.
include "PPCScheduleG3.td"
+include "PPCSchedule440.td"
include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 0000000..604d5c8
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,568 @@
+//===- PPCSchedule440.td - PPC 440 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User’s Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def IFTH1 : FuncUnit; // Fetch unit 1
+def IFTH2 : FuncUnit; // Fetch unit 2
+def PDCD1 : FuncUnit; // Decode unit 1
+def PDCD2 : FuncUnit; // Decode unit 2
+def DISS1 : FuncUnit; // Issue unit 1
+def DISS2 : FuncUnit; // Issue unit 2
+def LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def IWB : FuncUnit; // Write-back unit for the I pipeline
+def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def JWB : FuncUnit; // Write-back unit for the J pipeline
+def AGEN : FuncUnit; // Address generation for the L pipeline
+def CRD : FuncUnit; // D-cache access for the L pipeline
+def LWB : FuncUnit; // Write-back unit for the L pipeline
+def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS0.
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPC440Itineraries : ProcessorItineraries<
+ [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
+ IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
+ FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
+ [GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<33, [IWB]>],
+ [40, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5], // FIXME: should be [9, 5] for loads and
+ // [8, 5] for stores.
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<3, [AGEN], 1>,
+ InstrStage<2, [CRD], 1>,
+ InstrStage<1, [LWB]>]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC], 0>,
+ InstrStage<1, [LRACC], 0>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [FEXE1], 0>,
+ InstrStage<1, [AGEN], 0>,
+ InstrStage<1, [JEXE1], 0>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [FEXE2], 0>,
+ InstrStage<1, [CRD], 0>,
+ InstrStage<1, [JEXE2], 0>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<6, [FEXE3], 0>,
+ InstrStage<6, [LWB], 0>,
+ InstrStage<6, [JWB], 0>,
+ InstrStage<6, [IWB]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [9, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<25, [FWB]>],
+ [35, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<13, [FWB]>],
+ [23, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index cf194de..8acf75c 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -74,6 +74,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasAltivec(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
, TargetTriple(TT) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e028de6..d2b853d 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -33,6 +33,7 @@ namespace PPC {
enum {
DIR_NONE,
DIR_32,
+ DIR_440,
DIR_601,
DIR_602,
DIR_603,
@@ -66,6 +67,7 @@ protected:
bool HasAltivec;
bool HasFSQRT;
bool HasSTFIWX;
+ bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -136,6 +138,7 @@ public:
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
bool isGigaProcessor() const { return IsGigaProcessor; }
+ bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
new file mode 100644
index 0000000..51c1437
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=ppc32 -mcpu=440 | grep fmadd
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+ %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+ %a.real = load double* %a.realp
+ %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+ %a.imag = load double* %a.imagp
+ %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+ %b.real = load double* %b.realp
+ %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+ %b.imag = load double* %b.imagp
+ %mul.rl = fmul double %a.real, %b.real
+ %mul.rr = fmul double %a.imag, %b.imag
+ %mul.r = fsub double %mul.rl, %mul.rr
+ %mul.il = fmul double %a.imag, %b.real
+ %mul.ir = fmul double %a.real, %b.imag
+ %mul.i = fadd double %mul.il, %mul.ir
+ %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+ %c.real = load double* %c.realp
+ %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+ %c.imag = load double* %c.imagp
+ %add.r = fadd double %mul.r, %c.real
+ %add.i = fadd double %mul.i, %c.imag
+ %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+ %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+ store double %add.r, double* %real
+ store double %add.i, double* %imag
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll
new file mode 100644
index 0000000..4d663bc
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-msync.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=ppc32 -o %t
+; RUN: grep sync %t
+; RUN: not grep msync %t
+; RUN: llc < %s -march=ppc32 -mcpu=440 | grep msync
+
+define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
+entry:
+ fence acquire
+ %cond = icmp eq i32 %a, %b
+ br i1 %cond, label %IfEqual, label %IfUnequal
+
+IfEqual:
+ fence release
+ br label %end
+
+IfUnequal:
+ fence release
+ ret i32 0
+
+end:
+ ret i32 1
+}
+
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 249fd48..96f56f0 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -536,9 +536,19 @@ class PPCTargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
static const char * const GCCRegNames[];
static const TargetInfo::GCCRegAlias GCCRegAliases[];
+ std::string CPU;
public:
PPCTargetInfo(const std::string& triple) : TargetInfo(triple) {}
+ virtual bool setCPU(const std::string &Name) {
+ if (Name == "440" || Name == "450") {
+ CPU = Name;
+ return true;
+ }
+
+ return false;
+ }
+
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
Records = BuiltinInfo;
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index 648da87..ea5e6aa 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -793,6 +793,19 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args,
}
}
+void Clang::AddPPCTargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ // Set target cpu.
+ if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+ const char *CPUName;
+ CPUName = A->getValue(Args);
+
+ CmdArgs.push_back("-target-cpu");
+ CmdArgs.push_back(CPUName);
+ }
+}
+
+
void Clang::AddSparcTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
@@ -1391,6 +1404,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
AddMIPSTargetArgs(Args, CmdArgs);
break;
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppc64:
+ AddPPCTargetArgs(Args, CmdArgs);
+ break;
+
case llvm::Triple::sparc:
AddSparcTargetArgs(Args, CmdArgs);
break;
@@ -4200,6 +4218,12 @@ void linuxtools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
StringRef MArch = getToolChain().getArchName();
if (MArch == "armv7" || MArch == "armv7a" || MArch == "armv7-a")
CmdArgs.push_back("-mfpu=neon");
+ } else if (getToolChain().getArch() == llvm::Triple::ppc) {
+ if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+ const char *CPUName = A->getValue(Args);
+ if (StringRef(CPUName) == "440" || StringRef(CPUName) == "450")
+ CmdArgs.push_back("-m440");
+ }
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h
index a4f732e..3c1e87d 100644
--- a/lib/Driver/Tools.h
+++ b/lib/Driver/Tools.h
@@ -38,6 +38,7 @@ namespace tools {
void AddARMTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
bool KernelOrKext) const;
void AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const;
+ void AddPPCTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const;
void AddSparcTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const;
void AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits