Nilay,

Sorry I didn't review this before it went out, but would you mind reverting this patch? I do believe that issueLat should remain Cycles and be allowed to be set to any value. One can certainly imagine an FU where you have a latency of X and yet a throughput of 1 op / Y cycles, where Y is neither X nor 1.
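For example (a purely hypothetical unit and numbers, not taken from any existing config), the current OpDesc interface can describe a partially pipelined divider that produces its result after 12 cycles but can accept a new divide every 4 cycles:

    # Hypothetical, illustrative gem5 config fragment using the pre-patch
    # OpDesc parameters: opLat is the cycles until the result is available,
    # issueLat is the cycles before another op of the same class can issue.
    from m5.objects import FUDesc, OpDesc

    class PartiallyPipelinedDiv(FUDesc):
        opList = [ OpDesc(opClass='IntDiv', opLat=12, issueLat=4) ]
        count = 1

A boolean 'pipelined' can only express issueLat == 1 or issueLat == opLat, so this intermediate throughput becomes impossible to model.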
Perhaps a patch that instead adds a comment to explain the semantics of issueLat - as opposed to removing it - would suffice?

-----Original Message-----
From: gem5-dev [mailto:[email protected]] On Behalf Of Nilay Vaish
Sent: Wednesday, April 29, 2015 8:40 PM
To: [email protected]
Subject: [gem5-dev] changeset in gem5: cpu: o3: replace issueLatency with bool pipel...

changeset dac26eb4cb64 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=dac26eb4cb64
description:
    cpu: o3: replace issueLatency with bool pipelined

    Currently, each op class has a parameter issueLat that denotes the
    cycles after which another op of the same class can be issued. As of
    now, this latency can either be one cycle (fully pipelined) or the same
    as the execution latency of the op (not at all pipelined). The fact
    that issueLat is a parameter of type Cycles makes one believe that it
    can be set to any value. To avoid the confusion, the parameter is being
    renamed to 'pipelined', with type boolean. If set to true, the op
    executes in a fully pipelined fashion. Otherwise, it executes in an
    unpipelined fashion.

diffstat:

 configs/common/O3_ARM_v7a.py  | 10 +++++-----
 src/cpu/FuncUnit.py           |  3 ++-
 src/cpu/func_unit.cc          | 14 +++++++-------
 src/cpu/func_unit.hh          | 10 +++++-----
 src/cpu/o3/FuncUnitConfig.py  |  9 ++++-----
 src/cpu/o3/fu_pool.cc         |  8 ++++----
 src/cpu/o3/fu_pool.hh         |  8 ++++----
 src/cpu/o3/inst_queue_impl.hh |  5 ++---
 8 files changed, 33 insertions(+), 34 deletions(-)

diffs (244 lines):

diff -r b9410e821c41 -r dac26eb4cb64 configs/common/O3_ARM_v7a.py
--- a/configs/common/O3_ARM_v7a.py      Wed Apr 29 22:35:22 2015 -0500
+++ b/configs/common/O3_ARM_v7a.py      Wed Apr 29 22:35:22 2015 -0500
@@ -36,9 +36,9 @@

 # Complex ALU instructions have a variable latencies
 class O3_ARM_v7a_Complex_Int(FUDesc):
-    opList = [ OpDesc(opClass='IntMult', opLat=3, issueLat=1),
-               OpDesc(opClass='IntDiv', opLat=12, issueLat=12),
-               OpDesc(opClass='IprAccess', opLat=3, issueLat=1) ]
+    opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True),
+               OpDesc(opClass='IntDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
     count = 1

@@ -67,8 +67,8 @@
               OpDesc(opClass='FloatAdd', opLat=5),
               OpDesc(opClass='FloatCmp', opLat=5),
               OpDesc(opClass='FloatCvt', opLat=5),
-              OpDesc(opClass='FloatDiv', opLat=9, issueLat=9),
-              OpDesc(opClass='FloatSqrt', opLat=33, issueLat=33),
+              OpDesc(opClass='FloatDiv', opLat=9, pipelined=False),
+              OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
               OpDesc(opClass='FloatMult', opLat=4) ]
     count = 2

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/FuncUnit.py
--- a/src/cpu/FuncUnit.py       Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/FuncUnit.py       Wed Apr 29 22:35:22 2015 -0500
@@ -54,9 +54,10 @@
 class OpDesc(SimObject):
     type = 'OpDesc'
     cxx_header = "cpu/func_unit.hh"
-    issueLat = Param.Cycles(1, "cycles until another can be issued")
     opClass = Param.OpClass("type of operation")
     opLat = Param.Cycles(1, "cycles until result is available")
+    pipelined = Param.Bool(True, "set to true when the functional unit for"
+                           "this op is fully pipelined. False means not pipelined at all.")

 class FUDesc(SimObject):
     type = 'FUDesc'

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/func_unit.cc
--- a/src/cpu/func_unit.cc      Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/func_unit.cc      Wed Apr 29 22:35:22 2015 -0500
@@ -52,7 +52,7 @@
     for (int i = 0; i < Num_OpClasses; ++i) {
         opLatencies[i] = fu.opLatencies[i];
-        issueLatencies[i] = fu.issueLatencies[i];
+        pipelined[i] = fu.pipelined[i];
     }

     capabilityList = fu.capabilityList;
@@ -60,15 +60,15 @@
 void
-FuncUnit::addCapability(OpClass cap, unsigned oplat, unsigned issuelat)
+FuncUnit::addCapability(OpClass cap, unsigned oplat, bool pipeline)
 {
-    if (issuelat == 0 || oplat == 0)
+    if (oplat == 0)
         panic("FuncUnit: you don't really want a zero-cycle latency do you?");

     capabilityList.set(cap);

     opLatencies[cap] = oplat;
-    issueLatencies[cap] = issuelat;
+    pipelined[cap] = pipeline;
 }

 bool
@@ -89,10 +89,10 @@
     return opLatencies[cap];
 }

-unsigned
-FuncUnit::issueLatency(OpClass capability)
+bool
+FuncUnit::isPipelined(OpClass capability)
 {
-    return issueLatencies[capability];
+    return pipelined[capability];
 }

 ////////////////////////////////////////////////////////////////////////////

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/func_unit.hh
--- a/src/cpu/func_unit.hh      Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/func_unit.hh      Wed Apr 29 22:35:22 2015 -0500
@@ -52,11 +52,11 @@
   public:
     OpClass opClass;
     Cycles opLat;
-    Cycles issueLat;
+    bool pipelined;

     OpDesc(const OpDescParams *p)
         : SimObject(p), opClass(p->opClass), opLat(p->opLat),
-          issueLat(p->issueLat) {};
+          pipelined(p->pipelined) {};
 };

 class FUDesc : public SimObject
@@ -85,7 +85,7 @@
 {
   private:
     unsigned opLatencies[Num_OpClasses];
-    unsigned issueLatencies[Num_OpClasses];
+    bool pipelined[Num_OpClasses];

     std::bitset<Num_OpClasses> capabilityList;

   public:
@@ -94,13 +94,13 @@
     std::string name;

-    void addCapability(OpClass cap, unsigned oplat, unsigned issuelat);
+    void addCapability(OpClass cap, unsigned oplat, bool pipelined);

     bool provides(OpClass capability);
     std::bitset<Num_OpClasses> capabilities();

     unsigned &opLatency(OpClass capability);
-    unsigned issueLatency(OpClass capability);
+    bool isPipelined(OpClass capability);
 };

 #endif // __FU_POOL_HH__

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/o3/FuncUnitConfig.py
--- a/src/cpu/o3/FuncUnitConfig.py      Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/o3/FuncUnitConfig.py      Wed Apr 29 22:35:22 2015 -0500
@@ -49,7 +49,7 @@
 class IntMultDiv(FUDesc):
     opList = [ OpDesc(opClass='IntMult', opLat=3),
-               OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ]
+               OpDesc(opClass='IntDiv', opLat=20, pipelined=False) ]

     # DIV and IDIV instructions in x86 are implemented using a loop which
     # issues division microops. The latency of these microops should really be
     # one (or a small number) since each of these microops computes one bit
     # of the quotient.
     if buildEnv['TARGET_ISA'] in ('x86'):
         opList[1].opLat=1
-        opList[1].issueLat=1

     count=2

@@ -69,8 +68,8 @@
 class FP_MultDiv(FUDesc):
     opList = [ OpDesc(opClass='FloatMult', opLat=4),
-               OpDesc(opClass='FloatDiv', opLat=12, issueLat=12),
-               OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ]
+               OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=24, pipelined=False) ]
     count = 2

 class SIMD_Unit(FUDesc):
@@ -109,6 +108,6 @@
     count = 4

 class IprPort(FUDesc):
-    opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ]
+    opList = [ OpDesc(opClass='IprAccess', opLat = 3, pipelined = False) ]
     count = 1

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/o3/fu_pool.cc
--- a/src/cpu/o3/fu_pool.cc     Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/o3/fu_pool.cc     Wed Apr 29 22:35:22 2015 -0500
@@ -89,7 +89,7 @@
     for (int i = 0; i < Num_OpClasses; ++i) {
         maxOpLatencies[i] = Cycles(0);
-        maxIssueLatencies[i] = Cycles(0);
+        pipelined[i] = true;
     }

     //
@@ -123,13 +123,13 @@
             fuPerCapList[(*j)->opClass].addFU(numFU + k);

             // indicate that this FU has the capability
-            fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+            fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->pipelined);

             if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
                 maxOpLatencies[(*j)->opClass] = (*j)->opLat;

-            if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
-                maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+            if (!(*j)->pipelined)
+                pipelined[(*j)->opClass] = false;
         }

         numFU++;

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/o3/fu_pool.hh
--- a/src/cpu/o3/fu_pool.hh     Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/o3/fu_pool.hh     Wed Apr 29 22:35:22 2015 -0500
@@ -72,8 +72,8 @@
   private:
     /** Maximum op execution latencies, per op class. */
     Cycles maxOpLatencies[Num_OpClasses];
-    /** Maximum issue latencies, per op class. */
-    Cycles maxIssueLatencies[Num_OpClasses];
+    /** Whether op is pipelined or not. */
+    bool pipelined[Num_OpClasses];

     /** Bitvector listing capabilities of this FU pool. */
     std::bitset<Num_OpClasses> capabilityList;
@@ -160,8 +160,8 @@
     }

     /** Returns the issue latency of the given capability. */
-    Cycles getIssueLatency(OpClass capability) {
-        return maxIssueLatencies[capability];
+    bool isPipelined(OpClass capability) {
+        return pipelined[capability];
     }

     /** Have all the FUs drained? */

diff -r b9410e821c41 -r dac26eb4cb64 src/cpu/o3/inst_queue_impl.hh
--- a/src/cpu/o3/inst_queue_impl.hh     Wed Apr 29 22:35:22 2015 -0500
+++ b/src/cpu/o3/inst_queue_impl.hh     Wed Apr 29 22:35:22 2015 -0500
@@ -825,7 +825,7 @@
                 if (idx >= 0)
                     fuPool->freeUnitNextCycle(idx);
             } else {
-                Cycles issue_latency = fuPool->getIssueLatency(op_class);
+                bool pipelined = fuPool->isPipelined(op_class);
                 // Generate completion event for the FU
                 ++wbOutstanding;
                 FUCompletion *execution = new FUCompletion(issuing_inst,
@@ -834,8 +834,7 @@
                 cpu->schedule(execution,
                               cpu->clockEdge(Cycles(op_latency - 1)));

-                // @todo: Enforce that issue_latency == 1 or op_latency
-                if (issue_latency > Cycles(1)) {
+                if (!pipelined) {
                     // If FU isn't pipelined, then it must be freed
                     // upon the execution completing.
                     execution->setFreeFU();

_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev
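To make the semantics of the renamed parameter in the quoted change concrete, here is a minimal sketch (illustrative unit names and latencies, not taken from the patch) of how a functional unit is expressed after this change:

    # Sketch using the post-patch OpDesc parameter 'pipelined'.
    from m5.objects import FUDesc, OpDesc

    class FullyPipelinedMult(FUDesc):
        # pipelined=True (the default): a new IntMult can issue every cycle,
        # even though each result takes 3 cycles to become available.
        opList = [ OpDesc(opClass='IntMult', opLat=3, pipelined=True) ]
        count = 1

    class UnpipelinedDiv(FUDesc):
        # pipelined=False: the unit stays busy for the full 12-cycle opLat,
        # so the next IntDiv can only issue once the previous one completes.
        opList = [ OpDesc(opClass='IntDiv', opLat=12, pipelined=False) ]
        count = 1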
