changeset 87a6f2ed585a in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=87a6f2ed585a
description:
        inorder: add a fetch buffer to fetch unit
        Give the fetch unit its own parameterizable fetch buffer to read
        from. It is very inefficient (architecturally and in simulation) to
        continually fetch at the granularity of the word size. As expected,
        the number of fetch memory requests drops dramatically.

diffstat:

 src/cpu/inorder/InOrderCPU.py           |    5 +-
 src/cpu/inorder/resources/cache_unit.cc |   76 +++--
 src/cpu/inorder/resources/cache_unit.hh |   18 +-
 src/cpu/inorder/resources/fetch_unit.cc |  358 ++++++++++++++++++++++++-------
 src/cpu/inorder/resources/fetch_unit.hh |   51 +++-
 5 files changed, 371 insertions(+), 137 deletions(-)

diffs (truncated from 801 to 300 lines):

diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/InOrderCPU.py
--- a/src/cpu/inorder/InOrderCPU.py     Fri Feb 04 00:08:21 2011 -0500
+++ b/src/cpu/inorder/InOrderCPU.py     Fri Feb 04 00:08:22 2011 -0500
@@ -48,6 +48,9 @@
     dcache_port = Port("Data Port")
     _cached_ports = ['icache_port', 'dcache_port']
 
+    fetchBuffSize = Param.Unsigned(4, "Fetch Buffer Size (Number of Cache 
Blocks Stored)")
+    memBlockSize = Param.Unsigned(64, "Memory Block Size")
+
     predType = Param.String("tournament", "Branch predictor type ('local', 
'tournament')")
     localPredictorSize = Param.Unsigned(2048, "Size of local predictor")
     localCtrBits = Param.Unsigned(2, "Bits per counter")
@@ -69,8 +72,6 @@
     functionTraceStart = Param.Tick(0, "Cycle to start function trace")
     stageTracing = Param.Bool(False, "Enable tracing of each stage in CPU")
 
-    memBlockSize = Param.Unsigned(64, "Memory Block Size")
-
     multLatency = Param.Unsigned(1, "Latency for Multiply Operations")
     multRepeatRate = Param.Unsigned(1, "Repeat Rate for Multiply Operations")
     div8Latency = Param.Unsigned(1, "Latency for 8-bit Divide Operations")
diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/resources/cache_unit.cc
--- a/src/cpu/inorder/resources/cache_unit.cc   Fri Feb 04 00:08:21 2011 -0500
+++ b/src/cpu/inorder/resources/cache_unit.cc   Fri Feb 04 00:08:22 2011 -0500
@@ -97,7 +97,7 @@
 CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
         int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
     : Resource(res_name, res_id, res_width, res_latency, _cpu),
-      cachePortBlocked(false), predecoder(NULL)
+      cachePortBlocked(false)
 {
     cachePort = new CachePort(this);
 
@@ -137,6 +137,9 @@
     // Switch to Timing TLB translations.
     resourceEvent = new CacheUnitEvent[width];
 
+    cacheBlkSize = this->cachePort->peerBlockSize();
+    cacheBlkMask = cacheBlkSize  - 1;
+
     initSlots();
 }
 
@@ -375,28 +378,20 @@
     }
 }
 
-Fault
-CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
-                       int flags, TheISA::TLB::Mode tlb_mode)
+void
+CacheUnit::setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req,
+                           int acc_size, int flags)
 {
     ThreadID tid = inst->readTid();
     Addr aligned_addr = inst->getMemAddr();
-    unsigned stage_num = cache_req->getStageNum();
-    unsigned slot_idx = cache_req->getSlot();
 
-    if (tlb_mode == TheISA::TLB::Execute) {
-        inst->fetchMemReq =
-            new Request(inst->readTid(), aligned_addr, acc_size, flags,
-                        inst->instAddr(), cpu->readCpuId(), inst->readTid());
-        cache_req->memReq = inst->fetchMemReq;
-    } else {
-        if (!cache_req->is2ndSplit()) {            
+    if (!cache_req->is2ndSplit()) {
             inst->dataMemReq =
                 new Request(cpu->asid[tid], aligned_addr, acc_size, flags,
                             inst->instAddr(), cpu->readCpuId(),
-                            inst->readTid());
+                            tid);
             cache_req->memReq = inst->dataMemReq;
-        } else {
+    } else {
             assert(inst->splitInst);
             
             inst->splitMemReq = new Request(cpu->asid[tid], 
@@ -407,9 +402,19 @@
                                             cpu->readCpuId(), 
                                             tid);
             cache_req->memReq = inst->splitMemReq;            
-        }
     }
-    
+}
+
+Fault
+CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
+                       int flags, TheISA::TLB::Mode tlb_mode)
+{
+    ThreadID tid = inst->readTid();
+    //Addr aligned_addr = inst->getMemAddr();
+    unsigned stage_num = cache_req->getStageNum();
+    unsigned slot_idx = cache_req->getSlot();
+
+    setupMemRequest(inst, cache_req, acc_size, flags);
 
     cache_req->fault =
         _tlb->translateAtomic(cache_req->memReq,
@@ -842,8 +847,8 @@
     }
 
     DPRINTF(InOrderCachePort,
-            "[tid:%i] [sn:%i] attempting to access cache\n",
-            tid, inst->seqNum);
+            "[tid:%i] [sn:%i] attempting to access cache for addr %08p\n",
+            tid, inst->seqNum, cache_req->dataPkt->getAddr());
 
     if (do_access) {
         if (!cachePort->sendTiming(cache_req->dataPkt)) {
@@ -1086,6 +1091,24 @@
     squash(inst, stage_num, squash_seq_num + 1, tid);    
 }
 
+void
+CacheUnit::squashCacheRequest(CacheReqPtr req_ptr)
+{
+    DynInstPtr inst =  req_ptr->getInst();
+
+    req_ptr->setSquashed();
+    inst->setSquashed();
+    if (inst->validMemAddr()) {
+        DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to "
+                "remove addr. %08p dependencies.\n",
+                inst->readTid(),
+                inst->seqNum,
+                inst->getMemAddr());
+
+        removeAddrDependency(inst);
+    }
+}
+
 
 void
 CacheUnit::squash(DynInstPtr inst, int stage_num,
@@ -1115,14 +1138,12 @@
                 map_it++;                
                 continue;                
             }
-            
-            req_ptr->setSquashed();
-
-            req_ptr->getInst()->setSquashed();
 
             CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(req_ptr);
             assert(cache_req);
 
+            squashCacheRequest(cache_req);
+
             int req_slot_num = req_ptr->getSlot();
 
             if (cache_req->tlbStall) {
@@ -1152,15 +1173,6 @@
                         req_ptr->getInst()->splitInst);
             }
 
-            if (req_ptr->getInst()->validMemAddr()) {                    
-                DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to "
-                        "remove addr. %08p dependencies.\n",
-                        req_ptr->getInst()->readTid(),
-                        req_ptr->getInst()->seqNum, 
-                        req_ptr->getInst()->getMemAddr());
-                
-                removeAddrDependency(req_ptr->getInst());
-            }            
         }
 
         map_it++;
diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/resources/cache_unit.hh
--- a/src/cpu/inorder/resources/cache_unit.hh   Fri Feb 04 00:08:21 2011 -0500
+++ b/src/cpu/inorder/resources/cache_unit.hh   Fri Feb 04 00:08:22 2011 -0500
@@ -139,10 +139,16 @@
     void squashDueToMemStall(DynInstPtr inst, int stage_num,
                              InstSeqNum squash_seq_num, ThreadID tid);
 
+    virtual void squashCacheRequest(CacheReqPtr req_ptr);
+
     /** After memory request is completedd in the cache, then do final
         processing to complete the request in the CPU.
     */
-   virtual void processCacheCompletion(PacketPtr pkt);
+    virtual void processCacheCompletion(PacketPtr pkt);
+
+    /** Create request that will interface w/TLB and Memory objects */
+    virtual void setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req,
+                                 int acc_size, int flags);
 
     void recvRetry();
 
@@ -167,7 +173,7 @@
     uint64_t getMemData(Packet *packet);
 
     void setAddrDependency(DynInstPtr inst);
-    void removeAddrDependency(DynInstPtr inst);
+    virtual void removeAddrDependency(DynInstPtr inst);
     
   protected:
     /** Cache interface. */
@@ -190,8 +196,6 @@
         return (addr & ~(cacheBlkMask));
     }
 
-    TheISA::Predecoder predecoder;
-
     bool tlbBlocked[ThePipeline::MaxThreads];
 
     TheISA::TLB* tlb();
@@ -225,7 +229,7 @@
           pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
           retryPkt(NULL), memAccComplete(false), memAccPending(false),
           tlbStall(false), splitAccess(false), splitAccessNum(-1),
-          split2ndAccess(false), instIdx(idx)
+          split2ndAccess(false), instIdx(idx), fetchBufferFill(false)
     { }
 
 
@@ -270,7 +274,9 @@
     int splitAccessNum;
     bool split2ndAccess;
     int instIdx;    
-    
+
+    /** Should we expect block from cache access or fetch buffer? */
+    bool fetchBufferFill;
 };
 
 class CacheReqPacket : public Packet
diff -r fa81553d67ea -r 87a6f2ed585a src/cpu/inorder/resources/fetch_unit.cc
--- a/src/cpu/inorder/resources/fetch_unit.cc   Fri Feb 04 00:08:21 2011 -0500
+++ b/src/cpu/inorder/resources/fetch_unit.cc   Fri Feb 04 00:08:22 2011 -0500
@@ -37,6 +37,7 @@
 #include "arch/utility.hh"
 #include "arch/predecoder.hh"
 #include "config/the_isa.hh"
+#include "cpu/inorder/resources/cache_unit.hh"
 #include "cpu/inorder/resources/fetch_unit.hh"
 #include "cpu/inorder/pipeline_traits.hh"
 #include "cpu/inorder/cpu.hh"
@@ -50,10 +51,42 @@
 FetchUnit::FetchUnit(string res_name, int res_id, int res_width,
                      int res_latency, InOrderCPU *_cpu,
                      ThePipeline::Params *params)
-    : CacheUnit(res_name, res_id, res_width, res_latency, _cpu,
-                params)
+    : CacheUnit(res_name, res_id, res_width, res_latency, _cpu, params),
+      instSize(sizeof(TheISA::MachInst)), fetchBuffSize(params->fetchBuffSize),
+      predecoder(NULL)
 { }
 
+void
+FetchUnit::createMachInst(std::list<FetchBlock*>::iterator fetch_it,
+                          DynInstPtr inst)
+{
+    ExtMachInst ext_inst;
+    Addr block_addr = cacheBlockAlign(inst->getMemAddr());
+    Addr fetch_addr = inst->getMemAddr();
+    unsigned fetch_offset = (fetch_addr - block_addr) / instSize;
+    ThreadID tid = inst->readTid();
+    TheISA::PCState instPC = inst->pcState();
+
+
+    DPRINTF(InOrderCachePort, "Creating instruction [sn:%i] w/fetch data @"
+            "addr:%08p block:%08p\n", inst->seqNum, fetch_addr, block_addr);
+
+    assert((*fetch_it)->valid);
+
+    TheISA::MachInst *fetchInsts =
+        reinterpret_cast<TheISA::MachInst *>((*fetch_it)->block);
+
+    MachInst mach_inst =
+        TheISA::gtoh(fetchInsts[fetch_offset]);
+
+    predecoder.setTC(cpu->thread[tid]->getTC());
+    predecoder.moreBytes(instPC, inst->instAddr(), mach_inst);
+    ext_inst = predecoder.getExtMachInst(instPC);
+
+    inst->pcState(instPC);
+    inst->setMachInst(ext_inst);
+}
+
 int
 FetchUnit::getSlot(DynInstPtr inst)
 {
@@ -119,15 +152,64 @@
                            int acc_size, int flags)
 {
     ThreadID tid = inst->readTid();
-    Addr aligned_addr = inst->getMemAddr();
+    Addr aligned_addr = cacheBlockAlign(inst->getMemAddr());
 
     inst->fetchMemReq =
-            new Request(inst->readTid(), aligned_addr, acc_size, flags,
-                        inst->instAddr(), cpu->readCpuId(), inst->readTid());
+            new Request(tid, aligned_addr, acc_size, flags,
+                        inst->instAddr(), cpu->readCpuId(), tid);
 
     cache_req->memReq = inst->fetchMemReq;
 }
_______________________________________________
m5-dev mailing list
m5-dev@m5sim.org
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to